From 9ad89997a533744695b380b315f1f70293bb30c4 Mon Sep 17 00:00:00 2001 From: Jacky Zhao Date: Sun, 4 Jun 2023 12:35:45 -0400 Subject: [PATCH] multi-core builds --- .gitignore | 1 + quartz.config.ts | 25 +++-- quartz/{bootstrap.mjs => bootstrap-cli.mjs} | 15 ++- quartz/bootstrap-worker.mjs | 7 ++ quartz/build.ts | 75 +++++++++++++++ quartz/index.ts | 77 --------------- quartz/plugins/transformers/gfm.ts | 2 +- quartz/processors/parse.ts | 100 ++++++++++++++++---- quartz/worker.ts | 30 ++++++ 9 files changed, 224 insertions(+), 108 deletions(-) rename quartz/{bootstrap.mjs => bootstrap-cli.mjs} (89%) create mode 100644 quartz/bootstrap-worker.mjs create mode 100644 quartz/build.ts delete mode 100644 quartz/index.ts create mode 100644 quartz/worker.ts diff --git a/.gitignore b/.gitignore index 690975f9b..b39ea57a0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .DS_Store node_modules public +.quartz-cache diff --git a/quartz.config.ts b/quartz.config.ts index 14ef1c152..6808d9322 100644 --- a/quartz.config.ts +++ b/quartz.config.ts @@ -1,9 +1,20 @@ -import { buildQuartz } from "./quartz" -import Head from "./quartz/components/Head" -import Header from "./quartz/components/Header" -import { ContentPage, CreatedModifiedDate, Description, FrontMatter, GitHubFlavoredMarkdown, Katex, ObsidianFlavoredMarkdown, RemoveDrafts, ResolveLinks, SyntaxHighlighting } from "./quartz/plugins" +import { QuartzConfig } from "./quartz/cfg" +import * as Head from "./quartz/components/Head" +import * as Header from "./quartz/components/Header" +import { + ContentPage, + CreatedModifiedDate, + Description, + FrontMatter, + GitHubFlavoredMarkdown, + Katex, + ObsidianFlavoredMarkdown, + RemoveDrafts, + ResolveLinks, + SyntaxHighlighting +} from "./quartz/plugins" -export default buildQuartz({ +const config: QuartzConfig = { configuration: { siteTitle: "🪴 Quartz 4.0", enableSPA: true, @@ -61,4 +72,6 @@ export default buildQuartz({ }) ] }, -}) +} + +export default config diff --git a/quartz/bootstrap.mjs b/quartz/bootstrap-cli.mjs similarity index 89% rename from quartz/bootstrap.mjs rename to quartz/bootstrap-cli.mjs index 16a0c695c..be6270e2f 100755 --- a/quartz/bootstrap.mjs +++ b/quartz/bootstrap-cli.mjs @@ -5,10 +5,10 @@ import path from 'path' import { hideBin } from 'yargs/helpers' import esbuild from 'esbuild' import chalk from 'chalk' -import requireFromString from 'require-from-string' import { sassPlugin } from 'esbuild-sass-plugin' -const fp = "./quartz.config.ts" +const cacheFile = "./.quartz-cache/transpiled-build.mjs" +const fp = "./quartz/build.ts" const { version } = JSON.parse(readFileSync("./package.json").toString()) export const BuildArgv = { @@ -52,16 +52,16 @@ yargs(hideBin(process.argv)) .version(version) .usage('$0 [args]') .command('build', 'Build Quartz into a bundle of static HTML files', BuildArgv, async (argv) => { - const out = await esbuild.build({ + await esbuild.build({ entryPoints: [fp], - write: false, + outfile: path.join("quartz", cacheFile), bundle: true, keepNames: true, platform: "node", - format: "cjs", + format: "esm", jsx: "automatic", jsxImportSource: "preact", - external: ["@napi-rs/simple-git", "shiki"], + packages: "external", plugins: [ sassPlugin({ type: 'css-text' @@ -97,8 +97,7 @@ yargs(hideBin(process.argv)) process.exit(1) }) - const mod = out.outputFiles[0].text - const init = requireFromString(mod, fp).default + const { default: init } = await import(cacheFile) init(argv, version) }) .showHelpOnFail(false) diff --git a/quartz/bootstrap-worker.mjs b/quartz/bootstrap-worker.mjs new file mode 100644 index 000000000..7db24c06b --- /dev/null +++ b/quartz/bootstrap-worker.mjs @@ -0,0 +1,7 @@ +#!/usr/bin/env node +import workerpool from 'workerpool' +const cacheFile = "./.quartz-cache/transpiled-worker.mjs" +const { parseFiles } = await import(cacheFile) +workerpool.worker({ + parseFiles +}) diff --git a/quartz/build.ts b/quartz/build.ts new file mode 100644 index 000000000..73cebce3a --- /dev/null +++ b/quartz/build.ts @@ -0,0 +1,75 @@ +import path from "path" +import { PerfTimer } from "./perf" +import { rimraf } from "rimraf" +import { globby } from "globby" +import chalk from "chalk" +import http from "http" +import serveHandler from "serve-handler" +import { parseMarkdown } from "./processors/parse" +import { filterContent } from "./processors/filter" +import { emitContent } from "./processors/emit" +import cfg from "../quartz.config" + +interface Argv { + directory: string + verbose: boolean + output: string + clean: boolean + serve: boolean + port: number +} + +export default async function buildQuartz(argv: Argv, version: string) { + console.log(chalk.bgGreen.black(`\n Quartz v${version} \n`)) + const perf = new PerfTimer() + const output = argv.output + + if (argv.verbose) { + const pluginCount = Object.values(cfg.plugins).flat().length + const pluginNames = (key: 'transformers' | 'filters' | 'emitters') => cfg.plugins[key].map(plugin => plugin.name) + console.log(`Loaded ${pluginCount} plugins`) + console.log(` Transformers: ${pluginNames('transformers').join(", ")}`) + console.log(` Filters: ${pluginNames('filters').join(", ")}`) + console.log(` Emitters: ${pluginNames('emitters').join(", ")}`) + } + + // clean + if (argv.clean) { + perf.addEvent('clean') + await rimraf(output) + if (argv.verbose) { + console.log(`Cleaned output directory \`${output}\` in ${perf.timeSince('clean')}`) + } + } + + // glob + perf.addEvent('glob') + const fps = await globby('**/*.md', { + cwd: argv.directory, + ignore: cfg.configuration.ignorePatterns, + gitignore: true, + }) + + if (argv.verbose) { + console.log(`Found ${fps.length} input files in ${perf.timeSince('glob')}`) + + } + + const filePaths = fps.map(fp => `${argv.directory}${path.sep}${fp}`) + const parsedFiles = await parseMarkdown(cfg.plugins.transformers, argv.directory, filePaths, argv.verbose) + const filteredContent = filterContent(cfg.plugins.filters, parsedFiles, argv.verbose) + await emitContent(argv.directory, output, cfg, filteredContent, argv.verbose) + console.log(chalk.green(`Done processing ${fps.length} files in ${perf.timeSince()}`)) + + if (argv.serve) { + const server = http.createServer(async (req, res) => { + return serveHandler(req, res, { + public: output, + directoryListing: false + }) + }) + server.listen(argv.port) + console.log(`Started a Quartz server listening at http://localhost:${argv.port}`) + console.log('hint: exit with ctrl+c') + } +} diff --git a/quartz/index.ts b/quartz/index.ts deleted file mode 100644 index c64f01274..000000000 --- a/quartz/index.ts +++ /dev/null @@ -1,77 +0,0 @@ -import path from "path" -import { QuartzConfig } from "./cfg" -import { PerfTimer } from "./perf" -import { rimraf } from "rimraf" -import { globby } from "globby" -import chalk from "chalk" -import http from "http" -import serveHandler from "serve-handler" -import { createProcessor, parseMarkdown } from "./processors/parse" -import { filterContent } from "./processors/filter" -import { emitContent } from "./processors/emit" - -interface Argv { - directory: string - verbose: boolean - output: string - clean: boolean - serve: boolean - port: number -} - -export function buildQuartz(cfg: QuartzConfig) { - return async (argv: Argv, version: string) => { - console.log(chalk.bgGreen.black(`\n Quartz v${version} \n`)) - const perf = new PerfTimer() - const output = argv.output - - if (argv.verbose) { - const pluginCount = Object.values(cfg.plugins).flat().length - const pluginNames = (key: 'transformers' | 'filters' | 'emitters') => cfg.plugins[key].map(plugin => plugin.name) - console.log(`Loaded ${pluginCount} plugins`) - console.log(` Transformers: ${pluginNames('transformers').join(", ")}`) - console.log(` Filters: ${pluginNames('filters').join(", ")}`) - console.log(` Emitters: ${pluginNames('emitters').join(", ")}`) - } - - // clean - if (argv.clean) { - perf.addEvent('clean') - await rimraf(output) - if (argv.verbose) { - console.log(`Cleaned output directory \`${output}\` in ${perf.timeSince('clean')}`) - } - } - - // glob - perf.addEvent('glob') - const fps = await globby('**/*.md', { - cwd: argv.directory, - ignore: cfg.configuration.ignorePatterns, - gitignore: true, - }) - - if (argv.verbose) { - console.log(`Found ${fps.length} input files in ${perf.timeSince('glob')}`) - } - - const processor = createProcessor(cfg.plugins.transformers) - const filePaths = fps.map(fp => `${argv.directory}${path.sep}${fp}`) - const parsedFiles = await parseMarkdown(processor, argv.directory, filePaths, argv.verbose) - const filteredContent = filterContent(cfg.plugins.filters, parsedFiles, argv.verbose) - await emitContent(argv.directory, output, cfg, filteredContent, argv.verbose) - console.log(chalk.green(`Done processing ${fps.length} files in ${perf.timeSince()}`)) - - if (argv.serve) { - const server = http.createServer(async (req, res) => { - return serveHandler(req, res, { - public: output, - directoryListing: false - }) - }) - server.listen(argv.port) - console.log(`Started a Quartz server listening at http://localhost:${argv.port}`) - console.log('hint: exit with ctrl+c') - } - } -} diff --git a/quartz/plugins/transformers/gfm.ts b/quartz/plugins/transformers/gfm.ts index 55dbda2e1..1cb0fc69b 100644 --- a/quartz/plugins/transformers/gfm.ts +++ b/quartz/plugins/transformers/gfm.ts @@ -3,7 +3,7 @@ import remarkGfm from "remark-gfm" import smartypants from 'remark-smartypants' import { QuartzTransformerPlugin } from "../types" import rehypeSlug from "rehype-slug" -import rehypeAutolinkHeadings from "rehype-autolink-headings/lib" +import rehypeAutolinkHeadings from "rehype-autolink-headings" export interface Options { enableSmartyPants: boolean diff --git a/quartz/processors/parse.ts b/quartz/processors/parse.ts index 83a05d465..715a4e972 100644 --- a/quartz/processors/parse.ts +++ b/quartz/processors/parse.ts @@ -1,3 +1,4 @@ +import esbuild from 'esbuild' import remarkParse from 'remark-parse' import remarkRehype from 'remark-rehype' import { Processor, unified } from "unified" @@ -8,6 +9,8 @@ import { PerfTimer } from '../perf' import { read } from 'to-vfile' import { slugify } from '../path' import path from 'path' +import os from 'os' +import workerpool, { Promise as WorkerPromise } from 'workerpool' import { QuartzTransformerPlugin } from '../plugins/types' export type QuartzProcessor = Processor @@ -32,24 +35,89 @@ export function createProcessor(transformers: QuartzTransformerPlugin[]): any { return processor } -export async function parseMarkdown(processor: QuartzProcessor, baseDir: string, fps: string[], verbose: boolean): Promise { - const perf = new PerfTimer() - const res: ProcessedContent[] = [] - for (const fp of fps) { - const file = await read(fp) +function* chunks(arr: T[], n: number) { + for (let i = 0; i < arr.length; i += n) { + yield arr.slice(i, i + n) + } +} - // base data properties that plugins may use - file.data.slug = slugify(path.relative(baseDir, file.path)) - file.data.filePath = fp - - const ast = processor.parse(file) - res.push([await processor.run(ast, file), file]) - - if (verbose) { - console.log(`[process] ${fp} -> ${file.data.slug}`) - } +async function transpileWorkerScript(verbose: boolean) { + // transpile worker script + const cacheFile = "./.quartz-cache/transpiled-worker.mjs" + const fp = "./quartz/worker.ts" + if (verbose) { + console.log("Transpiling worker script") } - console.log(`Parsed and transformed ${res.length} Markdown files in ${perf.timeSince()}`) + await esbuild.build({ + entryPoints: [fp], + outfile: path.join("quartz", cacheFile), + bundle: true, + keepNames: true, + platform: "node", + format: "esm", + packages: "external", + plugins: [ + { + name: 'css-and-scripts-as-text', + setup(build) { + build.onLoad({ filter: /\.scss$/ }, (_) => ({ + contents: '', + loader: 'text' + })) + build.onLoad({ filter: /\.inline\.(ts|js)$/ }, (_) => ({ + contents: '', + loader: 'text' + })) + } + } + ] + }) +} + +export async function parseMarkdown(transformers: QuartzTransformerPlugin[], baseDir: string, fps: string[], verbose: boolean): Promise { + const perf = new PerfTimer() + + const CHUNK_SIZE = 128 + let concurrency = fps.length < CHUNK_SIZE ? 1 : os.availableParallelism() + const res: ProcessedContent[] = [] + if (concurrency === 1) { + // single-thread + const processor = createProcessor(transformers) + for (const fp of fps) { + const file = await read(fp) + + // base data properties that plugins may use + file.data.slug = slugify(path.relative(baseDir, file.path)) + file.data.filePath = fp + + const ast = processor.parse(file) + res.push([await processor.run(ast, file), file]) + + if (verbose) { + console.log(`[process] ${fp} -> ${file.data.slug}`) + } + } + } else { + await transpileWorkerScript(verbose) + const pool = workerpool.pool( + './quartz/bootstrap-worker.mjs', + { + minWorkers: 'max', + maxWorkers: concurrency, + workerType: 'thread' + } + ) + + const childPromises: WorkerPromise[] = [] + for (const chunk of chunks(fps, CHUNK_SIZE)) { + childPromises.push(pool.exec('parseFiles', [baseDir, chunk, verbose])) + } + const results: ProcessedContent[][] = await WorkerPromise.all(childPromises) + res.push(...results.flat()) + await pool.terminate() + } + + console.log(`Parsed and transformed ${res.length} Markdown files with ${concurrency} cores in ${perf.timeSince()}`) return res } diff --git a/quartz/worker.ts b/quartz/worker.ts new file mode 100644 index 000000000..71678b8eb --- /dev/null +++ b/quartz/worker.ts @@ -0,0 +1,30 @@ +import { read } from "to-vfile" +import config from "../quartz.config" +import { createProcessor } from "./processors/parse" +import { slugify } from "./path" +import path from "path" +import { ProcessedContent } from "./plugins/vfile" + +const transformers = config.plugins.transformers +const processor = createProcessor(transformers) + +// only called from worker thread +export async function parseFiles(baseDir: string, fps: string[], verbose: boolean) { + const res: ProcessedContent[] = [] + for (const fp of fps) { + const file = await read(fp) + + // base data properties that plugins may use + file.data.slug = slugify(path.relative(baseDir, file.path)) + file.data.filePath = fp + + const ast = processor.parse(file) + res.push([await processor.run(ast, file), file]) + + if (verbose) { + console.log(`[process] ${fp} -> ${file.data.slug}`) + } + } + + return res +}