diff --git a/docs/docs/html-generation.md b/docs/docs/html-generation.md index b6d786bc91139..451bddb33cfb1 100644 --- a/docs/docs/html-generation.md +++ b/docs/docs/html-generation.md @@ -89,7 +89,7 @@ Finally, we call [react-dom](https://reactjs.org/docs/react-dom.html) and render So, we've built the means to generate HTML for a page. This webpack bundle is saved to `public/render-page.js`. Next, we need to use it to generate HTML for all the site's pages. -Page HTML does not depend on other pages. So we can perform this step in parallel. We use the [jest-worker](https://github.com/facebook/jest/tree/master/packages/jest-worker) library to make this easier. The [html-renderer-queue.js](https://github.com/gatsbyjs/gatsby/blob/master/packages/gatsby/src/utils/html-renderer-queue.js) creates a pool of workers equal to the number of cores on your machine. It then partitions the pages into groups and sends them to the workers, which run [worker.js](https://github.com/gatsbyjs/gatsby/blob/master/packages/gatsby/src/utils/worker.js). +Page HTML does not depend on other pages. So we can perform this step in parallel. We use the [jest-worker](https://github.com/facebook/jest/tree/master/packages/jest-worker) library to make this easier. By default, the [html-renderer-queue.js](https://github.com/gatsbyjs/gatsby/blob/master/packages/gatsby/src/utils/html-renderer-queue.js) creates a pool of workers equal to the number of physical cores on your machine. You can configure the number of pools by passing an optional environment variable, [`GATSBY_CPU_COUNT`](/docs/multi-core-builds). It then partitions the pages into groups and sends them to the workers, which run [worker.js](https://github.com/gatsbyjs/gatsby/blob/master/packages/gatsby/src/utils/worker.js). The workers simply iterate over each page in their partition, and call the `render-page.js` with the page. It then saves the html for the page's path in `/public`. 
diff --git a/docs/docs/multi-core-builds.md b/docs/docs/multi-core-builds.md new file mode 100644 index 0000000000000..db3b1295ea5b4 --- /dev/null +++ b/docs/docs/multi-core-builds.md @@ -0,0 +1,35 @@ +--- +title: Multi-core builds +--- + +Gatsby now performs the static HTML generation phase of the overall [Page HTML Generation](/docs/html-generation/) process using multi-core parallel pools of workers. This helps speed up builds by distributing build generation tasks across multiple cores on your machine. + +By default, Gatsby creates a pool of workers equal to the number of physical cores on your machine. See [build-html.js](/docs/html-generation/#build-htmljs). + +In some scenarios, it may be appropriate to tell Gatsby to use a different method to calculate the number of worker pools. + +**For example**, if you are running a cloud server (like AWS EC2), your DevOps engineers may want to control the number of worker pools to improve the efficiency of server resource usage. + +## Warning + +You could negatively impact performance if you use this variable incorrectly. The default Gatsby setting (no env variable or `physical_cores`) is the safest option. + +## Setup + +Set the `GATSBY_CPU_COUNT` environment variable whilst running the `gatsby build` command. + +`GATSBY_CPU_COUNT=physical_cores` - (default) calculate the number of worker pools based on the number of physical CPU cores on your machine. + +`GATSBY_CPU_COUNT=logical_cores` - calculate the number of worker pools based on the number of logical CPU cores on your machine. + +`GATSBY_CPU_COUNT=2` - set the number of worker pools to a definite number. + +## More information + +Understanding how processors work is complex and out of scope for this documentation. + +In brief, some processors use _Simultaneous Multithreading (SMT)_, sometimes known as _Hyper-Threading_, which is the process of a CPU splitting each of its physical cores into virtual/logical cores. 
+ +SMT _can_ help to increase performance of some workloads by allowing each physical core to run two streams of work at once. + +However, sometimes latency can be increased. As logical cores share the same physical CPU core, sometimes more memory is required for each worker in the pool and more time is needed to spawn worker processes. diff --git a/packages/gatsby-plugin-manifest/src/__tests__/gatsby-node.js b/packages/gatsby-plugin-manifest/src/__tests__/gatsby-node.js index 4d8f91ed9e84f..ed5bcbf198233 100644 --- a/packages/gatsby-plugin-manifest/src/__tests__/gatsby-node.js +++ b/packages/gatsby-plugin-manifest/src/__tests__/gatsby-node.js @@ -22,6 +22,7 @@ jest.mock(`sharp`, () => { }() ) sharp.simd = jest.fn() + sharp.concurrency = jest.fn() return sharp }) const fs = require(`fs`) diff --git a/packages/gatsby-plugin-manifest/src/gatsby-node.js b/packages/gatsby-plugin-manifest/src/gatsby-node.js index a153c7d288c45..4fa1a2e058a9e 100644 --- a/packages/gatsby-plugin-manifest/src/gatsby-node.js +++ b/packages/gatsby-plugin-manifest/src/gatsby-node.js @@ -6,6 +6,12 @@ const { defaultIcons, doesIconExist } = require(`./common.js`) sharp.simd(true) +// Handle Sharp's concurrency based on the Gatsby CPU count +// See: http://sharp.pixelplumbing.com/en/stable/api-utility/#concurrency +// See: https://www.gatsbyjs.org/docs/multi-core-builds/ +const cpuCoreCount = require(`gatsby/dist/utils/cpu-core-count`) +sharp.concurrency(cpuCoreCount()) + function generateIcons(icons, srcIcon) { return Promise.map(icons, icon => { const size = parseInt(icon.sizes.substring(0, icon.sizes.lastIndexOf(`x`))) diff --git a/packages/gatsby-plugin-sharp/src/index.js b/packages/gatsby-plugin-sharp/src/index.js index a3d239d6fafc5..5a4d70f38827d 100644 --- a/packages/gatsby-plugin-sharp/src/index.js +++ b/packages/gatsby-plugin-sharp/src/index.js @@ -62,6 +62,12 @@ Promise.promisifyAll(sharp.prototype, { multiArgs: true }) // adventurous and see what happens with it on. 
sharp.simd(true) +// Handle Sharp's concurrency based on the Gatsby CPU count +// See: http://sharp.pixelplumbing.com/en/stable/api-utility/#concurrency +// See: https://www.gatsbyjs.org/docs/multi-core-builds/ +const cpuCoreCount = require(`gatsby/dist/utils/cpu-core-count`) +sharp.concurrency(cpuCoreCount()) + const bar = new ProgressBar( `Generating image thumbnails [:bar] :current/:total :elapsed secs :percent`, { diff --git a/packages/gatsby/src/utils/cpu-core-count.js b/packages/gatsby/src/utils/cpu-core-count.js new file mode 100644 index 0000000000000..f52359ebf68cc --- /dev/null +++ b/packages/gatsby/src/utils/cpu-core-count.js @@ -0,0 +1,55 @@ +/** + * Calculate CPU core count. Throws an Error if the cores cannot be counted + * @param {boolean} [useEnvVar=false] Use the 'GATSBY_CPU_COUNT' env var to calculate the requested type of CPU cores: 'logical_cores' uses os.cpus().length, a numeric value is floored and clamped to at least 1, any other value keeps the physical core count + * @returns {number} Count of the requested type of CPU cores. Defaults to number of physical cores or 1 + */ + +const cpuCoreCount = (useEnvVar = false) => { + try { + let coreCount = require(`physical-cpu-count`) || 1 + + if (!useEnvVar) { + // Return the physical CPU count, + // or default to 1 if we can't detect + return coreCount + } + + if (typeof process.env.GATSBY_CPU_COUNT !== `undefined`) { + const coreCountArg = + Number(process.env.GATSBY_CPU_COUNT) || process.env.GATSBY_CPU_COUNT + + switch (typeof coreCountArg) { + case `string`: + // Leave at Default CPU count if coreCountArg === `physical_cores` + + // CPU count === logical CPU count + // throw error if we have a problem counting logical cores + if (coreCountArg === `logical_cores`) { + coreCount = require(`os`).cpus().length + + if (typeof coreCount !== `number`) { + throw new Error( + `process.env.GATSBY_CPU_COUNT is set to 'logical_cores' but there was a problem finding the number of logical cores` + ) + } + } + break + + case `number`: + // CPU count === passed in count (whole number, never less than 1) + coreCount = Math.max(1, Math.floor(coreCountArg)) + break + + default: + break + } + } + + return coreCount + } catch (err) { + console.error(err) + 
+ throw new Error(`There has been a problem counting the number of CPU cores`) + } +} + +module.exports = cpuCoreCount diff --git a/packages/gatsby/src/utils/html-renderer-queue.js b/packages/gatsby/src/utils/html-renderer-queue.js index b8182dece2c2e..4a40dfa826364 100644 --- a/packages/gatsby/src/utils/html-renderer-queue.js +++ b/packages/gatsby/src/utils/html-renderer-queue.js @@ -1,11 +1,11 @@ const Promise = require(`bluebird`) const convertHrtime = require(`convert-hrtime`) const Worker = require(`jest-worker`).default -const numWorkers = require(`physical-cpu-count`) || 1 const { chunk } = require(`lodash`) +const cpuCoreCount = require(`./cpu-core-count`) const workerPool = new Worker(require.resolve(`./worker`), { - numWorkers, + numWorkers: cpuCoreCount(true), forkOptions: { silent: false, }, diff --git a/www/src/data/sidebars/doc-links.yaml b/www/src/data/sidebars/doc-links.yaml index 220e48ec34dcc..f12c20c1243ad 100644 --- a/www/src/data/sidebars/doc-links.yaml +++ b/www/src/data/sidebars/doc-links.yaml @@ -40,7 +40,9 @@ link: /docs/path-prefix/ - title: How Gatsby Works with GitHub Pages link: /docs/how-gatsby-works-with-github-pages/ - - title: Custom Configuration + - title: Multi-core builds + link: /docs/multi-core-builds/ + - title: Custom configuration link: /docs/customization/ items: - title: Babel.js