Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gatsby): Optimize creating many child nodes from one parent #35504

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
7a45d61
feat(gatsby-transformer-json): Speed up creating nodes for arrays
KyleAMathews Apr 27, 2022
f85f07b
100 is about 33% faster than 1000
KyleAMathews Apr 27, 2022
c409697
Add type cache
KyleAMathews Apr 27, 2022
a761e9c
Merge branch 'master' into faster-transformer-json
KyleAMathews May 31, 2022
880951e
Fix creating IDs for nodes
KyleAMathews May 31, 2022
7ceceea
Debounce writing updated parent node
KyleAMathews May 31, 2022
cbfe597
Invoke on leading and trailing to ensure the parent node's children a…
KyleAMathews May 31, 2022
74e12dc
Fix key for debounce fn
KyleAMathews Jun 1, 2022
6fa9892
batch actions instead of timeout on writing
KyleAMathews Jun 1, 2022
27ff437
Keep old behavior for tests
KyleAMathews Jun 1, 2022
81e96b6
Setting batch count of 1 seems to work
KyleAMathews Jun 1, 2022
bc76325
Merge branch 'master' into faster-transformer-json
KyleAMathews Jun 13, 2022
cc04b88
Merge branch 'master' into faster-transformer-json
KyleAMathews Jun 14, 2022
ce78af4
Merge branch 'master' into faster-transformer-json
KyleAMathews Jun 29, 2022
62f56da
Merge branch 'master' into faster-transformer-json
KyleAMathews Jul 28, 2022
e579e74
Merge branch 'master' into faster-transformer-json
KyleAMathews Oct 12, 2022
80b449f
Merge branch 'master' into faster-transformer-json
LekoArts Dec 9, 2022
f2dd1ff
fix csv transformer
LekoArts Dec 9, 2022
f8dcb3c
remove unnecessary await
LekoArts Dec 9, 2022
9485ac0
correct typescript
LekoArts Dec 9, 2022
8e9b755
fix csv tests
LekoArts Dec 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 52 additions & 13 deletions packages/gatsby-transformer-json/src/gatsby-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ function unstable_shouldOnCreateNode({ node }) {
return node.internal.mediaType === `application/json`
}

const typeCache = new Map()

async function onCreateNode(
{ node, actions, loadNodeContent, createNodeId, createContentDigest },
pluginOptions
Expand All @@ -20,15 +22,35 @@ async function onCreateNode(
} else if (pluginOptions && _.isString(pluginOptions.typeName)) {
return pluginOptions.typeName
} else if (node.internal.type !== `File`) {
return _.upperFirst(_.camelCase(`${node.internal.type} Json`))
if (typeCache.has(node.internal.type)) {
return typeCache.get(node.internal.type)
} else {
const type = _.upperFirst(_.camelCase(`${node.internal.type} Json`))
typeCache.set(node.internal.type, type)
return type
}
} else if (isArray) {
return _.upperFirst(_.camelCase(`${node.name} Json`))
if (typeCache.has(node.name)) {
return typeCache.get(node.name)
} else {
const type = _.upperFirst(_.camelCase(`${node.name} Json`))
typeCache.set(node.name, type)
return type
}
} else {
return _.upperFirst(_.camelCase(`${path.basename(node.dir)} Json`))
if (typeCache.has(node.dir)) {
return typeCache.get(node.dir)
} else {
const type = _.upperFirst(
_.camelCase(`${path.basename(node.dir)} Json`)
)
typeCache.set(node.dir, type)
return type
}
}
}

async function transformObject(obj, id, type) {
function transformObject(obj, id, type) {
const jsonNode = {
...obj,
id,
Expand All @@ -42,7 +64,7 @@ async function onCreateNode(
if (obj.id) {
jsonNode[`jsonId`] = obj.id
}
await createNode(jsonNode)
createNode(jsonNode)
createParentChildLink({ parent: node, child: jsonNode })
}

Expand All @@ -59,18 +81,35 @@ async function onCreateNode(
throw new Error(`Unable to parse JSON: ${hint}`)
}

if (_.isArray(parsedContent)) {
for (let i = 0, l = parsedContent.length; i < l; i++) {
const obj = parsedContent[i]

await transformObject(
async function transformArrayChunk({ chunk, startCount }) {
for (let i = 0, l = chunk.length; i < l; i++) {
const obj = chunk[i]
transformObject(
obj,
createNodeId(`${node.id} [${i}] >>> JSON`),
getType({ node, object: obj, isArray: true })
createNodeId(`${node.id} [${i + startCount}] >>> JSON`),
getType({
node,
object: obj,
isArray: true,
})
)
await new Promise(resolve =>
setImmediate(() => {
resolve()
})
)
}
}

if (_.isArray(parsedContent)) {
const chunks = _.chunk(parsedContent, 100)
let count = 0
for await (const chunk of chunks) {
await transformArrayChunk({ chunk, startCount: count })
count += chunk.length
}
} else if (_.isPlainObject(parsedContent)) {
await transformObject(
transformObject(
parsedContent,
createNodeId(`${node.id} >>> JSON`),
getType({ node, object: parsedContent, isArray: false })
Expand Down
26 changes: 25 additions & 1 deletion packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { IDataStore, ILmdbDatabases, IQueryResult } from "../types"
import { emitter, replaceReducer } from "../../redux"
import { GatsbyIterable } from "../common/iterable"
import { doRunQuery } from "./query/run-query"
import _ from "lodash"
import {
IRunFilterArg,
runFastFiltersAndSort,
Expand Down Expand Up @@ -219,6 +220,8 @@ async function runQuery(args: IRunFilterArg): Promise<IQueryResult> {

let lastOperationPromise: Promise<any> = Promise.resolve()

const debounceFunctionsPerNodeType = new Map()
KyleAMathews marked this conversation as resolved.
Show resolved Hide resolved

function updateDataStore(action: ActionsUnion): void {
switch (action.type) {
case `DELETE_CACHE`: {
Expand All @@ -237,10 +240,31 @@ function updateDataStore(action: ActionsUnion): void {
clearIndexes()
break
}
case `ADD_CHILD_NODE_TO_PARENT_NODE`: {
let fn
const dbs = getDatabases()
const key = action.payload.id + action.payload.internal.type
if (!debounceFunctionsPerNodeType.has(key)) {
fn = _.debounce(
_action => {
updateNodes(dbs.nodes, _action)
updateNodesByType(dbs.nodesByType, _action)
},
100,
{ leading: true, trailing: true }
)
KyleAMathews marked this conversation as resolved.
Show resolved Hide resolved
debounceFunctionsPerNodeType.set(key, fn)
} else {
fn = debounceFunctionsPerNodeType.get(key)
}

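// Invoke the debounced writer for this key: the first call in a burst writes
// immediately (leading edge); if more calls arrive, the most recent action is
// written once calls have paused for 100 ms (trailing edge).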
fn(action)
break
}
case `CREATE_NODE`:
case `DELETE_NODE`:
case `ADD_FIELD_TO_NODE`:
case `ADD_CHILD_NODE_TO_PARENT_NODE`:
case `MATERIALIZE_PAGE_MODE`: {
const dbs = getDatabases()
const operationPromise = Promise.all([
Expand Down