Skip to content

Commit

Permalink
Use sql unnest to optimize entity insertion (#1098)
Browse files Browse the repository at this point in the history
* Use sql unnest to optimize entity insertion

* Update entity import to use insertMany

* Update db util to stringify null-prototype objects

* Code review feedback
  • Loading branch information
ktuite authored Mar 14, 2024
1 parent 9ae024b commit e610ed1
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 43 deletions.
32 changes: 25 additions & 7 deletions lib/model/frames/entity.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,26 @@

/* eslint-disable no-multi-spaces */

const { embedded, Frame, readable, table } = require('../frame');
const { embedded, fieldTypes, Frame, readable, table } = require('../frame');
const { extractEntity, normalizeUuid, extractLabelFromSubmission, extractBaseVersionFromSubmission } = require('../../data/entity');

// These Frames don't interact with APIs directly, hence no readable/writable
class Entity extends Frame.define(
table('entities', 'entity'),
'id', 'uuid', readable,
'datasetId',
'createdAt', readable, 'creatorId', readable,
'updatedAt', readable, 'deletedAt', readable,
'datasetId', 'creatorId', readable,
'conflict', readable,
'createdAt', readable,
'updatedAt', readable, 'deletedAt', readable,
embedded('creator'),
embedded('currentVersion')
embedded('currentVersion'),
fieldTypes([
'int4', 'varchar',
'int4', 'int4',
'conflictType',
'timestamptz',
'timestamptz', 'timestamp',
])
) {
get def() { return this.aux.def; }

Expand Down Expand Up @@ -75,14 +82,25 @@ Entity.Extended = class extends Frame.define(
// Frame for one row of the entity_defs table (aliased as 'def').
//
// Fields followed by `readable` carry that marker; the rest (id, entityId,
// sourceId, root) do not. 'createdAt' is declared exactly once, as the last
// field, so that the field list and the fieldTypes list below stay the same
// length.
//
// fieldTypes has one entry per declared field, in declaration order; each row
// of types below lines up with the row of field names in the same position.
// NOTE(review): presumably these are the Postgres column types used to cast
// values when inserting many rows at once -- confirm against ../frame and
// insertMany in util/db.
Entity.Def = Frame.define(
  table('entity_defs', 'def'),
  'id',                     'entityId',
  'current',      readable,
  'sourceId',               'label',                 readable,
  'creatorId',    readable, 'userAgent',             readable,
  'data',         readable, 'root',
  'version',      readable, 'baseVersion',           readable,
  'dataReceived', readable, 'conflictingProperties', readable,
  'createdAt',    readable,
  embedded('creator'),
  embedded('source'),
  fieldTypes([
    'int4', 'int4',       // id, entityId
    'bool',               // current
    'int4', 'text',       // sourceId, label
    'int4', 'varchar',    // creatorId, userAgent
    'jsonb', 'bool',      // data, root
    'int4', 'int4',       // version, baseVersion
    'jsonb', 'jsonb',     // dataReceived, conflictingProperties
    'timestamptz'         // createdAt
  ])
);

Entity.Def.Metadata = class extends Entity.Def {
Expand Down
46 changes: 18 additions & 28 deletions lib/model/query/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

const { sql } = require('slonik');
const { Actor, Entity, Submission, Form } = require('../frames');
const { equals, extender, unjoiner, page, markDeleted } = require('../../util/db');
const { equals, extender, unjoiner, page, markDeleted, insertMany } = require('../../util/db');
const { map, mergeRight, pickAll } = require('ramda');
const { blankStringToNull, construct } = require('../../util/util');
const { QueryOptions } = require('../../util/db');
Expand Down Expand Up @@ -90,38 +90,28 @@ createNew.audit.withResult = true;
// it could be used in places of createNew() but createNew uses a single query so it may be faster
// in single entity situations (eg. processing submissions to make entities)
// Note: if the entity schema changes, createMany and createNew would both need to change.
// Bulk-creates entities and their first defs using two insertMany statements:
// one for the entities rows, one for the entity_defs rows.
//
// dataset      - only dataset.id is read; it becomes each entity's datasetId.
// rawEntities  - array of parsed entity data. Each element is spread into a
//                new Entity, and its `def` property is spread into a new
//                Entity.Def.
// sourceId     - stored on every def.
// userAgentIn  - passed through blankStringToNull before being stored on
//                every def.
//
// Resolves to whatever `all(insertMany(defsForInsert))` yields.
// NOTE(review): presumably the inserted def rows -- confirm against insertMany.
const createMany = (dataset, rawEntities, sourceId, userAgentIn) => async ({ all, context }) => {
  const creatorId = context.auth.actor.map((actor) => actor.id).orNull();
  const userAgent = blankStringToNull(userAgentIn);

  // Augment parsed entity data with dataset and creator IDs. The raw entity is
  // spread last, so any datasetId/creatorId already present on it wins.
  const entitiesForInsert = rawEntities.map((e) =>
    new Entity({ datasetId: dataset.id, creatorId, ...e }));

  const entities = await all(sql`${insertMany(entitiesForInsert)} RETURNING id`);

  // Augment defs with IDs of freshly inserted entities and other default
  // values. e.def is spread last, so values it carries override the defaults.
  // NOTE(review): pairing entities[i] with rawEntities[i] assumes the
  // RETURNING rows come back in input order -- confirm.
  const defsForInsert = rawEntities.map((e, i) => new Entity.Def({
    entityId: entities[i].id,
    creatorId,
    root: true,
    current: true,
    sourceId,
    version: 1,
    userAgent,
    ...e.def
  }));

  return all(insertMany(defsForInsert));
};

createMany.audit = (dataset, entities, sourceId) => (log) =>
Expand Down
11 changes: 6 additions & 5 deletions lib/util/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,12 @@ const extender = (...standard) => (...extended) => (sqlFunc) => {
// Given a source object, returns a function mapping a key of that object to
// the value that should be bound into an insert statement for that key:
//   - 'createdAt' always becomes the SQL expression clock_timestamp(),
//     regardless of what obj holds under that key;
//   - null and undefined both become null;
//   - Date instances become their ISO-8601 string;
//   - plain objects become their JSON string. This includes objects created
//     with Object.create(null): they have no `constructor` property, so they
//     are detected via their null prototype instead;
//   - anything else (primitives, arrays, other class instances) is returned
//     unchanged.
const _assign = (obj) => (k) => {
  if (k === 'createdAt') return sql`clock_timestamp()`;
  const v = obj[k];
  if (v === null || v === undefined) return null;
  if (typeof v === 'object') {
    if (v instanceof Date) return v.toISOString();
    if (v.constructor === Object || Object.getPrototypeOf(v) === null) return JSON.stringify(v);
  }
  return v;
};
const insert = (obj) => {
const keys = Object.keys(obj);
Expand Down
6 changes: 3 additions & 3 deletions test/unit/util/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,10 @@ returning *`);
});

it('should deal with strange data input types', () => {
insert(new T({ x: { test: true }, y: undefined, z: new Date('2000-01-01') }))
insert(new T({ x: { test: true }, y: undefined, z: new Date('2000-01-01'), w: Object.assign(Object.create(null), { foo: 'bar' }) }))
.should.eql(sql`
insert into frames ("x","y","z")
values (${'{"test":true}'},${null},${'2000-01-01T00:00:00.000Z'})
insert into frames ("x","y","z","w")
values (${'{"test":true}'},${null},${'2000-01-01T00:00:00.000Z'},${'{"foo":"bar"}'})
returning *`);
});

Expand Down

0 comments on commit e610ed1

Please sign in to comment.