From 73a9ef1d425dba186466e24491db2abba6eef4ac Mon Sep 17 00:00:00 2001 From: Jesse Wright <63333554+jeswr@users.noreply.github.com> Date: Tue, 30 Jul 2024 08:23:16 +1000 Subject: [PATCH] perf: shared entity index (#394) * feat: shared entity index * chore: remove unused getters/setter * chore: update function calls * chore: get full test coverage * chore: add tests for sharing of custom index * chore: add documentation for the shared entity index * chore: use ids for graphs * chore: editorial fix Co-authored-by: Ted Thibodeau Jr --------- Co-authored-by: Ted Thibodeau Jr --- README.md | 7 +++ src/N3Store.js | 142 +++++++++++++++++++++---------------------- src/index.js | 3 +- test/N3Store-test.js | 47 ++++++++++++++ 4 files changed, 124 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 1ab09a69..2e98d790 100644 --- a/README.md +++ b/README.md @@ -304,6 +304,13 @@ for (const quad of store.match(namedNode('http://ex.org/Mickey'), null, null)) console.log(quad); ``` +If you are using multiple stores, you can reduce memory consumption by allowing them to share an entity index: +```JavaScript +const entityIndex = new N3.EntityIndex(); +const store1 = new N3.Store([], { entityIndex }); +const store2 = new N3.Store([], { entityIndex }); +``` + ### [`DatasetCore` Interface](https://rdf.js.org/dataset-spec/#datasetcore-interface) This store adheres to the `DatasetCore` interface which exposes the following properties diff --git a/src/N3Store.js b/src/N3Store.js index 660a25fe..79850db6 100644 --- a/src/N3Store.js +++ b/src/N3Store.js @@ -4,33 +4,22 @@ import { default as N3DataFactory, termToId, termFromId } from './N3DataFactory' import namespaces from './IRIs'; import { isDefaultGraph } from './N3Util'; -// ## Constructor -export default class N3Store { - constructor(quads, options) { - // The number of quads is initially zero - this._size = 0; - // `_graphs` contains subject, predicate, and object indexes per graph - this._graphs = Object.create(null); +export class N3EntityIndex { + constructor(options = {}) { + this._id = 1; // `_ids` maps entities such as `http://xmlns.com/foaf/0.1/name` to numbers, // saving memory by using only numbers as keys in `_graphs` - this._id = 0; this._ids = Object.create(null); - this._entities = Object.create(null); // inverse of `_ids` + this._ids[''] = 1; + // inverse of `_ids` + this._entities = Object.create(null); + this._entities[1] = ''; // `_blankNodeIndex` is the index of the last automatically named blank node this._blankNodeIndex = 0; - - // Shift parameters if `quads` is not given - if (!options && quads && !quads[0]) - options = quads, quads = null; - options = options || {}; this._factory = options.factory || N3DataFactory; - - // Add quads if passed - if (quads) - this.addQuads(quads); } - _termFromId(id, factory) { + _termFromId(id) { if (id[0] === '.') { const entities = this._entities; const terms = id.split('.'); @@ -42,7 +31,7 @@ export default class N3Store { ); return q; } - return termFromId(id, factory); + return termFromId(id, this._factory); } _termToNumericId(term) { @@ -69,6 +58,50 @@ export default class N3Store { return this._ids[str] || (this._ids[this._entities[++this._id] = str] = this._id); } + createBlankNode(suggestedName) { + let name, index; + // Generate a name based on the suggested name + if (suggestedName) { + name = suggestedName = `_:${suggestedName}`, index = 1; + while (this._ids[name]) + name = suggestedName + index++; + } + // Generate a generic blank node name + else { + do { name = `_:b${this._blankNodeIndex++}`; } + while (this._ids[name]); + } + // Add the blank node to the entities, avoiding the generation of duplicates + this._ids[name] = ++this._id; + this._entities[this._id] = name; + return this._factory.blankNode(name.substr(2)); + } +} + +// ## Constructor +export default class N3Store { + constructor(quads, options) { + // The number of quads is initially zero + this._size = 0; + // `_graphs` contains subject, predicate, and object indexes per graph + this._graphs = Object.create(null); + + // Shift parameters if `quads` is not given + if (!options && quads && !quads[0]) + options = quads, quads = null; + options = options || {}; + this._factory = options.factory || N3DataFactory; + this._entityIndex = options.entityIndex || new N3EntityIndex({ factory: this._factory }); + this._entities = this._entityIndex._entities; + this._termFromId = this._entityIndex._termFromId.bind(this._entityIndex); + this._termToNumericId = this._entityIndex._termToNumericId.bind(this._entityIndex); + this._termToNewNumericId = this._entityIndex._termToNewNumericId.bind(this._entityIndex); + + // Add quads if passed + if (quads) + this.addQuads(quads); + } + // ## Public properties // ### `size` returns the number of quads in the store @@ -127,24 +160,24 @@ export default class N3Store { *_findInIndex(index0, key0, key1, key2, name0, name1, name2, graphId) { let tmp, index1, index2; const entityKeys = this._entities; - const graph = this._termFromId(graphId, this._factory); + const graph = this._termFromId(entityKeys[graphId]); const parts = { subject: null, predicate: null, object: null }; // If a key is specified, use only that part of index 0. if (key0) (tmp = index0, index0 = {})[key0] = tmp[key0]; for (const value0 in index0) { if (index1 = index0[value0]) { - parts[name0] = this._termFromId(entityKeys[value0], this._factory); + parts[name0] = this._termFromId(entityKeys[value0]); // If a key is specified, use only that part of index 1. if (key1) (tmp = index1, index1 = {})[key1] = tmp[key1]; for (const value1 in index1) { if (index2 = index1[value1]) { - parts[name1] = this._termFromId(entityKeys[value1], this._factory); + parts[name1] = this._termFromId(entityKeys[value1]); // If a key is specified, use only that part of index 2, if it exists. const values = key2 ? (key2 in index2 ? [key2] : []) : Object.keys(index2); // Create quads for all items found in index 2. for (let l = 0; l < values.length; l++) { - parts[name2] = this._termFromId(entityKeys[values[l]], this._factory); + parts[name2] = this._termFromId(entityKeys[values[l]]); yield this._factory.quad(parts.subject, parts.predicate, parts.object, graph); } } @@ -215,11 +248,8 @@ export default class N3Store { // ### `_getGraphs` returns an array with the given graph, // or all graphs if the argument is null or undefined. _getGraphs(graph) { - if (!isString(graph)) - return this._graphs; - const graphs = {}; - graphs[graph] = this._graphs[graph]; - return graphs; + graph = graph === '' ? 1 : (graph && (this._termToNumericId(graph) || -1)); + return typeof graph !== 'number' ? this._graphs : { [graph]: this._graphs[graph] }; } // ### `_uniqueEntities` returns a function that accepts an entity ID @@ -253,7 +283,7 @@ export default class N3Store { predicate = subject.predicate, subject = subject.subject; // Convert terms to internal string representation - graph = termToId(graph); + graph = graph ? this._termToNewNumericId(graph) : 1; // Find the graph that will contain the triple let graphItem = this._graphs[graph]; @@ -313,9 +343,8 @@ export default class N3Store { if (!predicate) graph = subject.graph, object = subject.object, predicate = subject.predicate, subject = subject.subject; - // Convert terms to internal string representation - graph = termToId(graph); + graph = graph ? this._termToNumericId(graph) : 1; // Find internal identifiers for all components // and verify the quad exists. @@ -380,9 +409,6 @@ export default class N3Store { // ### `readQuads` returns an generator of quads matching a pattern. // Setting any field to `undefined` or `null` indicates a wildcard. *readQuads(subject, predicate, object, graph) { - // Convert terms to internal string representation - graph = graph && termToId(graph); - const graphs = this._getGraphs(graph); let content, subjectId, predicateId, objectId; @@ -429,15 +455,12 @@ export default class N3Store { // Setting any field to `undefined` or `null` indicates a wildcard. // For backwards compatibility, the object return also implements the Readable stream interface. match(subject, predicate, object, graph) { - return new DatasetCoreAndReadableStream(this, subject, predicate, object, graph); + return new DatasetCoreAndReadableStream(this, subject, predicate, object, graph, { entityIndex: this._entityIndex }); } // ### `countQuads` returns the number of quads matching a pattern. // Setting any field to `undefined` or `null` indicates a wildcard. countQuads(subject, predicate, object, graph) { - // Convert terms to internal string representation - graph = graph && termToId(graph); - const graphs = this._getGraphs(graph); let count = 0, content, subjectId, predicateId, objectId; @@ -514,9 +537,6 @@ export default class N3Store { // ### `forSubjects` executes the callback on all subjects that match the pattern. // Setting any field to `undefined` or `null` indicates a wildcard. forSubjects(callback, predicate, object, graph) { - // Convert terms to internal string representation - graph = graph && termToId(graph); - const graphs = this._getGraphs(graph); let content, predicateId, objectId; callback = this._uniqueEntities(callback); @@ -559,9 +579,6 @@ export default class N3Store { // ### `forPredicates` executes the callback on all predicates that match the pattern. // Setting any field to `undefined` or `null` indicates a wildcard. forPredicates(callback, subject, object, graph) { - // Convert terms to internal string representation - graph = graph && termToId(graph); - const graphs = this._getGraphs(graph); let content, subjectId, objectId; callback = this._uniqueEntities(callback); @@ -604,9 +621,6 @@ export default class N3Store { // ### `forObjects` executes the callback on all objects that match the pattern. // Setting any field to `undefined` or `null` indicates a wildcard. forObjects(callback, subject, predicate, graph) { - // Convert terms to internal string representation - graph = graph && termToId(graph); - const graphs = this._getGraphs(graph); let content, subjectId, predicateId; callback = this._uniqueEntities(callback); @@ -653,28 +667,13 @@ export default class N3Store { this.some(quad => { callback(quad.graph); return true; // Halt iteration of some() - }, subject, predicate, object, graph); + }, subject, predicate, object, this._termFromId(this._entities[graph])); } } // ### `createBlankNode` creates a new blank node, returning its name createBlankNode(suggestedName) { - let name, index; - // Generate a name based on the suggested name - if (suggestedName) { - name = suggestedName = `_:${suggestedName}`, index = 1; - while (this._ids[name]) - name = suggestedName + index++; - } - // Generate a generic blank node name - else { - do { name = `_:b${this._blankNodeIndex++}`; } - while (this._ids[name]); - } - // Add the blank node to the entities, avoiding the generation of duplicates - this._ids[name] = ++this._id; - this._entities[this._id] = name; - return this._factory.blankNode(name.substr(2)); + return this._entityIndex.createBlankNode(suggestedName); } // ### `extractLists` finds and removes all list triples @@ -783,24 +782,19 @@ export default class N3Store { } } -// Determines whether the argument is a string -function isString(s) { - return typeof s === 'string' || s instanceof String; -} - /** * A class that implements both DatasetCore and Readable. */ class DatasetCoreAndReadableStream extends Readable { - constructor(n3Store, subject, predicate, object, graph) { + constructor(n3Store, subject, predicate, object, graph, options) { super({ objectMode: true }); - Object.assign(this, { n3Store, subject, predicate, object, graph }); + Object.assign(this, { n3Store, subject, predicate, object, graph, options }); } get filtered() { if (!this._filtered) { const { n3Store, graph, object, predicate, subject } = this; - const newStore = this._filtered = new N3Store({ factory: n3Store._factory }); + const newStore = this._filtered = new N3Store({ factory: n3Store._factory, entityIndex: this.options.entityIndex }); for (const quad of n3Store.readQuads(subject, predicate, object, graph)) newStore.addQuad(quad); } @@ -830,7 +824,7 @@ class DatasetCoreAndReadableStream extends Readable { } match(subject, predicate, object, graph) { - return new DatasetCoreAndReadableStream(this.filtered, subject, predicate, object, graph); + return new DatasetCoreAndReadableStream(this.filtered, subject, predicate, object, graph, this.options); } *[Symbol.iterator]() { diff --git a/src/index.js b/src/index.js index 8307958e..0e19e1a0 100644 --- a/src/index.js +++ b/src/index.js @@ -1,7 +1,7 @@ import Lexer from './N3Lexer'; import Parser from './N3Parser'; import Writer from './N3Writer'; -import Store from './N3Store'; +import Store, { N3EntityIndex as EntityIndex } from './N3Store'; import StreamParser from './N3StreamParser'; import StreamWriter from './N3StreamWriter'; import * as Util from './N3Util'; @@ -28,6 +28,7 @@ export { Parser, Writer, Store, + EntityIndex, StreamParser, StreamWriter, Util, diff --git a/test/N3Store-test.js b/test/N3Store-test.js index 15b954d4..fa610ae6 100644 --- a/test/N3Store-test.js +++ b/test/N3Store-test.js @@ -1,6 +1,7 @@ import { Store, termFromId, termToId, + EntityIndex, } from '../src'; import { NamedNode, @@ -2013,6 +2014,52 @@ describe('Store', () => { }); }); +describe('EntityIndex', () => { + let entityIndex; + beforeEach(() => { + entityIndex = new EntityIndex(); + }); + + it('should be a constructor', () => { + expect(entityIndex).toBeInstanceOf(EntityIndex); + }); + + it('custom index should be used when instantiated with store', () => { + const index = { + '': 1, + 's1': 2, + 'p1': 3, + 'o0': 4, + 's2': 5, + 'p2': 6, + 'o2': 7, + }; + + const store = new Store([ + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o0')), + ], { entityIndex }); + expect(store.size).toBe(1); + expect(entityIndex._id).toEqual(4); + + const substore = store.match(); + substore.add(new Quad(new NamedNode('s2'), new NamedNode('p2'), new NamedNode('o2'))); + expect(store.size).toBe(1); + expect(substore.size).toBe(2); + expect(entityIndex._id).toEqual(7); + expect(entityIndex._ids).toEqual(index); + + const store2 = new Store([ + new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o5')), + ], { entityIndex }); + expect(store2.size).toBe(1); + expect(entityIndex._id).toEqual(8); + expect(entityIndex._ids).toEqual({ + ...index, + o5: 8, + }); + }); +}); + function alwaysTrue() { return true; } function alwaysFalse() { return false; }