Skip to content

Commit

Permalink
perf: shared entity index (#394)
Browse files Browse the repository at this point in the history
* feat: shared entity index

* chore: remove unused getters/setter

* chore: update function calls

* chore: get full test coverage

* chore: add tests for sharing of custom index

* chore: add documentation for the shared entity index

* chore: use ids for graphs

* chore: editorial fix

Co-authored-by: Ted Thibodeau Jr <[email protected]>

---------

Co-authored-by: Ted Thibodeau Jr <[email protected]>
  • Loading branch information
jeswr and TallTed committed Jul 29, 2024
1 parent 5ce080d commit 73a9ef1
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 75 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,13 @@ for (const quad of store.match(namedNode('http://ex.org/Mickey'), null, null))
console.log(quad);
```

If you are using multiple stores, you can reduce memory consumption by allowing them to share an entity index:
```JavaScript
const entityIndex = new N3.EntityIndex();
const store1 = new N3.Store([], { entityIndex });
const store2 = new N3.Store([], { entityIndex });
```

### [`DatasetCore` Interface](https://rdf.js.org/dataset-spec/#datasetcore-interface)
This store adheres to the `DatasetCore` interface which exposes the following properties

Expand Down
142 changes: 68 additions & 74 deletions src/N3Store.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,22 @@ import { default as N3DataFactory, termToId, termFromId } from './N3DataFactory'
import namespaces from './IRIs';
import { isDefaultGraph } from './N3Util';

// ## Constructor
export default class N3Store {
constructor(quads, options) {
// The number of quads is initially zero
this._size = 0;
// `_graphs` contains subject, predicate, and object indexes per graph
this._graphs = Object.create(null);
export class N3EntityIndex {
constructor(options = {}) {
this._id = 1;
// `_ids` maps entities such as `http://xmlns.com/foaf/0.1/name` to numbers,
// saving memory by using only numbers as keys in `_graphs`
this._id = 0;
this._ids = Object.create(null);
this._entities = Object.create(null); // inverse of `_ids`
this._ids[''] = 1;
// inverse of `_ids`
this._entities = Object.create(null);
this._entities[1] = '';
// `_blankNodeIndex` is the index of the last automatically named blank node
this._blankNodeIndex = 0;

// Shift parameters if `quads` is not given
if (!options && quads && !quads[0])
options = quads, quads = null;
options = options || {};
this._factory = options.factory || N3DataFactory;

// Add quads if passed
if (quads)
this.addQuads(quads);
}

_termFromId(id, factory) {
_termFromId(id) {
if (id[0] === '.') {
const entities = this._entities;
const terms = id.split('.');
Expand All @@ -42,7 +31,7 @@ export default class N3Store {
);
return q;
}
return termFromId(id, factory);
return termFromId(id, this._factory);
}

_termToNumericId(term) {
Expand All @@ -69,6 +58,50 @@ export default class N3Store {
return this._ids[str] || (this._ids[this._entities[++this._id] = str] = this._id);
}

createBlankNode(suggestedName) {
let name, index;
// Generate a name based on the suggested name
if (suggestedName) {
name = suggestedName = `_:${suggestedName}`, index = 1;
while (this._ids[name])
name = suggestedName + index++;
}
// Generate a generic blank node name
else {
do { name = `_:b${this._blankNodeIndex++}`; }
while (this._ids[name]);
}
// Add the blank node to the entities, avoiding the generation of duplicates
this._ids[name] = ++this._id;
this._entities[this._id] = name;
return this._factory.blankNode(name.substr(2));
}
}

// ## Constructor
export default class N3Store {
constructor(quads, options) {
// The number of quads is initially zero
this._size = 0;
// `_graphs` contains subject, predicate, and object indexes per graph
this._graphs = Object.create(null);

// Shift parameters if `quads` is not given
if (!options && quads && !quads[0])
options = quads, quads = null;
options = options || {};
this._factory = options.factory || N3DataFactory;
this._entityIndex = options.entityIndex || new N3EntityIndex({ factory: this._factory });
this._entities = this._entityIndex._entities;
this._termFromId = this._entityIndex._termFromId.bind(this._entityIndex);
this._termToNumericId = this._entityIndex._termToNumericId.bind(this._entityIndex);
this._termToNewNumericId = this._entityIndex._termToNewNumericId.bind(this._entityIndex);

// Add quads if passed
if (quads)
this.addQuads(quads);
}

// ## Public properties

// ### `size` returns the number of quads in the store
Expand Down Expand Up @@ -127,24 +160,24 @@ export default class N3Store {
*_findInIndex(index0, key0, key1, key2, name0, name1, name2, graphId) {
let tmp, index1, index2;
const entityKeys = this._entities;
const graph = this._termFromId(graphId, this._factory);
const graph = this._termFromId(entityKeys[graphId]);
const parts = { subject: null, predicate: null, object: null };

// If a key is specified, use only that part of index 0.
if (key0) (tmp = index0, index0 = {})[key0] = tmp[key0];
for (const value0 in index0) {
if (index1 = index0[value0]) {
parts[name0] = this._termFromId(entityKeys[value0], this._factory);
parts[name0] = this._termFromId(entityKeys[value0]);
// If a key is specified, use only that part of index 1.
if (key1) (tmp = index1, index1 = {})[key1] = tmp[key1];
for (const value1 in index1) {
if (index2 = index1[value1]) {
parts[name1] = this._termFromId(entityKeys[value1], this._factory);
parts[name1] = this._termFromId(entityKeys[value1]);
// If a key is specified, use only that part of index 2, if it exists.
const values = key2 ? (key2 in index2 ? [key2] : []) : Object.keys(index2);
// Create quads for all items found in index 2.
for (let l = 0; l < values.length; l++) {
parts[name2] = this._termFromId(entityKeys[values[l]], this._factory);
parts[name2] = this._termFromId(entityKeys[values[l]]);
yield this._factory.quad(parts.subject, parts.predicate, parts.object, graph);
}
}
Expand Down Expand Up @@ -215,11 +248,8 @@ export default class N3Store {
// ### `_getGraphs` returns an object containing only the given graph,
// or all graphs if the argument is null or undefined.
_getGraphs(graph) {
if (!isString(graph))
return this._graphs;
const graphs = {};
graphs[graph] = this._graphs[graph];
return graphs;
graph = graph === '' ? 1 : (graph && (this._termToNumericId(graph) || -1));
return typeof graph !== 'number' ? this._graphs : { [graph]: this._graphs[graph] };
}

// ### `_uniqueEntities` returns a function that accepts an entity ID
Expand Down Expand Up @@ -253,7 +283,7 @@ export default class N3Store {
predicate = subject.predicate, subject = subject.subject;

// Convert terms to internal string representation
graph = termToId(graph);
graph = graph ? this._termToNewNumericId(graph) : 1;

// Find the graph that will contain the triple
let graphItem = this._graphs[graph];
Expand Down Expand Up @@ -313,9 +343,8 @@ export default class N3Store {
if (!predicate)
graph = subject.graph, object = subject.object,
predicate = subject.predicate, subject = subject.subject;

// Convert terms to internal string representation
graph = termToId(graph);
graph = graph ? this._termToNumericId(graph) : 1;

// Find internal identifiers for all components
// and verify the quad exists.
Expand Down Expand Up @@ -380,9 +409,6 @@ export default class N3Store {
// ### `readQuads` returns a generator of quads matching a pattern.
// Setting any field to `undefined` or `null` indicates a wildcard.
*readQuads(subject, predicate, object, graph) {
// Convert terms to internal string representation
graph = graph && termToId(graph);

const graphs = this._getGraphs(graph);
let content, subjectId, predicateId, objectId;

Expand Down Expand Up @@ -429,15 +455,12 @@ export default class N3Store {
// Setting any field to `undefined` or `null` indicates a wildcard.
// For backwards compatibility, the returned object also implements the Readable stream interface.
match(subject, predicate, object, graph) {
return new DatasetCoreAndReadableStream(this, subject, predicate, object, graph);
return new DatasetCoreAndReadableStream(this, subject, predicate, object, graph, { entityIndex: this._entityIndex });
}

// ### `countQuads` returns the number of quads matching a pattern.
// Setting any field to `undefined` or `null` indicates a wildcard.
countQuads(subject, predicate, object, graph) {
// Convert terms to internal string representation
graph = graph && termToId(graph);

const graphs = this._getGraphs(graph);
let count = 0, content, subjectId, predicateId, objectId;

Expand Down Expand Up @@ -514,9 +537,6 @@ export default class N3Store {
// ### `forSubjects` executes the callback on all subjects that match the pattern.
// Setting any field to `undefined` or `null` indicates a wildcard.
forSubjects(callback, predicate, object, graph) {
// Convert terms to internal string representation
graph = graph && termToId(graph);

const graphs = this._getGraphs(graph);
let content, predicateId, objectId;
callback = this._uniqueEntities(callback);
Expand Down Expand Up @@ -559,9 +579,6 @@ export default class N3Store {
// ### `forPredicates` executes the callback on all predicates that match the pattern.
// Setting any field to `undefined` or `null` indicates a wildcard.
forPredicates(callback, subject, object, graph) {
// Convert terms to internal string representation
graph = graph && termToId(graph);

const graphs = this._getGraphs(graph);
let content, subjectId, objectId;
callback = this._uniqueEntities(callback);
Expand Down Expand Up @@ -604,9 +621,6 @@ export default class N3Store {
// ### `forObjects` executes the callback on all objects that match the pattern.
// Setting any field to `undefined` or `null` indicates a wildcard.
forObjects(callback, subject, predicate, graph) {
// Convert terms to internal string representation
graph = graph && termToId(graph);

const graphs = this._getGraphs(graph);
let content, subjectId, predicateId;
callback = this._uniqueEntities(callback);
Expand Down Expand Up @@ -653,28 +667,13 @@ export default class N3Store {
this.some(quad => {
callback(quad.graph);
return true; // Halt iteration of some()
}, subject, predicate, object, graph);
}, subject, predicate, object, this._termFromId(this._entities[graph]));
}
}

// ### `createBlankNode` creates a new blank node, returning its name
createBlankNode(suggestedName) {
let name, index;
// Generate a name based on the suggested name
if (suggestedName) {
name = suggestedName = `_:${suggestedName}`, index = 1;
while (this._ids[name])
name = suggestedName + index++;
}
// Generate a generic blank node name
else {
do { name = `_:b${this._blankNodeIndex++}`; }
while (this._ids[name]);
}
// Add the blank node to the entities, avoiding the generation of duplicates
this._ids[name] = ++this._id;
this._entities[this._id] = name;
return this._factory.blankNode(name.substr(2));
return this._entityIndex.createBlankNode(suggestedName);
}

// ### `extractLists` finds and removes all list triples
Expand Down Expand Up @@ -783,24 +782,19 @@ export default class N3Store {
}
}

// Determines whether the argument is a string
function isString(s) {
return typeof s === 'string' || s instanceof String;
}

/**
* A class that implements both DatasetCore and Readable.
*/
class DatasetCoreAndReadableStream extends Readable {
constructor(n3Store, subject, predicate, object, graph) {
constructor(n3Store, subject, predicate, object, graph, options) {
super({ objectMode: true });
Object.assign(this, { n3Store, subject, predicate, object, graph });
Object.assign(this, { n3Store, subject, predicate, object, graph, options });
}

get filtered() {
if (!this._filtered) {
const { n3Store, graph, object, predicate, subject } = this;
const newStore = this._filtered = new N3Store({ factory: n3Store._factory });
const newStore = this._filtered = new N3Store({ factory: n3Store._factory, entityIndex: this.options.entityIndex });
for (const quad of n3Store.readQuads(subject, predicate, object, graph))
newStore.addQuad(quad);
}
Expand Down Expand Up @@ -830,7 +824,7 @@ class DatasetCoreAndReadableStream extends Readable {
}

match(subject, predicate, object, graph) {
return new DatasetCoreAndReadableStream(this.filtered, subject, predicate, object, graph);
return new DatasetCoreAndReadableStream(this.filtered, subject, predicate, object, graph, this.options);
}

*[Symbol.iterator]() {
Expand Down
3 changes: 2 additions & 1 deletion src/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import Lexer from './N3Lexer';
import Parser from './N3Parser';
import Writer from './N3Writer';
import Store from './N3Store';
import Store, { N3EntityIndex as EntityIndex } from './N3Store';
import StreamParser from './N3StreamParser';
import StreamWriter from './N3StreamWriter';
import * as Util from './N3Util';
Expand All @@ -28,6 +28,7 @@ export {
Parser,
Writer,
Store,
EntityIndex,
StreamParser,
StreamWriter,
Util,
Expand Down
47 changes: 47 additions & 0 deletions test/N3Store-test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import {
Store,
termFromId, termToId,
EntityIndex,
} from '../src';
import {
NamedNode,
Expand Down Expand Up @@ -2013,6 +2014,52 @@ describe('Store', () => {
});
});

// Tests for `EntityIndex` and for passing one index instance to several stores.
describe('EntityIndex', () => {
let entityIndex;
// Every test starts from a freshly constructed index
beforeEach(() => {
entityIndex = new EntityIndex();
});

it('should be a constructor', () => {
expect(entityIndex).toBeInstanceOf(EntityIndex);
});

it('custom index should be used when instantiated with store', () => {
// Expected contents of `entityIndex._ids` once the first store and its
// substore have added their quads (checked further down)
const index = {
'': 1,
's1': 2,
'p1': 3,
'o0': 4,
's2': 5,
'p2': 6,
'o2': 7,
};

// First store built on the custom index: one quad is added and the
// index counter is expected to have advanced to 4
const store = new Store([
new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o0')),
], { entityIndex });
expect(store.size).toBe(1);
expect(entityIndex._id).toEqual(4);

// Adding to the result of `match()` must leave the original store's size
// unchanged, while the new terms are expected to show up in the same index
const substore = store.match();
substore.add(new Quad(new NamedNode('s2'), new NamedNode('p2'), new NamedNode('o2')));
expect(store.size).toBe(1);
expect(substore.size).toBe(2);
expect(entityIndex._id).toEqual(7);
expect(entityIndex._ids).toEqual(index);

// Second store on the same index: `s1` and `p1` are already present,
// so only `o5` is expected to be added (as id 8)
const store2 = new Store([
new Quad(new NamedNode('s1'), new NamedNode('p1'), new NamedNode('o5')),
], { entityIndex });
expect(store2.size).toBe(1);
expect(entityIndex._id).toEqual(8);
expect(entityIndex._ids).toEqual({
...index,
o5: 8,
});
});
});

function alwaysTrue() { return true; }
function alwaysFalse() { return false; }

Expand Down

0 comments on commit 73a9ef1

Please sign in to comment.