Skip to content

Commit

Permalink
feat: add callback and event for comments (#423)
Browse files Browse the repository at this point in the history
* Added tests and support for a comments callback in the N3Parser

* Added support and test for emitting comments in the N3StreamParser

* Added setter for enabling comments in lexer

* Perf: no token check on comment when not in comment mode

* Support new backward compatible function signature for parse()

* keeping the {…} off for single lines

* Make emitting comments from N3StreamParser optional through a constructor option (off by default)

* Make comments a public var in N3Lexer

* Update test/N3Parser-test.js

Co-authored-by: Ruben Verborgh <[email protected]>

* Adding tests to fix coverage

* Updated documentation in the README.md

* chore: revert prefix arg drop

* chore: cleanup documentation

* chore: make language precise

---------

Co-authored-by: Jesse Wright <[email protected]>
Co-authored-by: Ruben Verborgh <[email protected]>
  • Loading branch information
3 people authored Aug 26, 2024
1 parent 330dc33 commit 881d0aa
Show file tree
Hide file tree
Showing 6 changed files with 293 additions and 23 deletions.
40 changes: 33 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,15 @@ we assume that a quad is simply a triple in a named or default graph.

`N3.Parser` transforms a Turtle, TriG, N-Triples, or N-Quads document into quads through a callback:
```JavaScript
const tomAndJerry = `PREFIX c: <http://example.org/cartoons#>
# Tom is a cat
c:Tom a c:Cat.
c:Jerry a c:Mouse;
c:smarterThan c:Tom.`

const parser = new N3.Parser();
parser.parse(
`PREFIX c: <http://example.org/cartoons#>
c:Tom a c:Cat.
c:Jerry a c:Mouse;
c:smarterThan c:Tom.`,

parser.parse(tomAndJerry,
(error, quad, prefixes) => {
if (quad)
console.log(quad);
Expand All @@ -101,9 +104,30 @@ If there are no more quads,
the callback is invoked one last time with `null` for `quad`
and a hash of prefixes as third argument.
<br>
Pass a second callback to `parse` to retrieve prefixes as they are read.

Alternatively, an object can be supplied, where `onQuad`, `onPrefix` and `onComment` are used to listen for `quads`, `prefixes` and `comments` as follows:
```JavaScript
const parser = new N3.Parser();

parser.parse(tomAndJerry, {
// onQuad (required) accepts a listener of type (error: Error, quad: RDF.Quad) => void
onQuad: (err, quad) => { console.log(quad); },
// onPrefix (optional) accepts a listener of type (prefix: string, iri: NamedNode) => void
onPrefix: (prefix, iri) => { console.log(prefix, 'expands to', iri.value); },
// onComment (optional) accepts a listener of type (comment: string) => void
onComment: (comment) => { console.log('#', comment); },
});
```

<br>
If no callbacks are provided, parsing happens synchronously.
If no callbacks are provided, parsing happens synchronously, returning an array of quads.

```JavaScript
const parser = new N3.Parser();

// An array of resultant Quads
const quadArray = parser.parse(tomAndJerry);
```

By default, `N3.Parser` parses a permissive superset of Turtle, TriG, N-Triples, and N-Quads.
<br>
Expand Down Expand Up @@ -169,6 +193,8 @@ function SlowConsumer() {

A dedicated `prefix` event signals every prefix with `prefix` and `term` arguments.

A dedicated `comment` event can be enabled by setting `comments: true` in the N3.StreamParser constructor.

## Writing

### From quads to a string
Expand Down
6 changes: 3 additions & 3 deletions src/N3Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export default class N3Lexer {
this._n3Mode = options.n3 !== false;
}
// Don't output comment tokens by default
this._comments = !!options.comments;
this.comments = !!options.comments;
// Cache the last tested closing position of long literals
this._literalClosingPos = 0;
}
Expand All @@ -85,7 +85,7 @@ export default class N3Lexer {
let whiteSpaceMatch, comment;
while (whiteSpaceMatch = this._newline.exec(input)) {
// Try to find a comment
if (this._comments && (comment = this._comment.exec(whiteSpaceMatch[0])))
if (this.comments && (comment = this._comment.exec(whiteSpaceMatch[0])))
emitToken('comment', comment[1], '', this._line, whiteSpaceMatch[0].length);
// Advance the input
input = input.substr(whiteSpaceMatch[0].length, input.length);
Expand All @@ -101,7 +101,7 @@ export default class N3Lexer {
// If the input is finished, emit EOF
if (inputFinished) {
// Try to find a final comment
if (this._comments && (comment = this._comment.exec(input)))
if (this.comments && (comment = this._comment.exec(input)))
emitToken('comment', comment[1], '', this._line, input.length);
input = null;
emitToken('eof', '', '', this._line, 0);
Expand Down
45 changes: 38 additions & 7 deletions src/N3Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -1010,21 +1010,33 @@ export default class N3Parser {

// ## Public methods

// ### `parse` parses the N3 input and emits each parsed quad through the callback
// ### `parse` parses the N3 input and emits each parsed quad through the onQuad callback.
parse(input, quadCallback, prefixCallback) {
// The second parameter accepts an object { onQuad: ..., onPrefix: ..., onComment: ...}
// As a second and third parameter it still accepts a separate quadCallback and prefixCallback for backward compatibility as well
let onQuad, onPrefix, onComment;
if (quadCallback && (quadCallback.onQuad || quadCallback.onPrefix || quadCallback.onComment)) {
onQuad = quadCallback.onQuad;
onPrefix = quadCallback.onPrefix;
onComment = quadCallback.onComment;
}
else {
onQuad = quadCallback;
onPrefix = prefixCallback;
}
// The read callback is the next function to be executed when a token arrives.
// We start reading in the top context.
this._readCallback = this._readInTopContext;
this._sparqlStyle = false;
this._prefixes = Object.create(null);
this._prefixes._ = this._blankNodePrefix ? this._blankNodePrefix.substr(2)
: `b${blankNodePrefix++}_`;
this._prefixCallback = prefixCallback || noop;
this._prefixCallback = onPrefix || noop;
this._inversePredicate = false;
this._quantified = Object.create(null);

// Parse synchronously if no quad callback is given
if (!quadCallback) {
if (!onQuad) {
const quads = [];
let error;
this._callback = (e, t) => { e ? (error = e) : t && quads.push(t); };
Expand All @@ -1035,14 +1047,33 @@ export default class N3Parser {
return quads;
}

// Parse asynchronously otherwise, executing the read callback when a token arrives
this._callback = quadCallback;
this._lexer.tokenize(input, (error, token) => {
let processNextToken = (error, token) => {
if (error !== null)
this._callback(error), this._callback = noop;
else if (this._readCallback)
this._readCallback = this._readCallback(token);
});
};

// Enable checking for comments on every token when a commentCallback has been set
if (onComment) {
// Enable the lexer to return comments as tokens first (disabled by default)
this._lexer.comments = true;
// Patch the processNextToken function
processNextToken = (error, token) => {
if (error !== null)
this._callback(error), this._callback = noop;
else if (this._readCallback) {
if (token.type === 'comment')
onComment(token.value);
else
this._readCallback = this._readCallback(token);
}
};
}

// Parse asynchronously otherwise, executing the read callback when a token arrives
this._callback = onQuad;
this._lexer.tokenize(input, processNextToken);
}
}

Expand Down
18 changes: 12 additions & 6 deletions src/N3StreamParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,25 @@ export default class N3StreamParser extends Transform {
// Set up parser with dummy stream to obtain `data` and `end` callbacks
const parser = new N3Parser(options);
let onData, onEnd;

const callbacks = {
// Handle quads by pushing them down the pipeline
onQuad: (error, quad) => { error && this.emit('error', error) || quad && this.push(quad); },
// Emit prefixes through the `prefix` event
onPrefix: (prefix, uri) => { this.emit('prefix', prefix, uri); },
};

if (options && options.comments)
callbacks.onComment = comment => { this.emit('comment', comment); };

parser.parse({
on: (event, callback) => {
switch (event) {
case 'data': onData = callback; break;
case 'end': onEnd = callback; break;
}
},
},
// Handle quads by pushing them down the pipeline
(error, quad) => { error && this.emit('error', error) || quad && this.push(quad); },
// Emit prefixes through the `prefix` event
(prefix, uri) => { this.emit('prefix', prefix, uri); },
);
}, callbacks);

// Implement Transform methods through parser callbacks
this._transform = (chunk, encoding, done) => { onData(chunk); done(); };
Expand Down
117 changes: 117 additions & 0 deletions test/N3Parser-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,28 @@ describe('Parser', () => {
['g', 'h', 'i']),
);

it(
'should parse three triples with comments if no comment callback is set',
shouldParse('<a> <b> #comment2\n <c> . \n<d> <e> <f>.\n<g> <h> <i>.',
['a', 'b', 'c'],
['d', 'e', 'f'],
['g', 'h', 'i']),
);

it(
'should parse three triples with comments when comment callback is set',
shouldParseWithCommentsEnabled('<a> <b> #comment2\n <c> . \n<d> <e> <f>.\n<g> <h> <i>.',
['a', 'b', 'c'],
['d', 'e', 'f'],
['g', 'h', 'i']),
);

it(
'should callback comments when a comment callback is set',
shouldCallbackComments('#comment1\n<a> <b> #comment2\n <c> . \n<d> <e> <f>.\n<g> <h> <i>.',
'comment1', 'comment2'),
);

it('should parse a triple with a literal', shouldParse('<a> <b> "string".',
['a', 'b', '"string"']));

Expand Down Expand Up @@ -203,6 +225,12 @@ describe('Parser', () => {
'Undefined prefix "d:" on line 1.'),
);

it(
'should not parse undefined prefix in datatype with comments enabled',
shouldNotParseWithComments('#comment\n<a> <b> "c"^^d:e ',
'Undefined prefix "d:" on line 2.'),
);

it(
'should parse triples with SPARQL prefixes',
shouldParse('PREFIX : <#>\n' +
Expand Down Expand Up @@ -1601,6 +1629,12 @@ describe('Parser', () => {
'Unexpected literal on line 1.'),
);

// Runs through shouldNotParseWithComments, i.e. with an `onComment` listener
// registered; the title says so to stay distinguishable from a non-comment counterpart.
it(
  'should not parse a literal as subject with comments enabled',
  shouldNotParseWithComments(parser, '1 <a> <b>.',
    'Unexpected literal on line 1.'),
);

it(
'should not parse RDF-star in the subject position',
shouldNotParse(parser, '<<<a> <b> <c>>> <a> <b> .',
Expand Down Expand Up @@ -1632,6 +1666,12 @@ describe('Parser', () => {
shouldNotParse(parser, '<<_:a <http://ex.org/b> _:b <http://ex.org/b>>> <http://ex.org/b> "c" .',
'Expected >> to follow "_:b0_b" on line 1.'),
);

it(
'should not parse nested quads with comments',
shouldNotParseWithComments(parser, '#comment1\n<<_:a <http://ex.org/b> _:b <http://ex.org/b>>> <http://ex.org/b> "c" .',
'Expected >> to follow "_:b0_b" on line 2.'),
);
});

describe('A Parser instance for the TriG format', () => {
Expand Down Expand Up @@ -3038,6 +3078,57 @@ function shouldParse(parser, input) {
};
}

// Creates a test callback asserting that `input` parses into the expected quads
// while an `onComment` listener is registered on the parser.
// `parser` (a callable parser class) is optional as first argument;
// every argument after `input` is an expected quad in the form accepted by `mapToQuad`.
function shouldParseWithCommentsEnabled(parser, input) {
  // All arguments but the first: `[input, ...quads]` or just `[...quads]`
  const expected = Array.from(arguments).slice(1);
  if (parser.call) {
    // A parser class was passed explicitly, so `input` still heads the list
    expected.shift();
  }
  else {
    // The first argument is actually the input; fall back to the default Parser
    input = parser;
    parser = Parser;
  }

  return function (done) {
    const results = [];
    const items = expected.map(mapToQuad);
    const instance = new parser({ baseIRI: BASE_IRI });

    const onQuad = (error, triple) => {
      expect(error).toBeFalsy();
      if (triple) {
        results.push(triple);
        return;
      }
      // A call without a triple marks the end: compare irrespective of order
      expect(toSortedJSON(results)).toBe(toSortedJSON(items));
      done();
    };
    // Registering a comment listener is what switches the parser to comment mode
    const onComment = comment => {
      expect(comment).toBeDefined();
    };

    instance.parse(input, { onQuad, onComment });
  };
}


// Creates a test callback asserting that parsing `input` reports exactly the
// expected comments, in order, through the `onComment` listener.
// `parser` (a callable parser class) is optional as first argument;
// every argument after `input` is an expected comment string.
function shouldCallbackComments(parser, input) {
  const expected = Array.prototype.slice.call(arguments, 1);
  // Shift parameters as necessary
  if (parser.call)
    expected.shift();
  else
    input = parser, parser = Parser;

  return function (done) {
    const comments = [];
    new parser({ baseIRI: BASE_IRI }).parse(input, {
      onQuad: (error, triple) => {
        // An error also arrives without a triple; fail on it rather than
        // mistaking it for the end-of-parse marker below
        expect(error).toBeFalsy();
        if (!triple) {
          // Marks the end
          expect(JSON.stringify(comments)).toBe(JSON.stringify(expected));
          done();
        }
      },
      onComment: comment => { comments.push(comment); },
    });
  };
}

function mapToQuad(item) {
item = item.map(t => {
// don't touch if it's already an object
Expand Down Expand Up @@ -3082,6 +3173,32 @@ function shouldNotParse(parser, input, expectedError, expectedContext) {
};
}

// Creates a test callback asserting that parsing `input` fails with
// `expectedError` as message (and `expectedContext` as error context, if given)
// while an `onComment` listener is registered on the parser.
// `parser` (a callable parser class) is optional as first argument.
function shouldNotParseWithComments(parser, input, expectedError, expectedContext) {
  // Without a parser class, every argument sits one position to the left
  if (!parser.call) {
    [parser, input, expectedError, expectedContext] =
      [Parser, parser, input, expectedError];
  }

  return function (done) {
    const instance = new parser({ baseIRI: BASE_IRI });

    const onQuad = (error, triple) => {
      if (!error) {
        // Reaching the end (no triple) without an error means the test failed
        if (!triple)
          done(new Error(`Expected error ${expectedError}`));
        return;
      }
      expect(triple).toBeFalsy();
      expect(error).toBeInstanceOf(Error);
      expect(error.message).toEqual(expectedError);
      if (expectedContext)
        expect(error.context).toEqual(expectedContext);
      done();
    };
    // Enables comment mode
    const onComment = comment => {
      expect(comment).toBeDefined();
    };

    instance.parse(input, { onQuad, onComment });
  };
}

function itShouldResolve(baseIRI, relativeIri, expected) {
let result;
describe(`resolving <${relativeIri}> against <${baseIRI}>`, () => {
Expand Down
Loading

0 comments on commit 881d0aa

Please sign in to comment.