diff --git a/src/N3Lexer.js b/src/N3Lexer.js
index eeb36bdd..0d3e84cd 100644
--- a/src/N3Lexer.js
+++ b/src/N3Lexer.js
@@ -300,13 +300,41 @@ export default class N3Lexer {
       case ']':
       case '(':
       case ')':
-      case '{':
       case '}':
+        // These single-character punctuation tokens are only recognized outside of line mode
+        if (!this._lineMode) {
+          matchLength = 1;
+          type = firstChar;
+        }
+        break;
+      case '{':
         if (!this._lineMode) {
+          // We need at least 2 characters lookahead to distinguish "{|" and "{ "
+          if (input.length < 2)
+            break;
+
+          // Try to find a quoted triple annotation start
+          if (input[1] === '|') {
+            type = '{|';
+            matchLength = 2;
+            break;
+          }
+
           matchLength = 1;
           type = firstChar;
         }
         break;
+      case '|':
+        // We need at least 2 characters lookahead to distinguish "|}" and "|"
+        if (input.length < 2)
+          break;
+        // Try to find a quoted triple annotation end
+        if (input[1] === '}') {
+          type = '|}';
+          matchLength = 2;
+          break;
+        }
+        // Not an annotation end: deliberately fall through to the default handling
       default:
         inconclusive = true;
 
diff --git a/src/N3Parser.js b/src/N3Parser.js
index ea632932..5ada54e0 100644
--- a/src/N3Parser.js
+++ b/src/N3Parser.js
@@ -614,6 +614,23 @@ export default class N3Parser {
     case ',':
       next = this._readObject;
       break;
+    // {| means that the current triple is annotated with predicate-object pairs.
+    case '{|':
+      if (!this._supportsRDFStar)
+        return this._error('Unexpected RDF* syntax', token);
+
+      // Continue using the last triple as quoted triple subject for the predicate-object pairs.
+      this._subject = this._quad(subject, this._predicate, this._object, this.DEFAULTGRAPH);
+      next = this._readPredicate;
+      break;
+    // |} means that the current quoted triple in annotation syntax is finalized.
+    case '|}':
+      // This case resets the subject to null below, so check it before reading termType
+      if (!this._subject || this._subject.termType !== 'Quad')
+        return this._error('Unexpected asserted triple closing', token);
+      this._subject = null;
+      next = this._readPunctuation;
+      break;
     default:
       // An entity means this is a quad (only allowed if not already inside a graph)
       if (this._supportsQuads && this._graph === null && (graph = this._readEntity(token)) !== undefined) {
diff --git a/test/N3Lexer-test.js b/test/N3Lexer-test.js
index 03902d3f..d9030714 100644
--- a/test/N3Lexer-test.js
+++ b/test/N3Lexer-test.js
@@ -1090,6 +1090,49 @@ describe('Lexer', () => {
         { type: '.', line: 1 },
         { type: 'eof', line: 1 }));
 
+    it('should tokenize a quoted triple annotation start',
+      shouldTokenize('{|',
+        { type: '{|', line: 1 },
+        { type: 'eof', line: 1 }));
+
+    it('should tokenize a split quoted triple annotation start',
+      shouldTokenize(streamOf('{', '|'),
+        { type: '{|', line: 1 },
+        { type: 'eof', line: 1 }));
+
+    it('should tokenize a quoted triple annotation end',
+      shouldTokenize('|}',
+        { type: '|}', line: 1 },
+        { type: 'eof', line: 1 }));
+
+    it('should tokenize a split quoted triple annotation end',
+      shouldTokenize(streamOf('|', '}'),
+        { type: '|}', line: 1 },
+        { type: 'eof', line: 1 }));
+
+    it('should tokenize an empty quoted triple annotation',
+      shouldTokenize('{| |}',
+        { type: '{|', line: 1 },
+        { type: '|}', line: 1 },
+        { type: 'eof', line: 1 }));
+
+    it('should tokenize a non-empty quoted triple annotation',
+      shouldTokenize('{| \n\t |}.',
+        { type: '{|', line: 1 },
+        { type: 'IRI', value: 'http://ex.org/?bla#bar', line: 1 },
+        { type: 'IRI', value: 'http://ex.org/?bla#boo', line: 2 },
+        { type: '|}', line: 2 },
+        { type: '.', line: 2 },
+        { type: 'eof', line: 2 }));
+
+    it('should not tokenize an incomplete closing triple annotation',
+      shouldNotTokenize('{| |',
+        'Unexpected "|" on line 1.'));
+
+    it('should not tokenize an invalid closing triple annotation',
+      shouldNotTokenize('{| ||',
+        'Unexpected "||" on line 1.'));
+
     it('returns start and end index for every token', () => {
       const tokens = new Lexer().tokenize(' "lit"@EN.');
       tokens.should.deep.equal([
diff --git a/test/N3Parser-test.js b/test/N3Parser-test.js
index a08c1327..16ddccb1 100644
--- a/test/N3Parser-test.js
+++ b/test/N3Parser-test.js
@@ -746,7 +746,7 @@ describe('Parser', () => {
 
     it('should not parse a single opening brace',
       shouldNotParse('{',
-        'Expected entity but got eof on line 1.'));
+        'Unexpected "{" on line 1.'));
 
     it('should not parse a superfluous closing brace ',
       shouldNotParse('{}}',
@@ -1044,6 +1044,46 @@ describe('Parser', () => {
       shouldParse(' .\n<< >> .',
         ['a', 'b', 'c', 'g'],
         [['a', 'b', 'c'], 'd', 'e']));
+
+    it('should parse an RDF* triple using annotation syntax with one predicate-object',
+      shouldParse(' {| |}.',
+        ['a', 'b', 'c'], [['a', 'b', 'c'], 'b', 'c']));
+
+    it('should parse an RDF* triple using annotation syntax with two predicate-objects',
+      shouldParse(' {| ; |}.',
+        ['a', 'b', 'c'], [['a', 'b', 'c'], 'b1', 'c1'], [['a', 'b', 'c'], 'b2', 'c2']));
+
+    it('should parse an RDF* triple using annotation syntax with one predicate-object followed by regular triples',
+      shouldParse(' {| |}.\n .',
+        ['a', 'b', 'c'], [['a', 'b', 'c'], 'b', 'c'], ['a2', 'b2', 'c2']));
+
+    it('should not parse an RDF* triple using annotation syntax with zero predicate-objects',
+      shouldNotParse(' {| |}',
+        'Expected entity but got |} on line 1.'));
+
+    it('should not parse an RDF* triple using an incomplete annotation syntax',
+      shouldNotParse(' {| |}',
+        'Expected entity but got |} on line 1.'));
+
+    it('should not parse an RDF* triple using an incomplete annotation syntax after a semicolon',
+      shouldNotParse(' {| ; |}',
+        'Expected entity but got |} on line 1.'));
+
+    it('should not parse an RDF* triple using an incomplete annotation syntax after a semicolon and entity',
+      shouldNotParse(' {| ; |}',
+        'Expected entity but got |} on line 1.'));
+
+    it('should not parse an RDF* triple using an incomplete annotation syntax that misses |}',
+      shouldNotParse(' {| ',
+        'Expected entity but got eof on line 1.'));
+
+    it('should not parse an RDF* triple using an incomplete annotation syntax that misses |} and starts a new subject',
+      shouldNotParse(' {| . ',
+        'Expected entity but got eof on line 1.'));
+
+    it('should not parse an out of place |}',
+      shouldNotParse(' |}',
+        'Unexpected asserted triple closing on line 1.'));
   });
 
   describe('An Parser instance without document IRI', () => {
@@ -1227,6 +1267,10 @@ describe('Parser', () => {
     it('should not parse RDF* in the object position',
       shouldNotParse(parser, ' < >>.',
         'Unexpected RDF* syntax on line 1.'));
+
+    it('should not parse RDF* with annotated syntax',
+      shouldNotParse(parser, ' {| |}.',
+        'Unexpected RDF* syntax on line 1.'));
   });
 
   describe('A Parser instance for the TurtleStar format', () => {
@@ -1288,6 +1332,10 @@ describe('Parser', () => {
     it('should not parse RDF* in the object position',
       shouldNotParse(parser, ' << >>.',
         'Unexpected RDF* syntax on line 1.'));
+
+    it('should not parse RDF* with annotated syntax',
+      shouldNotParse(parser, ' {| |}.',
+        'Unexpected RDF* syntax on line 1.'));
   });
 
   describe('A Parser instance for the TriGStar format', () => {
@@ -1375,6 +1423,10 @@ describe('Parser', () => {
     it('should not parse nested quads',
       shouldNotParse(parser, '<<_:a _:b >> "c" .',
         'Expected >> to follow "_:b0_b" on line 1.'));
+
+    it('should not parse annotated triples',
+      shouldNotParse(parser, '_:a _:c {| "c1" |} .',
+        'Unexpected "{|" on line 1.'));
   });
 
   describe('A Parser instance for the N-Quads format', () => {
@@ -1430,6 +1482,10 @@ describe('Parser', () => {
     it('should parse RDF*',
       shouldParse(parser, '<<_:a _:c>> _:c .',
         [['_:b0_a', 'b', '_:b0_c'], 'a', '_:b0_c']));
+
+    it('should not parse annotated triples',
+      shouldNotParse(parser, '_:a _:c {| "c1" |} .',
+        'Unexpected "{|" on line 1.'));
   });
 
   describe('A Parser instance for the N3 format', () => {
@@ -1777,6 +1833,10 @@ describe('Parser', () => {
     it('should not parse RDF* in the object position',
       shouldNotParse(parser, ' << >>.',
         'Unexpected RDF* syntax on line 1.'));
+
+    it('should not parse RDF* with annotated syntax',
+      shouldNotParse(parser, ' {| |}.',
+        'Unexpected RDF* syntax on line 1.'));
   });
 
   describe('A Parser instance for the N3Star format', () => {