This repository has been archived by the owner on Dec 15, 2022. It is now read-only.

Merge pull request #160 from atom/wl-entities-take-two

Improve entity scopes, take two

Wliu authored Sep 20, 2017
2 parents 457b294 + adef4ad commit 9fde136
Showing 2 changed files with 124 additions and 16 deletions.
grammars/html.cson: 62 changes (51 additions & 11 deletions)
@@ -398,7 +398,7 @@
]
}
{
'include': '#entities'
'include': '#text-entities'
}
{
'match': '<>'
@@ -415,21 +415,61 @@
'include': '#python'
}
]
'entities':
'text-entities':
# https://www.w3.org/TR/html51/syntax.html#consume-a-character-reference
'patterns': [
{
'match': '(&)([a-zA-Z0-9]+|#[0-9]+|#x[0-9a-fA-F]+)(;)'
'captures':
'begin': '(&)([a-zA-Z0-9]+|#\\d+|#[xX][0-9a-fA-F]+)'
'beginCaptures':
'1':
'name': 'punctuation.definition.entity.begin.html'
'2':
'name': 'entity.name.entity.other.html'
'3':
'end': ';'
'endCaptures':
'0':
'name': 'punctuation.definition.entity.end.html'
'name': 'constant.character.entity.html'
}
{
'match': '&(?!\\s|<|&)'
'name': 'invalid.illegal.bad-ampersand.html'
}
]
'attribute-entities':
# https://www.w3.org/TR/html51/syntax.html#consume-a-character-reference
# Because it would be infeasible to include the entire list of allowed entities,
# make sure that an equals sign or the end of a string does not follow a potential reference.
'patterns': [
{
'begin': '(&)(#\\d+|#[xX][0-9a-fA-F]+)'
'beginCaptures':
'1':
'name': 'punctuation.definition.entity.begin.html'
'2':
'name': 'entity.name.entity.other.html'
'end': ';'
'endCaptures':
'0':
'name': 'punctuation.definition.entity.end.html'
'name': 'constant.character.entity.html'
}
{
'begin': '(&)([a-zA-Z0-9]++)(?!["\'=])'
'beginCaptures':
'1':
'name': 'punctuation.definition.entity.begin.html'
'2':
'name': 'entity.name.entity.other.html'
'end': ';'
'endCaptures':
'0':
'name': 'punctuation.definition.entity.end.html'
'name': 'constant.character.entity.html'
}
{
'match': '&'
# In attributes, potential references that end with an equals sign are fine
'match': '&(?!\\s|<|&|[a-zA-Z0-9]+=)'
'name': 'invalid.illegal.bad-ampersand.html'
}
]
@@ -480,7 +520,7 @@
'include': '#embedded-code'
}
{
'include': '#entities'
'include': '#attribute-entities'
}
]
'string-single-quoted':
@@ -498,7 +538,7 @@
'include': '#embedded-code'
}
{
'include': '#entities'
'include': '#attribute-entities'
}
]
'tag-generic-attribute':
@@ -559,7 +599,7 @@
'include': '#embedded-code'
}
{
'include': '#entities'
'include': '#attribute-entities'
}
]
}
@@ -579,7 +619,7 @@
'include': '#embedded-code'
}
{
'include': '#entities'
'include': '#attribute-entities'
}
]
}
@@ -608,7 +648,7 @@
'unquoted-attribute':
'patterns': [
{
'include': '#entities'
'include': '#attribute-entities'
}
{
# https://www.w3.org/TR/html51/syntax.html#attribute-value-unquoted-state
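
The negative lookaheads in the two invalid.illegal.bad-ampersand.html rules carry most of the behavioral change above. As a rough illustration, here is a small CoffeeScript sketch that exercises the same patterns with plain JavaScript regexes rather than the Oniguruma engine Atom compiles them with; the sample strings are invented, and cases that depend on the end of a line (such as a bare '&') also depend on how the tokenizer feeds lines to the grammar, so they are not reproduced here.

# Text content: an ampersand is invalid only when it is not followed by
# whitespace, '<', or another '&' (see the '#text-entities' rules above).
textBadAmpersand = /&(?!\s|<|&)/

# Attribute values: additionally tolerate '&word=', so query parameters such
# as '&type=json' are not flagged (see the '#attribute-entities' rules above).
attrBadAmpersand = /&(?!\s|<|&|[a-zA-Z0-9]+=)/

console.log textBadAmpersand.test 'PSE&>'                  # true  -> flagged as invalid
console.log textBadAmpersand.test '& '                     # false -> followed by whitespace
console.log attrBadAmpersand.test 'example.com?&"'         # true  -> bare '&' before a closing quote
console.log attrBadAmpersand.test 'example.com?&type=json' # false -> looks like a query parameter
console.log attrBadAmpersand.test 'example.com?& '         # false -> followed by whitespace
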
spec/html-spec.coffee: 78 changes (73 additions & 5 deletions)
@@ -376,14 +376,82 @@ describe 'HTML grammar', ->
expect(lines[2][1]).toEqual value: 'disabled', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-without-value.html', 'entity.other.attribute-name.html']
expect(lines[2][2]).toEqual value: '>', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'punctuation.definition.tag.end.html']

describe "entities", ->
describe "entities in text", ->
it "tokenizes & and characters after it", ->
{tokens} = grammar.tokenizeLine '& &amp; &a'

expect(tokens[0]).toEqual value: '&', scopes: ['text.html.basic', 'invalid.illegal.bad-ampersand.html']
expect(tokens[3]).toEqual value: 'amp', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
expect(tokens[4]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
expect(tokens[7]).toEqual value: 'a', scopes: ['text.html.basic']
expect(tokens[0]).toEqual value: '& ', scopes: ['text.html.basic']
expect(tokens[1]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
expect(tokens[2]).toEqual value: 'amp', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
expect(tokens[3]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
expect(tokens[5]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
expect(tokens[6]).toEqual value: 'a', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']

lines = grammar.tokenizeLines '&\n'
expect(lines[0][0]).toEqual value: '&', scopes: ['text.html.basic']

it "tokenizes hexadecimal and digit entities", ->
{tokens} = grammar.tokenizeLine '&#x00022; &#X00022; &#34;'

expect(tokens[0]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
expect(tokens[1]).toEqual value: '#x00022', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
expect(tokens[2]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
expect(tokens[4]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
expect(tokens[5]).toEqual value: '#X00022', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
expect(tokens[6]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']
expect(tokens[8]).toEqual value: '&', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
expect(tokens[9]).toEqual value: '#34', scopes: ['text.html.basic', 'constant.character.entity.html', 'entity.name.entity.other.html']
expect(tokens[10]).toEqual value: ';', scopes: ['text.html.basic', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']

it "tokenizes invalid ampersands", ->
{tokens} = grammar.tokenizeLine 'PSE&>'
expect(tokens[0]).toEqual value: 'PSE', scopes: ['text.html.basic']
expect(tokens[1]).toEqual value: '&', scopes: ['text.html.basic', 'invalid.illegal.bad-ampersand.html']
expect(tokens[2]).toEqual value: '>', scopes: ['text.html.basic']

{tokens} = grammar.tokenizeLine 'PSE&'
expect(tokens[0]).toEqual value: 'PSE&', scopes: ['text.html.basic']

{tokens} = grammar.tokenizeLine '&<'
expect(tokens[0]).toEqual value: '&<', scopes: ['text.html.basic']

{tokens} = grammar.tokenizeLine '& '
expect(tokens[0]).toEqual value: '& ', scopes: ['text.html.basic']

{tokens} = grammar.tokenizeLine '&'
expect(tokens[0]).toEqual value: '&', scopes: ['text.html.basic']

{tokens} = grammar.tokenizeLine '&&'
expect(tokens[0]).toEqual value: '&&', scopes: ['text.html.basic']

describe "entities in attributes", ->
it "tokenizes entities", ->
{tokens} = grammar.tokenizeLine '<a href="http://example.com?&amp;">'
expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'constant.character.entity.html', 'punctuation.definition.entity.begin.html']
expect(tokens[8]).toEqual value: 'amp', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'constant.character.entity.html', 'entity.name.entity.other.html']
expect(tokens[9]).toEqual value: ';', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'constant.character.entity.html', 'punctuation.definition.entity.end.html']

it "does not tokenize query parameters as entities", ->
{tokens} = grammar.tokenizeLine '<a href="http://example.com?one=1&type=json&topic=css">'
expect(tokens[6]).toEqual value: 'http://example.com?one=1&type=json&topic=css', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']

it "tokenizes invalid ampersands", ->
# Note: in order to replicate the following tests' behaviors, make sure you have language-hyperlink disabled
{tokens} = grammar.tokenizeLine '<a href="http://example.com?&">'
expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'invalid.illegal.bad-ampersand.html']

{tokens} = grammar.tokenizeLine '<a href="http://example.com?&=">'
expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'invalid.illegal.bad-ampersand.html']

{tokens} = grammar.tokenizeLine '<a href="http://example.com?& ">'
expect(tokens[6]).toEqual value: 'http://example.com?& ', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']

lines = grammar.tokenizeLines '<a href="http://example.com?&\n">'
expect(lines[0][6]).toEqual value: 'http://example.com?&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']

{tokens} = grammar.tokenizeLine '<a href="http://example.com?&&">'
expect(tokens[6]).toEqual value: 'http://example.com?&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html']
expect(tokens[7]).toEqual value: '&', scopes: ['text.html.basic', 'meta.tag.inline.any.html', 'meta.attribute-with-value.html', 'string.quoted.double.html', 'invalid.illegal.bad-ampersand.html']

describe "firstLineMatch", ->
it "recognises HTML5 doctypes", ->
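
For anyone who wants to poke at the new text-entities/attribute-entities split outside the spec suite, a sketch along these lines should work from Atom's developer console, assuming the language-html package is already loaded; the markup is invented for illustration and the printed output is left to the console.

grammar = atom.grammars.grammarForScopeName 'text.html.basic'

# In running text, '&type' is scoped as the start of a character reference,
# much like the '&a' expectation in the specs above.
{tokens} = grammar.tokenizeLine '&type=json'
console.log (t.scopes[t.scopes.length - 1] for t in tokens)

# Inside a quoted attribute value the same characters stay plain string
# content, because '#attribute-entities' skips references followed by '='.
{tokens} = grammar.tokenizeLine '<a href="http://example.com?one=1&type=json">'
console.log (t.value for t in tokens)
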
