Skip to content

Commit

Permalink
Bug 1319410 - Make consecutive hyphens in comments a non-error
Browse files Browse the repository at this point in the history
Also allow `<!-->` at (IE conditional) comment end

See whatwg/html#1356
See whatwg/html#1456

Differential Revision: https://phabricator.services.mozilla.com/D82000
  • Loading branch information
sideshowbarker authored and hsivonen committed Jul 3, 2020
1 parent ffe4d30 commit 1848152
Show file tree
Hide file tree
Showing 6 changed files with 462 additions and 28 deletions.
2 changes: 1 addition & 1 deletion dom/locales/en-US/chrome/layout/htmlparser.properties
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ errDuplicateAttribute=Duplicate attribute.
errEofInSystemId=End of file inside system identifier.
errExpectedSystemId=Expected a system identifier but the doctype ended.
errMissingSpaceBeforeDoctypeName=Missing space before doctype name.
errHyphenHyphenBang=“--!” found in comment.
errNestedComment=Saw “<!--” within a comment. Probable cause: Nested comment (not allowed).
errNcrZero=Character reference expands to zero.
errNoSpaceBetweenDoctypeSystemKeywordAndQuote=No space between the doctype “SYSTEM” keyword and the quote.
errNoSpaceBetweenPublicAndSystemIds=No space between the doctype public and system identifiers.
Expand Down
199 changes: 187 additions & 12 deletions parser/html/javasrc/Tokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,14 @@ public class Tokenizer implements Locator {

public static final int AMBIGUOUS_AMPERSAND = 75;

public static final int COMMENT_LESSTHAN = 76;

public static final int COMMENT_LESSTHAN_BANG = 77;

public static final int COMMENT_LESSTHAN_BANG_DASH = 78;

public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79;

/**
* Magic value for UTF-16 operations.
*/
Expand Down Expand Up @@ -1034,9 +1042,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {

// ]NOCPP]

@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c, boolean reportedConsecutiveHyphens)
throws SAXException {
errConsecutiveHyphens();
// [NOCPP[
switch (commentPolicy) {
case ALTER_INFOSET:
Expand All @@ -1047,7 +1054,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
appendStrBuf('-');
// CPPONLY: MOZ_FALLTHROUGH;
case ALLOW:
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
if (!reportedConsecutiveHyphens) {
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
}
// ]NOCPP]
appendStrBuf(c);
// [NOCPP[
Expand Down Expand Up @@ -1509,6 +1518,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
@SuppressWarnings("unused") private int stateLoop(int state, char c,
int pos, @NoLength char[] buf, boolean reconsume, int returnState,
int endPos) throws SAXException {
boolean reportedConsecutiveHyphens = false;
/*
* Idioms used in this code:
*
Expand Down Expand Up @@ -2594,6 +2604,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
}
// CPPONLY: MOZ_FALLTHROUGH;
case COMMENT_START:
reportedConsecutiveHyphens = false;
commentstartloop: for (;;) {
if (++pos == endPos) {
break stateloop;
Expand Down Expand Up @@ -2626,6 +2637,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand Down Expand Up @@ -2671,6 +2686,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
break commentloop;
// continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
break stateloop;
Expand Down Expand Up @@ -2713,6 +2732,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
break commentenddashloop;
// continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand Down Expand Up @@ -2767,11 +2790,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
* Append a U+002D HYPHEN-MINUS (-) character to
* the comment token's data.
*/
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
/*
* Stay in the comment end state.
*/
continue;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand All @@ -2781,7 +2809,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
continue stateloop;
case '!':
errHyphenHyphenBang();
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
continue stateloop;
Expand All @@ -2794,7 +2821,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
* and the input character to the comment
* token's data.
*/
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
/*
* Switch to the comment state.
*/
Expand Down Expand Up @@ -2864,6 +2892,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
continue stateloop;
}
}
case COMMENT_LESSTHAN:
    /*
     * Comment less-than sign state: reached after a '<' seen inside a
     * comment has already been appended to the buffer. Only an
     * immediately following '!' continues towards a nested "<!--";
     * every other character is ordinary comment data.
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '!':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
            continue stateloop;
        case '<':
            // Another '<' restarts the "<!--" detection from here.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
            continue stateloop;
        case '-':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
            continue stateloop;
        case '\r':
            appendStrBufCarriageReturn();
            // A line break ends the "<!--" detection; without this
            // transition the tokenizer would stay in this state and
            // treat "<\r!--" like "<!--".
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            appendStrBufLineFeed();
            // Same as '\r': fall back to the plain comment state.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
        case '\u0000':
            c = '\uFFFD';
            // fall thru
        default:
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
case COMMENT_LESSTHAN_BANG:
    /*
     * Comment less-than sign bang state: "<!" has been seen inside a
     * comment and appended to the buffer. A '-' continues towards a
     * nested "<!--"; anything else is ordinary comment data.
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '-':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
            continue stateloop;
        case '<':
            // A fresh '<' restarts the "<!--" detection.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
            continue stateloop;
        case '\r':
            appendStrBufCarriageReturn();
            // A line break is not part of "<!--": leave this state so
            // that "<!\r--" is not mistaken for a nested comment.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            appendStrBufLineFeed();
            // Same as '\r': fall back to the plain comment state.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
        case '\u0000':
            c = '\uFFFD';
            // fall thru
        default:
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
case COMMENT_LESSTHAN_BANG_DASH:
    /*
     * Comment less-than sign bang dash state: "<!-" has been seen
     * inside a comment and appended to the buffer. A second '-'
     * completes "<!--"; anything else is ordinary comment data.
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '-':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
            continue stateloop;
        case '<':
            // A fresh '<' restarts the "<!--" detection.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
            continue stateloop;
        case '\r':
            appendStrBufCarriageReturn();
            // A line break is not part of "<!--": leave this state so
            // that "<!-\r-" is not mistaken for a nested comment.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            appendStrBufLineFeed();
            // Same as '\r': fall back to the plain comment state.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
        case '\u0000':
            c = '\uFFFD';
            // fall thru
        default:
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
case COMMENT_LESSTHAN_BANG_DASH_DASH:
    /*
     * Comment less-than sign bang dash dash state: "<!--" has been
     * seen inside a comment and appended to the buffer. A '>' right
     * here ends the comment ("<!-->"); any other character means the
     * "<!--" was a nested comment opener, which is reported, and the
     * character is then handled as it would be after "--".
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '>':
            appendStrBuf(c);
            // Drop the trailing "-->" from the emitted comment data.
            emitComment(3, pos);
            state = transition(state, Tokenizer.DATA, reconsume, pos);
            continue stateloop;
        case '-':
            errNestedComment();
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
            continue stateloop;
        case '\r':
            errNestedComment();
            // Use the CR helper so the line break gets the same
            // carriage-return handling as in the comment end state
            // instead of a raw '\r' being appended.
            adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            errNestedComment();
            // Use the LF helper so the line feed is accounted for.
            adjustDoubleHyphenAndAppendToStrBufLineFeed();
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            // Dispatch on the new state; a bare "continue" would keep
            // running this state's logic.
            continue stateloop;
        case '!':
            errNestedComment();
            // NOTE(review): the comment end state appends '!' with a
            // plain appendStrBuf(c); confirm the double-hyphen
            // adjustment is intended before "--!".
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
            continue stateloop;
        case '\u0000':
            // NUL is ordinary data here: replace it and handle it like
            // any other character (not like '!').
            c = '\uFFFD';
            // fall thru
        default:
            errNestedComment();
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            // An ordinary character after "--" returns to the comment
            // state, matching the comment end state's own handling;
            // staying in COMMENT_END would let a following '>' close
            // the comment prematurely.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
// XXX reorder point
case COMMENT_START_DASH:
if (++pos == endPos) {
break stateloop;
Expand Down Expand Up @@ -2892,6 +3062,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand Down Expand Up @@ -6025,13 +6199,13 @@ private void initDoctypeFields() {
@Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
throws SAXException {
silentCarriageReturn();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}

@Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
throws SAXException {
silentLineFeed();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}

@Inline private void appendStrBufLineFeed() {
Expand Down Expand Up @@ -6336,6 +6510,8 @@ public void eof() throws SAXException {
break eofloop;
case COMMENT_START:
case COMMENT:
case COMMENT_LESSTHAN:
case COMMENT_LESSTHAN_BANG:
/*
* EOF Parse error.
*/
Expand All @@ -6347,6 +6523,7 @@ public void eof() throws SAXException {
*/
break eofloop;
case COMMENT_END:
case COMMENT_LESSTHAN_BANG_DASH_DASH:
errEofInComment();
/* Emit the comment token. */
emitComment(2, 0);
Expand All @@ -6356,6 +6533,7 @@ public void eof() throws SAXException {
break eofloop;
case COMMENT_END_DASH:
case COMMENT_START_DASH:
case COMMENT_LESSTHAN_BANG_DASH:
errEofInComment();
/* Emit the comment token. */
emitComment(1, 0);
Expand Down Expand Up @@ -6980,7 +7158,7 @@ protected void errGtInPublicId() throws SAXException {
protected void errNamelessDoctype() throws SAXException {
}

protected void errConsecutiveHyphens() throws SAXException {
protected void errNestedComment() throws SAXException {
}

protected void errPrematureEndOfComment() throws SAXException {
Expand Down Expand Up @@ -7130,9 +7308,6 @@ protected void errExpectedSystemId() throws SAXException {
protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
}

protected void errHyphenHyphenBang() throws SAXException {
}

protected void errNcrControlChar() throws SAXException {
}

Expand Down
Loading

0 comments on commit 1848152

Please sign in to comment.