Skip to content

Commit

Permalink
Make consecutive hyphens in comments a non-error
Browse files Browse the repository at this point in the history
Also allow `<!-->` at (IE conditional) comment end

See whatwg/html#1356
See whatwg/html#1456
  • Loading branch information
sideshowbarker committed Aug 3, 2020
1 parent 067faf0 commit b25f655
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 19 deletions.
10 changes: 3 additions & 7 deletions src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2009-2013 Mozilla Foundation
* Copyright (c) 2009-2017 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -413,8 +413,8 @@ private boolean isAstralPrivateUse(int c) {
err("Nameless doctype.");
}

@Override protected void errConsecutiveHyphens() throws SAXException {
err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is.");
@Override protected void errNestedComment() throws SAXException {
err("Saw \u201C<!--\u201D within a comment. Probable cause: Nested comment (not allowed).");
}

@Override protected void errPrematureEndOfComment() throws SAXException {
Expand Down Expand Up @@ -712,10 +712,6 @@ private boolean isAstralPrivateUse(int c) {
err("Missing space before doctype name.");
}

@Override protected void errHyphenHyphenBang() throws SAXException {
err("\u201C--!\u201D found in comment.");
}

@Override protected void errNcrControlChar() throws SAXException {
err("Character reference expands to a control character ("
+ toUPlusString((char) value) + ").");
Expand Down
199 changes: 187 additions & 12 deletions src/nu/validator/htmlparser/impl/Tokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,14 @@ public class Tokenizer implements Locator {

public static final int AMBIGUOUS_AMPERSAND = 75;

public static final int COMMENT_LESSTHAN = 76;

public static final int COMMENT_LESSTHAN_BANG = 77;

public static final int COMMENT_LESSTHAN_BANG_DASH = 78;

public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79;

/**
* Magic value for UTF-16 operations.
*/
Expand Down Expand Up @@ -1034,9 +1042,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {

// ]NOCPP]

@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c, boolean reportedConsecutiveHyphens)
throws SAXException {
errConsecutiveHyphens();
// [NOCPP[
switch (commentPolicy) {
case ALTER_INFOSET:
Expand All @@ -1047,7 +1054,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
appendStrBuf('-');
// CPPONLY: MOZ_FALLTHROUGH;
case ALLOW:
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
if (!reportedConsecutiveHyphens) {
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
}
// ]NOCPP]
appendStrBuf(c);
// [NOCPP[
Expand Down Expand Up @@ -1509,6 +1518,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
@SuppressWarnings("unused") private int stateLoop(int state, char c,
int pos, @NoLength char[] buf, boolean reconsume, int returnState,
int endPos) throws SAXException {
boolean reportedConsecutiveHyphens = false;
/*
* Idioms used in this code:
*
Expand Down Expand Up @@ -2594,6 +2604,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
}
// CPPONLY: MOZ_FALLTHROUGH;
case COMMENT_START:
reportedConsecutiveHyphens = false;
commentstartloop: for (;;) {
if (++pos == endPos) {
break stateloop;
Expand Down Expand Up @@ -2626,6 +2637,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand Down Expand Up @@ -2671,6 +2686,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
break commentloop;
// continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
break stateloop;
Expand Down Expand Up @@ -2713,6 +2732,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
break commentenddashloop;
// continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand Down Expand Up @@ -2767,11 +2790,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
* Append a U+002D HYPHEN-MINUS (-) character to
* the comment token's data.
*/
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
/*
* Stay in the comment end state.
*/
continue;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand All @@ -2781,7 +2809,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
continue stateloop;
case '!':
errHyphenHyphenBang();
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
continue stateloop;
Expand All @@ -2794,7 +2821,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
* and the input character to the comment
* token's data.
*/
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
reportedConsecutiveHyphens = true;
/*
* Switch to the comment state.
*/
Expand Down Expand Up @@ -2864,6 +2892,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
continue stateloop;
}
}
case COMMENT_LESSTHAN:
    /*
     * Comment less-than sign state: the previous character was a '<'
     * inside a comment and it has already been appended to the buffer.
     * Only an immediately following '!' continues a possible nested
     * "<!--"; every other character ends the candidate sequence.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '!':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
                continue stateloop;
            case '<':
                // Another '<' restarts the candidate sequence.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '-':
                // An ordinary comment dash; handle it as the comment
                // state would.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break is not part of "<!--": fall back to the
                // plain comment state before leaving the loop, so the
                // saved state is not COMMENT_LESSTHAN.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as the default arm: the candidate sequence is
                // broken, so return to the plain comment state instead
                // of staying here.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
case COMMENT_LESSTHAN_BANG:
    /*
     * Comment less-than sign bang state: "<!" has been seen inside a
     * comment and is already in the buffer. Only an immediately
     * following '-' continues a possible nested "<!--".
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                // A new '<' starts a fresh candidate sequence.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break ends the "<!" candidate; record the plain
                // comment state before leaving the loop.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as the default arm: return to the plain comment
                // state rather than remaining in this one.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
case COMMENT_LESSTHAN_BANG_DASH:
    /*
     * Comment less-than sign bang dash state: "<!-" has been seen inside
     * a comment and is already in the buffer. Only an immediately
     * following '-' completes a nested "<!--".
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                // A new '<' starts a fresh candidate sequence.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break separates the dash already seen from any
                // later one, so the pair cannot form "--". Record the
                // plain comment state before leaving the loop.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as the default arm. Staying in this state would
                // let '-', line feed, '-', '>' close the comment.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
case COMMENT_LESSTHAN_BANG_DASH_DASH:
    /*
     * Comment less-than sign bang dash dash state: "<!--" has been seen
     * inside a comment and is already in the buffer. A '>' here closes
     * the comment; any other character means the "<!--" was a nested
     * comment opener, which is reported, and the character is then
     * handled the way the comment end state handles it.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '>':
                // The trailing "--" plus this '>' form the terminator;
                // emitComment is asked to drop those three characters.
                appendStrBuf(c);
                emitComment(3, pos);
                state = transition(state, Tokenizer.DATA, reconsume, pos);
                continue stateloop;
            case '-':
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                continue stateloop;
            case '\r':
                errNestedComment();
                // Use the carriage-return helper, as the comment end
                // state does, instead of appending a raw CR.
                adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                errNestedComment();
                // Use the line-feed helper rather than appending the raw
                // character.
                adjustDoubleHyphenAndAppendToStrBufLineFeed();
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                // Leave this arm; a bare "continue" would keep handling
                // input here and let a following '>' close the comment.
                continue stateloop;
            case '!':
                errNestedComment();
                // "--!" may still terminate the comment, so append the
                // '!' without adjusting the preceding hyphens, matching
                // the '!' arm of the comment end state.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                continue stateloop;
            case '\u0000':
                // NUL is replaced and then treated as an ordinary
                // character; it must not be handled like '!'.
                c = '\uFFFD';
                // fall thru
            default:
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                // "--" followed by an ordinary character is comment
                // data, so go back to the comment state (as the comment
                // end state's default arm does), not the comment end
                // state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
// XXX reorder point
case COMMENT_START_DASH:
if (++pos == endPos) {
break stateloop;
Expand Down Expand Up @@ -2892,6 +3062,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
*/
state = transition(state, Tokenizer.DATA, reconsume, pos);
continue stateloop;
case '<':
appendStrBuf(c);
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
continue stateloop;
case '\r':
appendStrBufCarriageReturn();
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
Expand Down Expand Up @@ -6026,13 +6200,13 @@ private void initDoctypeFields() {
@Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
throws SAXException {
silentCarriageReturn();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}

@Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
throws SAXException {
silentLineFeed();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}

@Inline private void appendStrBufLineFeed() {
Expand Down Expand Up @@ -6337,6 +6511,8 @@ public void eof() throws SAXException {
break eofloop;
case COMMENT_START:
case COMMENT:
case COMMENT_LESSTHAN:
case COMMENT_LESSTHAN_BANG:
/*
* EOF Parse error.
*/
Expand All @@ -6348,6 +6524,7 @@ public void eof() throws SAXException {
*/
break eofloop;
case COMMENT_END:
case COMMENT_LESSTHAN_BANG_DASH_DASH:
errEofInComment();
/* Emit the comment token. */
emitComment(2, 0);
Expand All @@ -6357,6 +6534,7 @@ public void eof() throws SAXException {
break eofloop;
case COMMENT_END_DASH:
case COMMENT_START_DASH:
case COMMENT_LESSTHAN_BANG_DASH:
errEofInComment();
/* Emit the comment token. */
emitComment(1, 0);
Expand Down Expand Up @@ -6981,7 +7159,7 @@ protected void errGtInPublicId() throws SAXException {
protected void errNamelessDoctype() throws SAXException {
}

protected void errConsecutiveHyphens() throws SAXException {
protected void errNestedComment() throws SAXException {
}

protected void errPrematureEndOfComment() throws SAXException {
Expand Down Expand Up @@ -7131,9 +7309,6 @@ protected void errExpectedSystemId() throws SAXException {
protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
}

protected void errHyphenHyphenBang() throws SAXException {
}

protected void errNcrControlChar() throws SAXException {
}

Expand Down

0 comments on commit b25f655

Please sign in to comment.