Skip to content

Commit 44427c0

Browse files
hsivonen authored and sideshowbarker committed
Mozilla bug 1153920 - Conform ambiguous-ampersand reporting to HTML spec.
Created by inlining the `AMBIGUOUS_AMPERSAND` state in #30 back into the states that transitioned to `AMBIGUOUS_AMPERSAND` in that PR by @sideshowbarker.
1 parent e032e17 commit 44427c0

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

src/nu/validator/htmlparser/impl/Tokenizer.java

Lines changed: 14 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -3231,6 +3231,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
32313231
case '<':
32323232
case '&':
32333233
case '\u0000':
3234+
case ';':
32343235
emitOrAppendCharRefBuf(returnState);
32353236
if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
32363237
cstart = pos;
@@ -3259,11 +3260,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
32593260
firstCharKey = c - 'A';
32603261
} else {
32613262
// No match
3262-
/*
3263-
* If no match can be made, then this is a parse
3264-
* error.
3265-
*/
3266-
errNoNamedCharacterMatch();
3263+
if (c == ';') {
3264+
errNoNamedCharacterMatch();
3265+
}
32673266
emitOrAppendCharRefBuf(returnState);
32683267
if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
32693268
cstart = pos;
@@ -3330,11 +3329,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
33303329
}
33313330
}
33323331
if (hilo == 0) {
3333-
/*
3334-
* If no match can be made, then this is a parse
3335-
* error.
3336-
*/
3337-
errNoNamedCharacterMatch();
3332+
if (c == ';') {
3333+
errNoNamedCharacterMatch();
3334+
}
33383335
emitOrAppendCharRefBuf(returnState);
33393336
if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
33403337
cstart = pos;
@@ -3423,10 +3420,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
34233420

34243421
if (candidate == -1) {
34253422
// reconsume deals with CR, LF or nul
3426-
/*
3427-
* If no match can be made, then this is a parse error.
3428-
*/
3429-
errNoNamedCharacterMatch();
3423+
if (c == ';') {
3424+
errNoNamedCharacterMatch();
3425+
}
34303426
emitOrAppendCharRefBuf(returnState);
34313427
if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
34323428
cstart = pos;
@@ -3470,7 +3466,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
34703466
* after the U+0026 AMPERSAND (&) must be
34713467
* unconsumed, and nothing is returned.
34723468
*/
3473-
errNoNamedCharacterMatch();
3469+
if (c == ';') {
3470+
errNoNamedCharacterMatch();
3471+
}
34743472
appendCharRefBufToStrBuf();
34753473
reconsume = true;
34763474
state = transition(state, returnState, reconsume, pos);
@@ -3536,6 +3534,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
35363534
* I'm ∉ I tell you.
35373535
*/
35383536
}
3537+
// XXX reorder point
35393538
case CONSUME_NCR:
35403539
if (++pos == endPos) {
35413540
break stateloop;
@@ -6637,7 +6636,6 @@ public void eof() throws SAXException {
66376636
state = returnState;
66386637
continue;
66396638
case CHARACTER_REFERENCE_HILO_LOOKUP:
6640-
errNoNamedCharacterMatch();
66416639
emitOrAppendCharRefBuf(returnState);
66426640
state = returnState;
66436641
continue;
@@ -6691,10 +6689,6 @@ public void eof() throws SAXException {
66916689
}
66926690

66936691
if (candidate == -1) {
6694-
/*
6695-
* If no match can be made, then this is a parse error.
6696-
*/
6697-
errNoNamedCharacterMatch();
66986692
emitOrAppendCharRefBuf(returnState);
66996693
state = returnState;
67006694
continue eofloop;
@@ -6732,7 +6726,6 @@ public void eof() throws SAXException {
67326726
* after the U+0026 AMPERSAND (&) must be
67336727
* unconsumed, and nothing is returned.
67346728
*/
6735-
errNoNamedCharacterMatch();
67366729
appendCharRefBufToStrBuf();
67376730
state = returnState;
67386731
continue eofloop;

0 commit comments

Comments
 (0)