Skip to content

Commit 9e1805d

Browse files
committed
Fix character references ending in raw nodes
Closes GH-15.
1 parent 5175c03 commit 9e1805d

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

lib/index.js

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,21 @@
2828
* Way too simple, but works for us.
2929
* @property {HiddenPreprocessor} preprocessor
3030
* @property {(value: string) => void} write
31+
* @property {() => number} _consume
3132
* @property {Array.<HiddenToken>} tokenQueue
3233
* @property {string} state
3334
* @property {string} returnState
3435
* @property {number} charRefCode
3536
* @property {Array.<number>} tempBuff
37+
* @property {Function} _flushCodePointsConsumedAsCharacterReference
3638
* @property {string} lastStartTagName
3739
* @property {number} consumedAfterSnapshot
3840
* @property {boolean} active
3941
* @property {HiddenToken|undefined} currentCharacterToken
4042
* @property {HiddenToken|undefined} currentToken
4143
* @property {unknown} currentAttr
44+
* @property {Function} NAMED_CHARACTER_REFERENCE_STATE
45+
* @property {Function} NUMERIC_CHARACTER_REFERENCE_END_STATE
4246
*
4347
* @typedef {Object.<string, unknown> & {location: P5Location}} HiddenToken
4448
*
@@ -359,7 +363,20 @@ export const raw =
359363
// See the code for `parse` and `parseFragment`:
360364
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>.
361365
tokenizer.write(node.value)
362-
parser._runParsingLoop(undefined)
366+
parser._runParsingLoop(null)
367+
368+
// Character references hang in the tokenizer, so if we ended there, we need to flush
369+
// those too.
370+
// We reset the preprocessor as if the document ends here.
371+
// Then a single call to the relevant state does the trick: parse5
372+
// consumes the whole token.
373+
if (
374+
tokenizer.state === 'NAMED_CHARACTER_REFERENCE_STATE' ||
375+
tokenizer.state === 'NUMERIC_CHARACTER_REFERENCE_END_STATE'
376+
) {
377+
preprocessor.lastChunkWritten = true
378+
tokenizer[tokenizer.state](tokenizer._consume())
379+
}
363380

364381
// Process final characters if they’re still there after hibernating.
365382
// Similar to:

test.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,24 @@ test('raw', (t) => {
153153
'should pass raw nodes through even after textarea (#3)'
154154
)
155155

156+
t.deepEqual(
157+
raw(u('root', [u('raw', '&#123;and&#125;')])),
158+
u('root', {data: {quirksMode: false}}, [u('text', '{and}')]),
159+
'should pass character references through (decimal)'
160+
)
161+
162+
t.deepEqual(
163+
raw(u('root', [u('raw', '&lt;and&gt;')])),
164+
u('root', {data: {quirksMode: false}}, [u('text', '<and>')]),
165+
'should pass character references through (named)'
166+
)
167+
168+
t.deepEqual(
169+
raw(u('root', [u('raw', '&#x7b;and&#x7d;')])),
170+
u('root', {data: {quirksMode: false}}, [u('text', '{and}')]),
171+
'should pass character references through (hexadecimal)'
172+
)
173+
156174
t.deepEqual(
157175
raw(u('root', [u('raw', '<template>a<b></b>c</template>')])),
158176
u('root', {data: {quirksMode: false}}, [

0 commit comments

Comments
 (0)