• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import { Preprocessor } from './preprocessor.js';
2import {
3    CODE_POINTS as $,
4    SEQUENCES as $$,
5    REPLACEMENT_CHARACTER,
6    isSurrogate,
7    isUndefinedCodePoint,
8    isControlCodePoint,
9} from '../common/unicode.js';
10import {
11    TokenType,
12    getTokenAttr,
13    type Token,
14    type CharacterToken,
15    type DoctypeToken,
16    type TagToken,
17    type EOFToken,
18    type CommentToken,
19    type Attribute,
20    type Location,
21} from '../common/token.js';
22import { htmlDecodeTree, BinTrieFlags, determineBranch } from 'entities/lib/decode.js';
23import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
24import { TAG_ID, getTagID } from '../common/html.js';
25
26//C1 Unicode control character reference replacements
/**
 * Replacement table for character references that point into the C1 control range.
 * Keys are referenced code points between 0x80 and 0x9F; values are their replacements.
 * 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry in this table.
 */
const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map<number, number>([
    [0x80, 0x20ac],
    [0x82, 0x201a],
    [0x83, 0x0192],
    [0x84, 0x201e],
    [0x85, 0x2026],
    [0x86, 0x2020],
    [0x87, 0x2021],
    [0x88, 0x02c6],
    [0x89, 0x2030],
    [0x8a, 0x0160],
    [0x8b, 0x2039],
    [0x8c, 0x0152],
    [0x8e, 0x017d],
    [0x91, 0x2018],
    [0x92, 0x2019],
    [0x93, 0x201c],
    [0x94, 0x201d],
    [0x95, 0x2022],
    [0x96, 0x2013],
    [0x97, 0x2014],
    [0x98, 0x02dc],
    [0x99, 0x2122],
    [0x9a, 0x0161],
    [0x9b, 0x203a],
    [0x9c, 0x0153],
    [0x9e, 0x017e],
    [0x9f, 0x0178],
]);
56
57//States
/**
 * Every state the tokenizer's state machine can be in.
 *
 * Members carry no explicit initializers, so they are numbered from 0 in declaration order;
 * inserting or reordering members changes the numeric values exposed through `TokenizerMode`.
 *
 * NOTE(review): `HEXADEMICAL_*` looks like a misspelling of "HEXADECIMAL"; the names are left
 * untouched here because renaming them would require updating every reference.
 */
const enum State {
    DATA,
    RCDATA,
    RAWTEXT,
    SCRIPT_DATA,
    PLAINTEXT,
    TAG_OPEN,
    END_TAG_OPEN,
    TAG_NAME,
    RCDATA_LESS_THAN_SIGN,
    RCDATA_END_TAG_OPEN,
    RCDATA_END_TAG_NAME,
    RAWTEXT_LESS_THAN_SIGN,
    RAWTEXT_END_TAG_OPEN,
    RAWTEXT_END_TAG_NAME,
    SCRIPT_DATA_LESS_THAN_SIGN,
    SCRIPT_DATA_END_TAG_OPEN,
    SCRIPT_DATA_END_TAG_NAME,
    SCRIPT_DATA_ESCAPE_START,
    SCRIPT_DATA_ESCAPE_START_DASH,
    SCRIPT_DATA_ESCAPED,
    SCRIPT_DATA_ESCAPED_DASH,
    SCRIPT_DATA_ESCAPED_DASH_DASH,
    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME,
    SCRIPT_DATA_DOUBLE_ESCAPE_START,
    SCRIPT_DATA_DOUBLE_ESCAPED,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
    SCRIPT_DATA_DOUBLE_ESCAPE_END,
    BEFORE_ATTRIBUTE_NAME,
    ATTRIBUTE_NAME,
    AFTER_ATTRIBUTE_NAME,
    BEFORE_ATTRIBUTE_VALUE,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED,
    ATTRIBUTE_VALUE_SINGLE_QUOTED,
    ATTRIBUTE_VALUE_UNQUOTED,
    AFTER_ATTRIBUTE_VALUE_QUOTED,
    SELF_CLOSING_START_TAG,
    BOGUS_COMMENT,
    MARKUP_DECLARATION_OPEN,
    COMMENT_START,
    COMMENT_START_DASH,
    COMMENT,
    COMMENT_LESS_THAN_SIGN,
    COMMENT_LESS_THAN_SIGN_BANG,
    COMMENT_LESS_THAN_SIGN_BANG_DASH,
    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH,
    COMMENT_END_DASH,
    COMMENT_END,
    COMMENT_END_BANG,
    DOCTYPE,
    BEFORE_DOCTYPE_NAME,
    DOCTYPE_NAME,
    AFTER_DOCTYPE_NAME,
    AFTER_DOCTYPE_PUBLIC_KEYWORD,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
    AFTER_DOCTYPE_SYSTEM_KEYWORD,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
    BOGUS_DOCTYPE,
    CDATA_SECTION,
    CDATA_SECTION_BRACKET,
    CDATA_SECTION_END,
    CHARACTER_REFERENCE,
    NAMED_CHARACTER_REFERENCE,
    AMBIGUOUS_AMPERSAND,
    NUMERIC_CHARACTER_REFERENCE,
    HEXADEMICAL_CHARACTER_REFERENCE_START,
    DECIMAL_CHARACTER_REFERENCE_START,
    HEXADEMICAL_CHARACTER_REFERENCE,
    DECIMAL_CHARACTER_REFERENCE,
    NUMERIC_CHARACTER_REFERENCE_END,
}
140
141//Tokenizer initial states for different modes
/**
 * Publicly exported subset of `State`: the states a caller may assign to `Tokenizer.state`
 * as a starting point. Each entry is the numeric value of the `State` member of the same name.
 */
export const TokenizerMode = {
    DATA: State.DATA,
    RCDATA: State.RCDATA,
    RAWTEXT: State.RAWTEXT,
    SCRIPT_DATA: State.SCRIPT_DATA,
    PLAINTEXT: State.PLAINTEXT,
    CDATA_SECTION: State.CDATA_SECTION,
} as const;
150
151//Utils
152
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
156
/** Tells whether `cp` lies in the inclusive range `$.DIGIT_0`..`$.DIGIT_9`. */
function isAsciiDigit(cp: number): boolean {
    return $.DIGIT_0 <= cp && cp <= $.DIGIT_9;
}
160
/** Tells whether `cp` lies in the inclusive range `$.LATIN_CAPITAL_A`..`$.LATIN_CAPITAL_Z`. */
function isAsciiUpper(cp: number): boolean {
    return $.LATIN_CAPITAL_A <= cp && cp <= $.LATIN_CAPITAL_Z;
}
164
/** Tells whether `cp` lies in the inclusive range `$.LATIN_SMALL_A`..`$.LATIN_SMALL_Z`. */
function isAsciiLower(cp: number): boolean {
    return $.LATIN_SMALL_A <= cp && cp <= $.LATIN_SMALL_Z;
}
168
/** Tells whether `cp` is accepted by either `isAsciiUpper` or `isAsciiLower`. */
function isAsciiLetter(cp: number): boolean {
    if (isAsciiUpper(cp)) {
        return true;
    }

    return isAsciiLower(cp);
}
172
/** Tells whether `cp` is accepted by either `isAsciiDigit` or `isAsciiLetter`. */
function isAsciiAlphaNumeric(cp: number): boolean {
    if (isAsciiDigit(cp)) {
        return true;
    }

    return isAsciiLetter(cp);
}
176
/** Tells whether `cp` lies in the inclusive range `$.LATIN_CAPITAL_A`..`$.LATIN_CAPITAL_F`. */
function isAsciiUpperHexDigit(cp: number): boolean {
    return $.LATIN_CAPITAL_A <= cp && cp <= $.LATIN_CAPITAL_F;
}
180
/** Tells whether `cp` lies in the inclusive range `$.LATIN_SMALL_A`..`$.LATIN_SMALL_F`. */
function isAsciiLowerHexDigit(cp: number): boolean {
    return $.LATIN_SMALL_A <= cp && cp <= $.LATIN_SMALL_F;
}
184
/** Tells whether `cp` is a decimal digit or an upper- or lower-case hex letter, per the sibling predicates. */
function isAsciiHexDigit(cp: number): boolean {
    if (isAsciiDigit(cp)) {
        return true;
    }

    if (isAsciiUpperHexDigit(cp)) {
        return true;
    }

    return isAsciiLowerHexDigit(cp);
}
188
/**
 * Shifts `cp` up by 0x20 without checking it.
 * The addition is unconditional, so the result is only meaningful for inputs for which
 * that shift is the intended mapping (e.g. an upper-case ASCII letter).
 */
function toAsciiLower(cp: number): number {
    const lowerCaseOffset = 0x20;

    return cp + lowerCaseOffset;
}
192
/** Tells whether `cp` is a space, line feed, tab or form feed. */
function isWhitespace(cp: number): boolean {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            return true;
        }
        default: {
            return false;
        }
    }
}
196
/** Tells whether `nextCp` is an equals sign or an ASCII alphanumeric. */
function isEntityInAttributeInvalidEnd(nextCp: number): boolean {
    if (nextCp === $.EQUALS_SIGN) {
        return true;
    }

    return isAsciiAlphaNumeric(nextCp);
}
200
/** Tells whether `cp` is whitespace, a solidus (`/`) or a greater-than sign (`>`). */
function isScriptDataDoubleEscapeSequenceEnd(cp: number): boolean {
    if (isWhitespace(cp)) {
        return true;
    }

    return cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN;
}
204
/** Fallback validator used when the caller supplies none: it reports every tag as not self-closing capable. */
const componentValidator = {
    isSupportedSelfClosing: (): boolean => {
        return false;
    },
};
206
/**
 * Pluggable policy object consulted by the tokenizer about tag names.
 * The built-in default (`componentValidator`) answers `false` for every tag.
 */
interface Validator {
    /**
     * @param tagName Name of the tag being checked.
     * @returns Whether the tag is allowed to be written in self-closing form
     * (presumably consumed by the self-closing check run when a tag token is emitted — confirm against that helper).
     */
    isSupportedSelfClosing(tagName: string): boolean;
}
210
/**
 * Mutable result record shared between the tokenizer and its caller.
 *
 * `deps` was previously typed as `[]`, which in TypeScript is the *empty tuple* (an array that
 * can never hold an element). It is widened to `unknown[]` so entries can actually be stored;
 * the empty array literal used as the default still satisfies the type.
 */
interface CompileResult {
    /** Compiled template payload; its shape is not constrained here. */
    jsonTemplate: {};
    /** Collected dependencies (element type is not established in this module). */
    deps: unknown[];
    /** Diagnostics, each tied to a source line/column and carrying a human-readable reason. */
    log: Array<{
        line: number;
        column: number;
        reason: string;
    }>;
}
220
/**
 * Small record describing a node, kept on the tokenizer as `nodeInfo`.
 * NOTE(review): the field meanings below are inferred from the abbreviations — confirm against the code that fills them.
 */
interface NodeInfo {
    /** Presumably the tag name. */
    tn: string;
    /** Presumably a self-closing flag. */
    sc: boolean;
    /** Presumably a textual source position. */
    pos: string;
}
226
/** Settings accepted by the `Tokenizer` constructor. Every field is optional. */
export interface TokenizerOptions {
    /** Replaces the tokenizer's default validator when provided. */
    componentValidator?: Validator;
    /** Replaces the tokenizer's default (empty) compile result when provided. */
    compileResult?: CompileResult;
    /** When falsy, the tokenizer produces `null` instead of location objects. */
    sourceCodeLocationInfo?: boolean;
}
232
/**
 * Callbacks through which the tokenizer hands finished tokens to its consumer.
 * The same object is also passed to the `Preprocessor` constructor.
 */
export interface TokenHandler {
    /** Receives each emitted comment token. */
    onComment(token: CommentToken): void;
    /** Receives each emitted doctype token. */
    onDoctype(token: DoctypeToken): void;
    /** Receives each emitted start-tag token. */
    onStartTag(token: TagToken): void;
    /** Receives each emitted end-tag token. */
    onEndTag(token: TagToken): void;
    /** Receives the end-of-file token. */
    onEof(token: EOFToken): void;
    /** Receives a pending character token of type `TokenType.CHARACTER`. */
    onCharacter(token: CharacterToken): void;
    /** Receives a pending character token of type `TokenType.NULL_CHARACTER`. */
    onNullCharacter(token: CharacterToken): void;
    /** Receives a pending character token of type `TokenType.WHITESPACE_CHARACTER`. */
    onWhitespaceCharacter(token: CharacterToken): void;

    /** Optional parse-error sink; errors are silently dropped when it is absent or `null`. */
    onParseError?: ParserErrorHandler | null;
}
245
246//Tokenizer
247export class Tokenizer {
    /** Input source the tokenizer pulls code points from; created in the constructor. */
    public preprocessor: Preprocessor;

    /** Set by `pause()` and cleared by `resume()`; the parsing loop stops while it is `true`. */
    private paused = false;
    /** Ensures that the parsing loop isn't run multiple times at once. */
    private inLoop = false;

    /**
     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
     * and that it is not an integration point for either MathML or HTML.
     *
     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
     */
    public inForeignNode = false;
    /** Tag name of the most recently emitted start tag. */
    public lastStartTagName = '';
    /** `true` while there is input to process; cleared on hibernation and after the EOF token. */
    public active = false;

    /** NOTE(review): not written by any code visible in this part of the file — confirm who fills it. */
    public nodeInfo: NodeInfo = { tn: '', sc: false, pos: '' };
    /** Validator in use; the constructor replaces it with `options.componentValidator` when given. */
    public validator: Validator = componentValidator;
    /** Result record in use; the constructor replaces it with `options.compileResult` when given. */
    public compileResult: CompileResult = { jsonTemplate: {}, deps: [], log: [] };

    /** Current state of the state machine; selects the handler the parsing loop dispatches to. */
    public state = State.DATA;
    /** State consulted to decide whether a character reference is being parsed inside an attribute value. */
    private returnState = State.DATA;

    private charRefCode = -1;

    /** Number of code points consumed since the start of the current loop iteration; used to rewind on hibernation. */
    private consumedAfterSnapshot = -1;

    /** Location being built for the next token or attribute; `null` when location tracking is off. */
    private currentLocation: Location | null;
    /** Character token that is accumulated but not yet handed to the handler. */
    private currentCharacterToken: CharacterToken | null = null;
    /** Non-character token currently under construction. */
    private currentToken: Token | null = null;
    /** Attribute currently under construction. */
    private currentAttr: Attribute = { name: '', value: '' };
279
280    constructor(private options: TokenizerOptions, private handler: TokenHandler) {
281        this.preprocessor = new Preprocessor(handler);
282        this.currentLocation = this.getCurrentLocation(-1);
283
284        if(options.componentValidator){
285            this.validator = options.componentValidator;
286        }
287        if(options.compileResult){
288            this.compileResult = options.compileResult;
289        }
290    }
291
292    //Errors
293    private _err(code: ERR): void {
294        this.handler.onParseError?.(this.preprocessor.getError(code));
295    }
296
297    // NOTE: `offset` may never run across line boundaries.
298    private getCurrentLocation(offset: number): Location | null {
299        if (!this.options.sourceCodeLocationInfo) {
300            return null;
301        }
302
303        return {
304            startLine: this.preprocessor.line,
305            startCol: this.preprocessor.col - offset,
306            startOffset: this.preprocessor.offset - offset,
307            endLine: -1,
308            endCol: -1,
309            endOffset: -1,
310        };
311    }
312
313    private _runParsingLoop(): void {
314        if (this.inLoop) return;
315
316        this.inLoop = true;
317
318        while (this.active && !this.paused) {
319            this.consumedAfterSnapshot = 0;
320
321            const cp = this._consume();
322
323            if (!this._ensureHibernation()) {
324                this._callState(cp);
325            }
326        }
327
328        this.inLoop = false;
329    }
330
331    //API
    /** Marks the tokenizer as paused; the parsing loop stops before its next iteration. */
    public pause(): void {
        this.paused = true;
    }
335
336    public resume(writeCallback?: () => void): void {
337        if (!this.paused) {
338            throw new Error('Parser was already resumed');
339        }
340
341        this.paused = false;
342
343        // Necessary for synchronous resume.
344        if (this.inLoop) return;
345
346        this._runParsingLoop();
347
348        if (!this.paused) {
349            writeCallback?.();
350        }
351    }
352
353    public write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void {
354        this.active = true;
355        this.preprocessor.write(chunk, isLastChunk);
356        this._runParsingLoop();
357
358        if (!this.paused) {
359            writeCallback?.();
360        }
361    }
362
    /**
     * Marks the tokenizer active, forwards `chunk` to the preprocessor's
     * `insertHtmlAtCurrentPos`, and runs the parsing loop.
     *
     * @param chunk Markup to insert.
     */
    public insertHtmlAtCurrentPos(chunk: string): void {
        this.active = true;
        this.preprocessor.insertHtmlAtCurrentPos(chunk);
        this._runParsingLoop();
    }
368
369    //Hibernation
370    private _ensureHibernation(): boolean {
371        if (this.preprocessor.endOfChunkHit) {
372            this._unconsume(this.consumedAfterSnapshot);
373            this.active = false;
374
375            return true;
376        }
377
378        return false;
379    }
380
381    //Consumption
382    private _consume(): number {
383        this.consumedAfterSnapshot++;
384        return this.preprocessor.advance();
385    }
386
    /**
     * Gives back `count` code points: lowers the snapshot counter and retreats the preprocessor.
     *
     * @param count Number of code points to step back.
     */
    private _unconsume(count: number): void {
        this.consumedAfterSnapshot -= count;
        this.preprocessor.retreat(count);
    }
391
    /**
     * Switches to `state` and steps back one code point so that the code point just consumed
     * is read again on the next loop iteration.
     *
     * @param state State to switch to.
     */
    private _reconsumeInState(state: State): void {
        this.state = state;
        this._unconsume(1);
    }
396
397    private _advanceBy(count: number): void {
398        this.consumedAfterSnapshot += count;
399        for (let i = 0; i < count; i++) {
400            this.preprocessor.advance();
401        }
402    }
403
404    private _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean {
405        if (this.preprocessor.startsWith(pattern, caseSensitive)) {
406            // We will already have consumed one character before calling this method.
407            this._advanceBy(pattern.length - 1);
408            return true;
409        }
410        return false;
411    }
412
413    //Token creation
414    private _createStartTagToken(): void {
415        this.currentToken = {
416            type: TokenType.START_TAG,
417            tagName: '',
418            tagID: TAG_ID.UNKNOWN,
419            selfClosing: false,
420            ackSelfClosing: false,
421            attrs: [],
422            location: this.getCurrentLocation(1),
423        };
424    }
425
426    private _createEndTagToken(): void {
427        this.currentToken = {
428            type: TokenType.END_TAG,
429            tagName: '',
430            tagID: TAG_ID.UNKNOWN,
431            selfClosing: false,
432            ackSelfClosing: false,
433            attrs: [],
434            location: this.getCurrentLocation(2),
435        };
436    }
437
438    private _createCommentToken(offset: number): void {
439        this.currentToken = {
440            type: TokenType.COMMENT,
441            data: '',
442            location: this.getCurrentLocation(offset),
443        };
444    }
445
446    private _createDoctypeToken(initialName: string | null): void {
447        this.currentToken = {
448            type: TokenType.DOCTYPE,
449            name: initialName,
450            forceQuirks: false,
451            publicId: null,
452            systemId: null,
453            location: this.currentLocation,
454        };
455    }
456
457    private _createCharacterToken(type: CharacterToken['type'], chars: string): void {
458        this.currentCharacterToken = {
459            type,
460            chars,
461            location: this.currentLocation,
462        };
463    }
464
465    //Tag attributes
466    private _createAttr(attrNameFirstCh: string): void {
467        this.currentAttr = {
468            name: attrNameFirstCh,
469            value: '',
470        };
471        this.currentLocation = this.getCurrentLocation(0);
472    }
473
474    private _leaveAttrName(): void {
475        const token = this.currentToken as TagToken;
476
477        if (getTokenAttr(token, this.currentAttr.name) === null) {
478            token.attrs.push(this.currentAttr);
479
480            if (token.location && this.currentLocation) {
481                const attrLocations = (token.location.attrs ??= Object.create(null));
482                attrLocations[this.currentAttr.name] = this.currentLocation;
483
484                // Set end location
485                this._leaveAttrValue();
486            }
487        } else {
488            this._err(ERR.duplicateAttribute);
489        }
490    }
491
492    private _leaveAttrValue(): void {
493        if (this.currentLocation) {
494            this.currentLocation.endLine = this.preprocessor.line;
495            this.currentLocation.endCol = this.preprocessor.col;
496            this.currentLocation.endOffset = this.preprocessor.offset;
497        }
498    }
499
500    //Token emission
501    private prepareToken(ct: Token): void {
502        this._emitCurrentCharacterToken(ct.location);
503        this.currentToken = null;
504
505        if (ct.location) {
506            ct.location.endLine = this.preprocessor.line;
507            ct.location.endCol = this.preprocessor.col + 1;
508            ct.location.endOffset = this.preprocessor.offset + 1;
509        }
510
511        this.currentLocation = this.getCurrentLocation(-1);
512    }
513
514    private emitCurrentTagToken(): void {
515        const ct = this.currentToken as TagToken;
516
517        checkselfClosingNode(this, ct);
518
519        this.prepareToken(ct);
520
521        ct.tagID = getTagID(ct.tagName);
522
523        if (ct.type === TokenType.START_TAG) {
524            this.lastStartTagName = ct.tagName;
525            this.handler.onStartTag(ct);
526        } else {
527            if (ct.attrs.length > 0) {
528                this._err(ERR.endTagWithAttributes);
529            }
530
531            if (ct.selfClosing) {
532                this._err(ERR.endTagWithTrailingSolidus);
533            }
534
535            this.handler.onEndTag(ct);
536        }
537
538        this.preprocessor.dropParsedChunk();
539    }
540
    /**
     * Finalises `ct`, hands it to the handler's `onComment`, then drops the parsed chunk.
     *
     * @param ct Comment token to emit.
     */
    private emitCurrentComment(ct: CommentToken): void {
        this.prepareToken(ct);
        this.handler.onComment(ct);

        this.preprocessor.dropParsedChunk();
    }
547
    /**
     * Finalises `ct`, hands it to the handler's `onDoctype`, then drops the parsed chunk.
     *
     * @param ct Doctype token to emit.
     */
    private emitCurrentDoctype(ct: DoctypeToken): void {
        this.prepareToken(ct);
        this.handler.onDoctype(ct);

        this.preprocessor.dropParsedChunk();
    }
554
555    private _emitCurrentCharacterToken(nextLocation: Location | null): void {
556        if (this.currentCharacterToken) {
557            //NOTE: if we have a pending character token, make it's end location equal to the
558            //current token's start location.
559            if (nextLocation && this.currentCharacterToken.location) {
560                this.currentCharacterToken.location.endLine = nextLocation.startLine;
561                this.currentCharacterToken.location.endCol = nextLocation.startCol;
562                this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
563            }
564
565            switch (this.currentCharacterToken.type) {
566                case TokenType.CHARACTER: {
567                    this.handler.onCharacter(this.currentCharacterToken);
568                    break;
569                }
570                case TokenType.NULL_CHARACTER: {
571                    this.handler.onNullCharacter(this.currentCharacterToken);
572                    break;
573                }
574                case TokenType.WHITESPACE_CHARACTER: {
575                    this.handler.onWhitespaceCharacter(this.currentCharacterToken);
576                    break;
577                }
578            }
579
580            this.currentCharacterToken = null;
581        }
582    }
583
584    private _emitEOFToken(): void {
585        const location = this.getCurrentLocation(0);
586
587        if (location) {
588            location.endLine = location.startLine;
589            location.endCol = location.startCol;
590            location.endOffset = location.startOffset;
591        }
592
593        this._emitCurrentCharacterToken(location);
594        this.handler.onEof({ type: TokenType.EOF, location });
595        this.active = false;
596    }
597
598    //Characters emission
599
600    //OPTIMIZATION: specification uses only one type of character tokens (one token per character).
601    //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
602    //If we have a sequence of characters that belong to the same group, the parser can process it
603    //as a single solid character token.
604    //So, there are 3 types of character tokens in parse5:
605    //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
606    //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n  \r\t   \f')
    //3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
608    private _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void {
609        if (this.currentCharacterToken) {
610            if (this.currentCharacterToken.type !== type) {
611                this.currentLocation = this.getCurrentLocation(0);
612                this._emitCurrentCharacterToken(this.currentLocation);
613                this.preprocessor.dropParsedChunk();
614            } else {
615                this.currentCharacterToken.chars += ch;
616                return;
617            }
618        }
619
620        this._createCharacterToken(type, ch);
621    }
622
623    private _emitCodePoint(cp: number): void {
624        let type = TokenType.CHARACTER;
625
626        if (isWhitespace(cp)) {
627            type = TokenType.WHITESPACE_CHARACTER;
628        } else if (cp === $.NULL) {
629            type = TokenType.NULL_CHARACTER;
630        }
631
632        this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
633    }
634
635    //NOTE: used when we emit characters explicitly.
636    //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
    /**
     * Appends `ch` to the pending character token as `TokenType.CHARACTER`, without classifying it.
     *
     * @param ch Text to emit.
     */
    private _emitChars(ch: string): void {
        this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
    }
640
641    // Character reference helpers
    /**
     * Tries to match a named character reference by walking `htmlDecodeTree`, starting with `cp`
     * and consuming further code points for as long as `determineBranch` finds a branch.
     *
     * Each time a node carrying a value is reached, `result` is overwritten, so the value from
     * the last such node wins. Inside an attribute value, a match that is not terminated by `;`
     * and is followed by `=` or an ASCII alphanumeric is not decoded; `[$.AMPERSAND]` is recorded instead.
     *
     * Before returning, the code points consumed past the last decoded match are given back, and
     * a missing-semicolon error is reported when the decoded match did not end in `;`
     * (unless the end of the chunk was hit).
     *
     * @param cp First code point to feed into the trie.
     * @returns The code points to emit, or `null` when nothing matched.
     */
    private _matchNamedCharacterReference(cp: number): number[] | null {
        let result: number[] | null = null;
        // Code points consumed since the last decoded match; these are unconsumed at the end.
        let excess = 0;
        let withoutSemicolon = false;

        for (let i = 0, current = htmlDecodeTree[0]; i >= 0; cp = this._consume()) {
            i = determineBranch(htmlDecodeTree, current, i + 1, cp);

            // A negative index means there is no branch for this code point: stop matching.
            if (i < 0) break;

            excess += 1;

            current = htmlDecodeTree[i];

            const masked = current & BinTrieFlags.VALUE_LENGTH;

            // If the branch is a value, store it and continue
            if (masked) {
                // The mask is the number of bytes of the value, including the current byte.
                const valueLength = (masked >> 14) - 1;

                // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
                // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
                if (
                    cp !== $.SEMICOLON &&
                    this._isCharacterReferenceInAttribute() &&
                    isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))
                ) {
                    //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after
                    //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes.
                    result = [$.AMPERSAND];

                    // Skip over the value.
                    i += valueLength;
                } else {
                    // If this is a surrogate pair, consume the next two bytes.
                    result =
                        valueLength === 0
                            ? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH]
                            : valueLength === 1
                            ? [htmlDecodeTree[++i]]
                            : [htmlDecodeTree[++i], htmlDecodeTree[++i]];
                    // Everything consumed so far belongs to the decoded match, so nothing is in excess.
                    excess = 0;
                    withoutSemicolon = cp !== $.SEMICOLON;
                }

                if (valueLength === 0) {
                    // If the value is zero-length, we're done.
                    this._consume();
                    break;
                }
            }
        }

        this._unconsume(excess);

        if (withoutSemicolon && !this.preprocessor.endOfChunkHit) {
            this._err(ERR.missingSemicolonAfterCharacterReference);
        }

        // We want to emit the error above on the code point after the entity.
        // We always consume one code point too many in the loop, and we wait to
        // unconsume it until after the error is emitted.
        this._unconsume(1);

        return result;
    }
709
710    private _isCharacterReferenceInAttribute(): boolean {
711        return (
712            this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
713            this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
714            this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED
715        );
716    }
717
718    private _flushCodePointConsumedAsCharacterReference(cp: number): void {
719        if (this._isCharacterReferenceInAttribute()) {
720            this.currentAttr.value += String.fromCodePoint(cp);
721        } else {
722            this._emitCodePoint(cp);
723        }
724    }
725
726    // Calling states this way turns out to be much faster than any other approach.
727    private _callState(cp: number): void {
728        switch (this.state) {
729            case State.DATA: {
730                this._stateData(cp);
731                break;
732            }
733            case State.RCDATA: {
734                this._stateRcdata(cp);
735                break;
736            }
737            case State.RAWTEXT: {
738                this._stateRawtext(cp);
739                break;
740            }
741            case State.SCRIPT_DATA: {
742                this._stateScriptData(cp);
743                break;
744            }
745            case State.PLAINTEXT: {
746                this._statePlaintext(cp);
747                break;
748            }
749            case State.TAG_OPEN: {
750                this._stateTagOpen(cp);
751                break;
752            }
753            case State.END_TAG_OPEN: {
754                this._stateEndTagOpen(cp);
755                break;
756            }
757            case State.TAG_NAME: {
758                this._stateTagName(cp);
759                break;
760            }
761            case State.RCDATA_LESS_THAN_SIGN: {
762                this._stateRcdataLessThanSign(cp);
763                break;
764            }
765            case State.RCDATA_END_TAG_OPEN: {
766                this._stateRcdataEndTagOpen(cp);
767                break;
768            }
769            case State.RCDATA_END_TAG_NAME: {
770                this._stateRcdataEndTagName(cp);
771                break;
772            }
773            case State.RAWTEXT_LESS_THAN_SIGN: {
774                this._stateRawtextLessThanSign(cp);
775                break;
776            }
777            case State.RAWTEXT_END_TAG_OPEN: {
778                this._stateRawtextEndTagOpen(cp);
779                break;
780            }
781            case State.RAWTEXT_END_TAG_NAME: {
782                this._stateRawtextEndTagName(cp);
783                break;
784            }
785            case State.SCRIPT_DATA_LESS_THAN_SIGN: {
786                this._stateScriptDataLessThanSign(cp);
787                break;
788            }
789            case State.SCRIPT_DATA_END_TAG_OPEN: {
790                this._stateScriptDataEndTagOpen(cp);
791                break;
792            }
793            case State.SCRIPT_DATA_END_TAG_NAME: {
794                this._stateScriptDataEndTagName(cp);
795                break;
796            }
797            case State.SCRIPT_DATA_ESCAPE_START: {
798                this._stateScriptDataEscapeStart(cp);
799                break;
800            }
801            case State.SCRIPT_DATA_ESCAPE_START_DASH: {
802                this._stateScriptDataEscapeStartDash(cp);
803                break;
804            }
805            case State.SCRIPT_DATA_ESCAPED: {
806                this._stateScriptDataEscaped(cp);
807                break;
808            }
809            case State.SCRIPT_DATA_ESCAPED_DASH: {
810                this._stateScriptDataEscapedDash(cp);
811                break;
812            }
813            case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
814                this._stateScriptDataEscapedDashDash(cp);
815                break;
816            }
817            case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
818                this._stateScriptDataEscapedLessThanSign(cp);
819                break;
820            }
821            case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
822                this._stateScriptDataEscapedEndTagOpen(cp);
823                break;
824            }
825            case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
826                this._stateScriptDataEscapedEndTagName(cp);
827                break;
828            }
829            case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
830                this._stateScriptDataDoubleEscapeStart(cp);
831                break;
832            }
833            case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
834                this._stateScriptDataDoubleEscaped(cp);
835                break;
836            }
837            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
838                this._stateScriptDataDoubleEscapedDash(cp);
839                break;
840            }
841            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
842                this._stateScriptDataDoubleEscapedDashDash(cp);
843                break;
844            }
845            case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
846                this._stateScriptDataDoubleEscapedLessThanSign(cp);
847                break;
848            }
849            case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
850                this._stateScriptDataDoubleEscapeEnd(cp);
851                break;
852            }
853            case State.BEFORE_ATTRIBUTE_NAME: {
854                this._stateBeforeAttributeName(cp);
855                break;
856            }
857            case State.ATTRIBUTE_NAME: {
858                this._stateAttributeName(cp);
859                break;
860            }
861            case State.AFTER_ATTRIBUTE_NAME: {
862                this._stateAfterAttributeName(cp);
863                break;
864            }
865            case State.BEFORE_ATTRIBUTE_VALUE: {
866                this._stateBeforeAttributeValue(cp);
867                break;
868            }
869            case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
870                this._stateAttributeValueDoubleQuoted(cp);
871                break;
872            }
873            case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
874                this._stateAttributeValueSingleQuoted(cp);
875                break;
876            }
877            case State.ATTRIBUTE_VALUE_UNQUOTED: {
878                this._stateAttributeValueUnquoted(cp);
879                break;
880            }
881            case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
882                this._stateAfterAttributeValueQuoted(cp);
883                break;
884            }
885            case State.SELF_CLOSING_START_TAG: {
886                this._stateSelfClosingStartTag(cp);
887                break;
888            }
889            case State.BOGUS_COMMENT: {
890                this._stateBogusComment(cp);
891                break;
892            }
893            case State.MARKUP_DECLARATION_OPEN: {
894                this._stateMarkupDeclarationOpen(cp);
895                break;
896            }
897            case State.COMMENT_START: {
898                this._stateCommentStart(cp);
899                break;
900            }
901            case State.COMMENT_START_DASH: {
902                this._stateCommentStartDash(cp);
903                break;
904            }
905            case State.COMMENT: {
906                this._stateComment(cp);
907                break;
908            }
909            case State.COMMENT_LESS_THAN_SIGN: {
910                this._stateCommentLessThanSign(cp);
911                break;
912            }
913            case State.COMMENT_LESS_THAN_SIGN_BANG: {
914                this._stateCommentLessThanSignBang(cp);
915                break;
916            }
917            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
918                this._stateCommentLessThanSignBangDash(cp);
919                break;
920            }
921            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
922                this._stateCommentLessThanSignBangDashDash(cp);
923                break;
924            }
925            case State.COMMENT_END_DASH: {
926                this._stateCommentEndDash(cp);
927                break;
928            }
929            case State.COMMENT_END: {
930                this._stateCommentEnd(cp);
931                break;
932            }
933            case State.COMMENT_END_BANG: {
934                this._stateCommentEndBang(cp);
935                break;
936            }
937            case State.DOCTYPE: {
938                this._stateDoctype(cp);
939                break;
940            }
941            case State.BEFORE_DOCTYPE_NAME: {
942                this._stateBeforeDoctypeName(cp);
943                break;
944            }
945            case State.DOCTYPE_NAME: {
946                this._stateDoctypeName(cp);
947                break;
948            }
949            case State.AFTER_DOCTYPE_NAME: {
950                this._stateAfterDoctypeName(cp);
951                break;
952            }
953            case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
954                this._stateAfterDoctypePublicKeyword(cp);
955                break;
956            }
957            case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
958                this._stateBeforeDoctypePublicIdentifier(cp);
959                break;
960            }
961            case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
962                this._stateDoctypePublicIdentifierDoubleQuoted(cp);
963                break;
964            }
965            case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
966                this._stateDoctypePublicIdentifierSingleQuoted(cp);
967                break;
968            }
969            case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
970                this._stateAfterDoctypePublicIdentifier(cp);
971                break;
972            }
973            case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
974                this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
975                break;
976            }
977            case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
978                this._stateAfterDoctypeSystemKeyword(cp);
979                break;
980            }
981            case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
982                this._stateBeforeDoctypeSystemIdentifier(cp);
983                break;
984            }
985            case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
986                this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
987                break;
988            }
989            case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
990                this._stateDoctypeSystemIdentifierSingleQuoted(cp);
991                break;
992            }
993            case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
994                this._stateAfterDoctypeSystemIdentifier(cp);
995                break;
996            }
997            case State.BOGUS_DOCTYPE: {
998                this._stateBogusDoctype(cp);
999                break;
1000            }
1001            case State.CDATA_SECTION: {
1002                this._stateCdataSection(cp);
1003                break;
1004            }
1005            case State.CDATA_SECTION_BRACKET: {
1006                this._stateCdataSectionBracket(cp);
1007                break;
1008            }
1009            case State.CDATA_SECTION_END: {
1010                this._stateCdataSectionEnd(cp);
1011                break;
1012            }
1013            case State.CHARACTER_REFERENCE: {
1014                this._stateCharacterReference(cp);
1015                break;
1016            }
1017            case State.NAMED_CHARACTER_REFERENCE: {
1018                this._stateNamedCharacterReference(cp);
1019                break;
1020            }
1021            case State.AMBIGUOUS_AMPERSAND: {
1022                this._stateAmbiguousAmpersand(cp);
1023                break;
1024            }
1025            case State.NUMERIC_CHARACTER_REFERENCE: {
1026                this._stateNumericCharacterReference(cp);
1027                break;
1028            }
1029            case State.HEXADEMICAL_CHARACTER_REFERENCE_START: {
1030                this._stateHexademicalCharacterReferenceStart(cp);
1031                break;
1032            }
1033            case State.DECIMAL_CHARACTER_REFERENCE_START: {
1034                this._stateDecimalCharacterReferenceStart(cp);
1035                break;
1036            }
1037            case State.HEXADEMICAL_CHARACTER_REFERENCE: {
1038                this._stateHexademicalCharacterReference(cp);
1039                break;
1040            }
1041            case State.DECIMAL_CHARACTER_REFERENCE: {
1042                this._stateDecimalCharacterReference(cp);
1043                break;
1044            }
1045            case State.NUMERIC_CHARACTER_REFERENCE_END: {
1046                this._stateNumericCharacterReferenceEnd();
1047                break;
1048            }
1049            default: {
1050                throw new Error('Unknown state');
1051            }
1052        }
1053    }
1054
1055    // State machine
1056
1057    // Data state
1058    //------------------------------------------------------------------
1059    private _stateData(cp: number): void {
1060        switch (cp) {
1061            case $.LESS_THAN_SIGN: {
1062                this.state = State.TAG_OPEN;
1063                break;
1064            }
1065            case $.AMPERSAND: {
1066                this.returnState = State.DATA;
1067                this.state = State.CHARACTER_REFERENCE;
1068                break;
1069            }
1070            case $.NULL: {
1071                this._err(ERR.unexpectedNullCharacter);
1072                this._emitCodePoint(cp);
1073                break;
1074            }
1075            case $.EOF: {
1076                this._emitEOFToken();
1077                break;
1078            }
1079            default: {
1080                this._emitCodePoint(cp);
1081            }
1082        }
1083    }
1084
1085    //  RCDATA state
1086    //------------------------------------------------------------------
1087    private _stateRcdata(cp: number): void {
1088        switch (cp) {
1089            case $.AMPERSAND: {
1090                this.returnState = State.RCDATA;
1091                this.state = State.CHARACTER_REFERENCE;
1092                break;
1093            }
1094            case $.LESS_THAN_SIGN: {
1095                this.state = State.RCDATA_LESS_THAN_SIGN;
1096                break;
1097            }
1098            case $.NULL: {
1099                this._err(ERR.unexpectedNullCharacter);
1100                this._emitChars(REPLACEMENT_CHARACTER);
1101                break;
1102            }
1103            case $.EOF: {
1104                this._emitEOFToken();
1105                break;
1106            }
1107            default: {
1108                this._emitCodePoint(cp);
1109            }
1110        }
1111    }
1112
1113    // RAWTEXT state
1114    //------------------------------------------------------------------
1115    private _stateRawtext(cp: number): void {
1116        switch (cp) {
1117            case $.LESS_THAN_SIGN: {
1118                this.state = State.RAWTEXT_LESS_THAN_SIGN;
1119                break;
1120            }
1121            case $.NULL: {
1122                this._err(ERR.unexpectedNullCharacter);
1123                this._emitChars(REPLACEMENT_CHARACTER);
1124                break;
1125            }
1126            case $.EOF: {
1127                this._emitEOFToken();
1128                break;
1129            }
1130            default: {
1131                this._emitCodePoint(cp);
1132            }
1133        }
1134    }
1135
1136    // Script data state
1137    //------------------------------------------------------------------
1138    private _stateScriptData(cp: number): void {
1139        switch (cp) {
1140            case $.LESS_THAN_SIGN: {
1141                this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
1142                break;
1143            }
1144            case $.NULL: {
1145                this._err(ERR.unexpectedNullCharacter);
1146                this._emitChars(REPLACEMENT_CHARACTER);
1147                break;
1148            }
1149            case $.EOF: {
1150                this._emitEOFToken();
1151                break;
1152            }
1153            default: {
1154                this._emitCodePoint(cp);
1155            }
1156        }
1157    }
1158
1159    // PLAINTEXT state
1160    //------------------------------------------------------------------
1161    private _statePlaintext(cp: number): void {
1162        switch (cp) {
1163            case $.NULL: {
1164                this._err(ERR.unexpectedNullCharacter);
1165                this._emitChars(REPLACEMENT_CHARACTER);
1166                break;
1167            }
1168            case $.EOF: {
1169                this._emitEOFToken();
1170                break;
1171            }
1172            default: {
1173                this._emitCodePoint(cp);
1174            }
1175        }
1176    }
1177
1178    // Tag open state
1179    //------------------------------------------------------------------
1180    private _stateTagOpen(cp: number): void {
1181        if (isAsciiLetter(cp)) {
1182            this._createStartTagToken();
1183            this.state = State.TAG_NAME;
1184            this._stateTagName(cp);
1185        } else
1186            switch (cp) {
1187                case $.EXCLAMATION_MARK: {
1188                    this.state = State.MARKUP_DECLARATION_OPEN;
1189                    break;
1190                }
1191                case $.SOLIDUS: {
1192                    this.state = State.END_TAG_OPEN;
1193                    break;
1194                }
1195                case $.QUESTION_MARK: {
1196                    this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
1197                    this._createCommentToken(1);
1198                    this.state = State.BOGUS_COMMENT;
1199                    this._stateBogusComment(cp);
1200                    break;
1201                }
1202                case $.EOF: {
1203                    this._err(ERR.eofBeforeTagName);
1204                    this._emitChars('<');
1205                    this._emitEOFToken();
1206                    break;
1207                }
1208                default: {
1209                    this._err(ERR.invalidFirstCharacterOfTagName);
1210                    this._emitChars('<');
1211                    this.state = State.DATA;
1212                    this._stateData(cp);
1213                }
1214            }
1215    }
1216
1217    // End tag open state
1218    //------------------------------------------------------------------
1219    private _stateEndTagOpen(cp: number): void {
1220        if (isAsciiLetter(cp)) {
1221            this._createEndTagToken();
1222            this.state = State.TAG_NAME;
1223            this._stateTagName(cp);
1224        } else
1225            switch (cp) {
1226                case $.GREATER_THAN_SIGN: {
1227                    this._err(ERR.missingEndTagName);
1228                    this.state = State.DATA;
1229                    break;
1230                }
1231                case $.EOF: {
1232                    this._err(ERR.eofBeforeTagName);
1233                    this._emitChars('</');
1234                    this._emitEOFToken();
1235                    break;
1236                }
1237                default: {
1238                    this._err(ERR.invalidFirstCharacterOfTagName);
1239                    this._createCommentToken(2);
1240                    this.state = State.BOGUS_COMMENT;
1241                    this._stateBogusComment(cp);
1242                }
1243            }
1244    }
1245
1246    // Tag name state
1247    //------------------------------------------------------------------
1248    private _stateTagName(cp: number): void {
1249        const token = this.currentToken as TagToken;
1250
1251        switch (cp) {
1252            case $.SPACE:
1253            case $.LINE_FEED:
1254            case $.TABULATION:
1255            case $.FORM_FEED: {
1256                this.state = State.BEFORE_ATTRIBUTE_NAME;
1257                break;
1258            }
1259            case $.SOLIDUS: {
1260                this.state = State.SELF_CLOSING_START_TAG;
1261                break;
1262            }
1263            case $.GREATER_THAN_SIGN: {
1264                this.state = State.DATA;
1265                this.emitCurrentTagToken();
1266                break;
1267            }
1268            case $.NULL: {
1269                this._err(ERR.unexpectedNullCharacter);
1270                token.tagName += REPLACEMENT_CHARACTER;
1271                break;
1272            }
1273            case $.EOF: {
1274                this._err(ERR.eofInTag);
1275                this._emitEOFToken();
1276                break;
1277            }
1278            default: {
1279                token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
1280            }
1281        }
1282    }
1283
1284    // RCDATA less-than sign state
1285    //------------------------------------------------------------------
1286    private _stateRcdataLessThanSign(cp: number): void {
1287        if (cp === $.SOLIDUS) {
1288            this.state = State.RCDATA_END_TAG_OPEN;
1289        } else {
1290            this._emitChars('<');
1291            this.state = State.RCDATA;
1292            this._stateRcdata(cp);
1293        }
1294    }
1295
1296    // RCDATA end tag open state
1297    //------------------------------------------------------------------
1298    private _stateRcdataEndTagOpen(cp: number): void {
1299        if (isAsciiLetter(cp)) {
1300            this.state = State.RCDATA_END_TAG_NAME;
1301            this._stateRcdataEndTagName(cp);
1302        } else {
1303            this._emitChars('</');
1304            this.state = State.RCDATA;
1305            this._stateRcdata(cp);
1306        }
1307    }
1308
1309    private handleSpecialEndTag(_cp: number): boolean {
1310        if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
1311            return !this._ensureHibernation();
1312        }
1313
1314        this._createEndTagToken();
1315        const token = this.currentToken as TagToken;
1316        token.tagName = this.lastStartTagName;
1317
1318        const cp = this.preprocessor.peek(this.lastStartTagName.length);
1319
1320        switch (cp) {
1321            case $.SPACE:
1322            case $.LINE_FEED:
1323            case $.TABULATION:
1324            case $.FORM_FEED: {
1325                this._advanceBy(this.lastStartTagName.length);
1326                this.state = State.BEFORE_ATTRIBUTE_NAME;
1327                return false;
1328            }
1329            case $.SOLIDUS: {
1330                this._advanceBy(this.lastStartTagName.length);
1331                this.state = State.SELF_CLOSING_START_TAG;
1332                return false;
1333            }
1334            case $.GREATER_THAN_SIGN: {
1335                this._advanceBy(this.lastStartTagName.length);
1336                this.emitCurrentTagToken();
1337                this.state = State.DATA;
1338                return false;
1339            }
1340            default: {
1341                return !this._ensureHibernation();
1342            }
1343        }
1344    }
1345
1346    // RCDATA end tag name state
1347    //------------------------------------------------------------------
1348    private _stateRcdataEndTagName(cp: number): void {
1349        if (this.handleSpecialEndTag(cp)) {
1350            this._emitChars('</');
1351            this.state = State.RCDATA;
1352            this._stateRcdata(cp);
1353        }
1354    }
1355
1356    // RAWTEXT less-than sign state
1357    //------------------------------------------------------------------
1358    private _stateRawtextLessThanSign(cp: number): void {
1359        if (cp === $.SOLIDUS) {
1360            this.state = State.RAWTEXT_END_TAG_OPEN;
1361        } else {
1362            this._emitChars('<');
1363            this.state = State.RAWTEXT;
1364            this._stateRawtext(cp);
1365        }
1366    }
1367
1368    // RAWTEXT end tag open state
1369    //------------------------------------------------------------------
1370    private _stateRawtextEndTagOpen(cp: number): void {
1371        if (isAsciiLetter(cp)) {
1372            this.state = State.RAWTEXT_END_TAG_NAME;
1373            this._stateRawtextEndTagName(cp);
1374        } else {
1375            this._emitChars('</');
1376            this.state = State.RAWTEXT;
1377            this._stateRawtext(cp);
1378        }
1379    }
1380
1381    // RAWTEXT end tag name state
1382    //------------------------------------------------------------------
1383    private _stateRawtextEndTagName(cp: number): void {
1384        if (this.handleSpecialEndTag(cp)) {
1385            this._emitChars('</');
1386            this.state = State.RAWTEXT;
1387            this._stateRawtext(cp);
1388        }
1389    }
1390
1391    // Script data less-than sign state
1392    //------------------------------------------------------------------
1393    private _stateScriptDataLessThanSign(cp: number): void {
1394        switch (cp) {
1395            case $.SOLIDUS: {
1396                this.state = State.SCRIPT_DATA_END_TAG_OPEN;
1397                break;
1398            }
1399            case $.EXCLAMATION_MARK: {
1400                this.state = State.SCRIPT_DATA_ESCAPE_START;
1401                this._emitChars('<!');
1402                break;
1403            }
1404            default: {
1405                this._emitChars('<');
1406                this.state = State.SCRIPT_DATA;
1407                this._stateScriptData(cp);
1408            }
1409        }
1410    }
1411
1412    // Script data end tag open state
1413    //------------------------------------------------------------------
1414    private _stateScriptDataEndTagOpen(cp: number): void {
1415        if (isAsciiLetter(cp)) {
1416            this.state = State.SCRIPT_DATA_END_TAG_NAME;
1417            this._stateScriptDataEndTagName(cp);
1418        } else {
1419            this._emitChars('</');
1420            this.state = State.SCRIPT_DATA;
1421            this._stateScriptData(cp);
1422        }
1423    }
1424
1425    // Script data end tag name state
1426    //------------------------------------------------------------------
1427    private _stateScriptDataEndTagName(cp: number): void {
1428        if (this.handleSpecialEndTag(cp)) {
1429            this._emitChars('</');
1430            this.state = State.SCRIPT_DATA;
1431            this._stateScriptData(cp);
1432        }
1433    }
1434
1435    // Script data escape start state
1436    //------------------------------------------------------------------
1437    private _stateScriptDataEscapeStart(cp: number): void {
1438        if (cp === $.HYPHEN_MINUS) {
1439            this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
1440            this._emitChars('-');
1441        } else {
1442            this.state = State.SCRIPT_DATA;
1443            this._stateScriptData(cp);
1444        }
1445    }
1446
1447    // Script data escape start dash state
1448    //------------------------------------------------------------------
1449    private _stateScriptDataEscapeStartDash(cp: number): void {
1450        if (cp === $.HYPHEN_MINUS) {
1451            this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
1452            this._emitChars('-');
1453        } else {
1454            this.state = State.SCRIPT_DATA;
1455            this._stateScriptData(cp);
1456        }
1457    }
1458
1459    // Script data escaped state
1460    //------------------------------------------------------------------
1461    private _stateScriptDataEscaped(cp: number): void {
1462        switch (cp) {
1463            case $.HYPHEN_MINUS: {
1464                this.state = State.SCRIPT_DATA_ESCAPED_DASH;
1465                this._emitChars('-');
1466                break;
1467            }
1468            case $.LESS_THAN_SIGN: {
1469                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1470                break;
1471            }
1472            case $.NULL: {
1473                this._err(ERR.unexpectedNullCharacter);
1474                this._emitChars(REPLACEMENT_CHARACTER);
1475                break;
1476            }
1477            case $.EOF: {
1478                this._err(ERR.eofInScriptHtmlCommentLikeText);
1479                this._emitEOFToken();
1480                break;
1481            }
1482            default: {
1483                this._emitCodePoint(cp);
1484            }
1485        }
1486    }
1487
1488    // Script data escaped dash state
1489    //------------------------------------------------------------------
1490    private _stateScriptDataEscapedDash(cp: number): void {
1491        switch (cp) {
1492            case $.HYPHEN_MINUS: {
1493                this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
1494                this._emitChars('-');
1495                break;
1496            }
1497            case $.LESS_THAN_SIGN: {
1498                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1499                break;
1500            }
1501            case $.NULL: {
1502                this._err(ERR.unexpectedNullCharacter);
1503                this.state = State.SCRIPT_DATA_ESCAPED;
1504                this._emitChars(REPLACEMENT_CHARACTER);
1505                break;
1506            }
1507            case $.EOF: {
1508                this._err(ERR.eofInScriptHtmlCommentLikeText);
1509                this._emitEOFToken();
1510                break;
1511            }
1512            default: {
1513                this.state = State.SCRIPT_DATA_ESCAPED;
1514                this._emitCodePoint(cp);
1515            }
1516        }
1517    }
1518
1519    // Script data escaped dash dash state
1520    //------------------------------------------------------------------
1521    private _stateScriptDataEscapedDashDash(cp: number): void {
1522        switch (cp) {
1523            case $.HYPHEN_MINUS: {
1524                this._emitChars('-');
1525                break;
1526            }
1527            case $.LESS_THAN_SIGN: {
1528                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1529                break;
1530            }
1531            case $.GREATER_THAN_SIGN: {
1532                this.state = State.SCRIPT_DATA;
1533                this._emitChars('>');
1534                break;
1535            }
1536            case $.NULL: {
1537                this._err(ERR.unexpectedNullCharacter);
1538                this.state = State.SCRIPT_DATA_ESCAPED;
1539                this._emitChars(REPLACEMENT_CHARACTER);
1540                break;
1541            }
1542            case $.EOF: {
1543                this._err(ERR.eofInScriptHtmlCommentLikeText);
1544                this._emitEOFToken();
1545                break;
1546            }
1547            default: {
1548                this.state = State.SCRIPT_DATA_ESCAPED;
1549                this._emitCodePoint(cp);
1550            }
1551        }
1552    }
1553
1554    // Script data escaped less-than sign state
1555    //------------------------------------------------------------------
1556    private _stateScriptDataEscapedLessThanSign(cp: number): void {
1557        if (cp === $.SOLIDUS) {
1558            this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
1559        } else if (isAsciiLetter(cp)) {
1560            this._emitChars('<');
1561            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
1562            this._stateScriptDataDoubleEscapeStart(cp);
1563        } else {
1564            this._emitChars('<');
1565            this.state = State.SCRIPT_DATA_ESCAPED;
1566            this._stateScriptDataEscaped(cp);
1567        }
1568    }
1569
1570    // Script data escaped end tag open state
1571    //------------------------------------------------------------------
1572    private _stateScriptDataEscapedEndTagOpen(cp: number): void {
1573        if (isAsciiLetter(cp)) {
1574            this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
1575            this._stateScriptDataEscapedEndTagName(cp);
1576        } else {
1577            this._emitChars('</');
1578            this.state = State.SCRIPT_DATA_ESCAPED;
1579            this._stateScriptDataEscaped(cp);
1580        }
1581    }
1582
1583    // Script data escaped end tag name state
1584    //------------------------------------------------------------------
1585    private _stateScriptDataEscapedEndTagName(cp: number): void {
1586        if (this.handleSpecialEndTag(cp)) {
1587            this._emitChars('</');
1588            this.state = State.SCRIPT_DATA_ESCAPED;
1589            this._stateScriptDataEscaped(cp);
1590        }
1591    }
1592
1593    // Script data double escape start state
1594    //------------------------------------------------------------------
1595    private _stateScriptDataDoubleEscapeStart(cp: number): void {
1596        if (
1597            this.preprocessor.startsWith($$.SCRIPT, false) &&
1598            isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))
1599        ) {
1600            this._emitCodePoint(cp);
1601            for (let i = 0; i < $$.SCRIPT.length; i++) {
1602                this._emitCodePoint(this._consume());
1603            }
1604
1605            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1606        } else if (!this._ensureHibernation()) {
1607            this.state = State.SCRIPT_DATA_ESCAPED;
1608            this._stateScriptDataEscaped(cp);
1609        }
1610    }
1611
1612    // Script data double escaped state
1613    //------------------------------------------------------------------
1614    private _stateScriptDataDoubleEscaped(cp: number): void {
1615        switch (cp) {
1616            case $.HYPHEN_MINUS: {
1617                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
1618                this._emitChars('-');
1619                break;
1620            }
1621            case $.LESS_THAN_SIGN: {
1622                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1623                this._emitChars('<');
1624                break;
1625            }
1626            case $.NULL: {
1627                this._err(ERR.unexpectedNullCharacter);
1628                this._emitChars(REPLACEMENT_CHARACTER);
1629                break;
1630            }
1631            case $.EOF: {
1632                this._err(ERR.eofInScriptHtmlCommentLikeText);
1633                this._emitEOFToken();
1634                break;
1635            }
1636            default: {
1637                this._emitCodePoint(cp);
1638            }
1639        }
1640    }
1641
1642    // Script data double escaped dash state
1643    //------------------------------------------------------------------
1644    private _stateScriptDataDoubleEscapedDash(cp: number): void {
1645        switch (cp) {
1646            case $.HYPHEN_MINUS: {
1647                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
1648                this._emitChars('-');
1649                break;
1650            }
1651            case $.LESS_THAN_SIGN: {
1652                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1653                this._emitChars('<');
1654                break;
1655            }
1656            case $.NULL: {
1657                this._err(ERR.unexpectedNullCharacter);
1658                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1659                this._emitChars(REPLACEMENT_CHARACTER);
1660                break;
1661            }
1662            case $.EOF: {
1663                this._err(ERR.eofInScriptHtmlCommentLikeText);
1664                this._emitEOFToken();
1665                break;
1666            }
1667            default: {
1668                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1669                this._emitCodePoint(cp);
1670            }
1671        }
1672    }
1673
1674    // Script data double escaped dash dash state
1675    //------------------------------------------------------------------
1676    private _stateScriptDataDoubleEscapedDashDash(cp: number): void {
1677        switch (cp) {
1678            case $.HYPHEN_MINUS: {
1679                this._emitChars('-');
1680                break;
1681            }
1682            case $.LESS_THAN_SIGN: {
1683                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1684                this._emitChars('<');
1685                break;
1686            }
1687            case $.GREATER_THAN_SIGN: {
1688                this.state = State.SCRIPT_DATA;
1689                this._emitChars('>');
1690                break;
1691            }
1692            case $.NULL: {
1693                this._err(ERR.unexpectedNullCharacter);
1694                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1695                this._emitChars(REPLACEMENT_CHARACTER);
1696                break;
1697            }
1698            case $.EOF: {
1699                this._err(ERR.eofInScriptHtmlCommentLikeText);
1700                this._emitEOFToken();
1701                break;
1702            }
1703            default: {
1704                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1705                this._emitCodePoint(cp);
1706            }
1707        }
1708    }
1709
1710    // Script data double escaped less-than sign state
1711    //------------------------------------------------------------------
1712    private _stateScriptDataDoubleEscapedLessThanSign(cp: number): void {
1713        if (cp === $.SOLIDUS) {
1714            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
1715            this._emitChars('/');
1716        } else {
1717            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1718            this._stateScriptDataDoubleEscaped(cp);
1719        }
1720    }
1721
1722    // Script data double escape end state
1723    //------------------------------------------------------------------
1724    private _stateScriptDataDoubleEscapeEnd(cp: number): void {
1725        if (
1726            this.preprocessor.startsWith($$.SCRIPT, false) &&
1727            isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))
1728        ) {
1729            this._emitCodePoint(cp);
1730            for (let i = 0; i < $$.SCRIPT.length; i++) {
1731                this._emitCodePoint(this._consume());
1732            }
1733
1734            this.state = State.SCRIPT_DATA_ESCAPED;
1735        } else if (!this._ensureHibernation()) {
1736            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1737            this._stateScriptDataDoubleEscaped(cp);
1738        }
1739    }
1740
1741    // Before attribute name state
1742    //------------------------------------------------------------------
1743    private _stateBeforeAttributeName(cp: number): void {
1744        switch (cp) {
1745            case $.SPACE:
1746            case $.LINE_FEED:
1747            case $.TABULATION:
1748            case $.FORM_FEED: {
1749                // Ignore whitespace
1750                break;
1751            }
1752            case $.SOLIDUS:
1753            case $.GREATER_THAN_SIGN:
1754            case $.EOF: {
1755                this.state = State.AFTER_ATTRIBUTE_NAME;
1756                this._stateAfterAttributeName(cp);
1757                break;
1758            }
1759            case $.EQUALS_SIGN: {
1760                this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
1761                this._createAttr('=');
1762                this.state = State.ATTRIBUTE_NAME;
1763                break;
1764            }
1765            default: {
1766                this._createAttr('');
1767                this.state = State.ATTRIBUTE_NAME;
1768                this._stateAttributeName(cp);
1769            }
1770        }
1771    }
1772
1773    // Attribute name state
1774    //------------------------------------------------------------------
1775    private _stateAttributeName(cp: number): void {
1776        switch (cp) {
1777            case $.SPACE:
1778            case $.LINE_FEED:
1779            case $.TABULATION:
1780            case $.FORM_FEED:
1781            case $.SOLIDUS:
1782            case $.GREATER_THAN_SIGN:
1783            case $.EOF: {
1784                this._leaveAttrName();
1785                this.state = State.AFTER_ATTRIBUTE_NAME;
1786                this._stateAfterAttributeName(cp);
1787                break;
1788            }
1789            case $.EQUALS_SIGN: {
1790                this._leaveAttrName();
1791                this.state = State.BEFORE_ATTRIBUTE_VALUE;
1792                break;
1793            }
1794            case $.QUOTATION_MARK:
1795            case $.APOSTROPHE:
1796            case $.LESS_THAN_SIGN: {
1797                this._err(ERR.unexpectedCharacterInAttributeName);
1798                this.currentAttr.name += String.fromCodePoint(cp);
1799                break;
1800            }
1801            case $.NULL: {
1802                this._err(ERR.unexpectedNullCharacter);
1803                this.currentAttr.name += REPLACEMENT_CHARACTER;
1804                break;
1805            }
1806            default: {
1807                this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
1808            }
1809        }
1810    }
1811
1812    // After attribute name state
1813    //------------------------------------------------------------------
1814    private _stateAfterAttributeName(cp: number): void {
1815        switch (cp) {
1816            case $.SPACE:
1817            case $.LINE_FEED:
1818            case $.TABULATION:
1819            case $.FORM_FEED: {
1820                // Ignore whitespace
1821                break;
1822            }
1823            case $.SOLIDUS: {
1824                this.state = State.SELF_CLOSING_START_TAG;
1825                break;
1826            }
1827            case $.EQUALS_SIGN: {
1828                this.state = State.BEFORE_ATTRIBUTE_VALUE;
1829                break;
1830            }
1831            case $.GREATER_THAN_SIGN: {
1832                this.state = State.DATA;
1833                this.emitCurrentTagToken();
1834                break;
1835            }
1836            case $.EOF: {
1837                this._err(ERR.eofInTag);
1838                this._emitEOFToken();
1839                break;
1840            }
1841            default: {
1842                this._createAttr('');
1843                this.state = State.ATTRIBUTE_NAME;
1844                this._stateAttributeName(cp);
1845            }
1846        }
1847    }
1848
1849    // Before attribute value state
1850    //------------------------------------------------------------------
1851    private _stateBeforeAttributeValue(cp: number): void {
1852        switch (cp) {
1853            case $.SPACE:
1854            case $.LINE_FEED:
1855            case $.TABULATION:
1856            case $.FORM_FEED: {
1857                // Ignore whitespace
1858                break;
1859            }
1860            case $.QUOTATION_MARK: {
1861                this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
1862                break;
1863            }
1864            case $.APOSTROPHE: {
1865                this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
1866                break;
1867            }
1868            case $.GREATER_THAN_SIGN: {
1869                this._err(ERR.missingAttributeValue);
1870                this.state = State.DATA;
1871                this.emitCurrentTagToken();
1872                break;
1873            }
1874            default: {
1875                this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
1876                this._stateAttributeValueUnquoted(cp);
1877            }
1878        }
1879    }
1880
1881    // Attribute value (double-quoted) state
1882    //------------------------------------------------------------------
1883    private _stateAttributeValueDoubleQuoted(cp: number): void {
1884        switch (cp) {
1885            case $.QUOTATION_MARK: {
1886                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
1887                break;
1888            }
1889            case $.AMPERSAND: {
1890                this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
1891                this.state = State.CHARACTER_REFERENCE;
1892                break;
1893            }
1894            case $.NULL: {
1895                this._err(ERR.unexpectedNullCharacter);
1896                this.currentAttr.value += REPLACEMENT_CHARACTER;
1897                break;
1898            }
1899            case $.EOF: {
1900                this._err(ERR.eofInTag);
1901                this._emitEOFToken();
1902                break;
1903            }
1904            default: {
1905                this.currentAttr.value += String.fromCodePoint(cp);
1906            }
1907        }
1908    }
1909
1910    // Attribute value (single-quoted) state
1911    //------------------------------------------------------------------
1912    private _stateAttributeValueSingleQuoted(cp: number): void {
1913        switch (cp) {
1914            case $.APOSTROPHE: {
1915                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
1916                break;
1917            }
1918            case $.AMPERSAND: {
1919                this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
1920                this.state = State.CHARACTER_REFERENCE;
1921                break;
1922            }
1923            case $.NULL: {
1924                this._err(ERR.unexpectedNullCharacter);
1925                this.currentAttr.value += REPLACEMENT_CHARACTER;
1926                break;
1927            }
1928            case $.EOF: {
1929                this._err(ERR.eofInTag);
1930                this._emitEOFToken();
1931                break;
1932            }
1933            default: {
1934                this.currentAttr.value += String.fromCodePoint(cp);
1935            }
1936        }
1937    }
1938
1939    // Attribute value (unquoted) state
1940    //------------------------------------------------------------------
1941    private _stateAttributeValueUnquoted(cp: number): void {
1942        switch (cp) {
1943            case $.SPACE:
1944            case $.LINE_FEED:
1945            case $.TABULATION:
1946            case $.FORM_FEED: {
1947                this._leaveAttrValue();
1948                this.state = State.BEFORE_ATTRIBUTE_NAME;
1949                break;
1950            }
1951            case $.AMPERSAND: {
1952                this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED;
1953                this.state = State.CHARACTER_REFERENCE;
1954                break;
1955            }
1956            case $.GREATER_THAN_SIGN: {
1957                this._leaveAttrValue();
1958                this.state = State.DATA;
1959                this.emitCurrentTagToken();
1960                break;
1961            }
1962            case $.NULL: {
1963                this._err(ERR.unexpectedNullCharacter);
1964                this.currentAttr.value += REPLACEMENT_CHARACTER;
1965                break;
1966            }
1967            case $.QUOTATION_MARK:
1968            case $.APOSTROPHE:
1969            case $.LESS_THAN_SIGN:
1970            case $.EQUALS_SIGN:
1971            case $.GRAVE_ACCENT: {
1972                this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
1973                this.currentAttr.value += String.fromCodePoint(cp);
1974                break;
1975            }
1976            case $.EOF: {
1977                this._err(ERR.eofInTag);
1978                this._emitEOFToken();
1979                break;
1980            }
1981            default: {
1982                this.currentAttr.value += String.fromCodePoint(cp);
1983            }
1984        }
1985    }
1986
1987    // After attribute value (quoted) state
1988    //------------------------------------------------------------------
1989    private _stateAfterAttributeValueQuoted(cp: number): void {
1990        switch (cp) {
1991            case $.SPACE:
1992            case $.LINE_FEED:
1993            case $.TABULATION:
1994            case $.FORM_FEED: {
1995                this._leaveAttrValue();
1996                this.state = State.BEFORE_ATTRIBUTE_NAME;
1997                break;
1998            }
1999            case $.SOLIDUS: {
2000                this._leaveAttrValue();
2001                this.state = State.SELF_CLOSING_START_TAG;
2002                break;
2003            }
2004            case $.GREATER_THAN_SIGN: {
2005                this._leaveAttrValue();
2006                this.state = State.DATA;
2007                this.emitCurrentTagToken();
2008                break;
2009            }
2010            case $.EOF: {
2011                this._err(ERR.eofInTag);
2012                this._emitEOFToken();
2013                break;
2014            }
2015            default: {
2016                this._err(ERR.missingWhitespaceBetweenAttributes);
2017                this.state = State.BEFORE_ATTRIBUTE_NAME;
2018                this._stateBeforeAttributeName(cp);
2019            }
2020        }
2021    }
2022
2023    // Self-closing start tag state
2024    //------------------------------------------------------------------
2025    private _stateSelfClosingStartTag(cp: number): void {
2026        switch (cp) {
2027            case $.GREATER_THAN_SIGN: {
2028                const token = this.currentToken as TagToken;
2029                token.selfClosing = true;
2030                this.state = State.DATA;
2031                this.emitCurrentTagToken();
2032                break;
2033            }
2034            case $.EOF: {
2035                this._err(ERR.eofInTag);
2036                this._emitEOFToken();
2037                break;
2038            }
2039            default: {
2040                this._err(ERR.unexpectedSolidusInTag);
2041                this.state = State.BEFORE_ATTRIBUTE_NAME;
2042                this._stateBeforeAttributeName(cp);
2043            }
2044        }
2045    }
2046
2047    // Bogus comment state
2048    //------------------------------------------------------------------
2049    private _stateBogusComment(cp: number): void {
2050        const token = this.currentToken as CommentToken;
2051
2052        switch (cp) {
2053            case $.GREATER_THAN_SIGN: {
2054                this.state = State.DATA;
2055                this.emitCurrentComment(token);
2056                break;
2057            }
2058            case $.EOF: {
2059                this.emitCurrentComment(token);
2060                this._emitEOFToken();
2061                break;
2062            }
2063            case $.NULL: {
2064                this._err(ERR.unexpectedNullCharacter);
2065                token.data += REPLACEMENT_CHARACTER;
2066                break;
2067            }
2068            default: {
2069                token.data += String.fromCodePoint(cp);
2070            }
2071        }
2072    }
2073
2074    // Markup declaration open state
2075    //------------------------------------------------------------------
2076    private _stateMarkupDeclarationOpen(cp: number): void {
2077        if (this._consumeSequenceIfMatch($$.DASH_DASH, true)) {
2078            this._createCommentToken($$.DASH_DASH.length + 1);
2079            this.state = State.COMMENT_START;
2080        } else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) {
2081            // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
2082            this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1);
2083            this.state = State.DOCTYPE;
2084        } else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) {
2085            if (this.inForeignNode) {
2086                this.state = State.CDATA_SECTION;
2087            } else {
2088                this._err(ERR.cdataInHtmlContent);
2089                this._createCommentToken($$.CDATA_START.length + 1);
2090                (this.currentToken as CommentToken).data = '[CDATA[';
2091                this.state = State.BOGUS_COMMENT;
2092            }
2093        }
2094
2095        //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
2096        //results are no longer valid and we will need to start over.
2097        else if (!this._ensureHibernation()) {
2098            this._err(ERR.incorrectlyOpenedComment);
2099            this._createCommentToken(2);
2100            this.state = State.BOGUS_COMMENT;
2101            this._stateBogusComment(cp);
2102        }
2103    }
2104
2105    // Comment start state
2106    //------------------------------------------------------------------
2107    private _stateCommentStart(cp: number): void {
2108        switch (cp) {
2109            case $.HYPHEN_MINUS: {
2110                this.state = State.COMMENT_START_DASH;
2111                break;
2112            }
2113            case $.GREATER_THAN_SIGN: {
2114                this._err(ERR.abruptClosingOfEmptyComment);
2115                this.state = State.DATA;
2116                const token = this.currentToken as CommentToken;
2117                this.emitCurrentComment(token);
2118                break;
2119            }
2120            default: {
2121                this.state = State.COMMENT;
2122                this._stateComment(cp);
2123            }
2124        }
2125    }
2126
2127    // Comment start dash state
2128    //------------------------------------------------------------------
2129    private _stateCommentStartDash(cp: number): void {
2130        const token = this.currentToken as CommentToken;
2131        switch (cp) {
2132            case $.HYPHEN_MINUS: {
2133                this.state = State.COMMENT_END;
2134                break;
2135            }
2136            case $.GREATER_THAN_SIGN: {
2137                this._err(ERR.abruptClosingOfEmptyComment);
2138                this.state = State.DATA;
2139                this.emitCurrentComment(token);
2140                break;
2141            }
2142            case $.EOF: {
2143                this._err(ERR.eofInComment);
2144                this.emitCurrentComment(token);
2145                this._emitEOFToken();
2146                break;
2147            }
2148            default: {
2149                token.data += '-';
2150                this.state = State.COMMENT;
2151                this._stateComment(cp);
2152            }
2153        }
2154    }
2155
2156    // Comment state
2157    //------------------------------------------------------------------
2158    private _stateComment(cp: number): void {
2159        const token = this.currentToken as CommentToken;
2160
2161        switch (cp) {
2162            case $.HYPHEN_MINUS: {
2163                this.state = State.COMMENT_END_DASH;
2164                break;
2165            }
2166            case $.LESS_THAN_SIGN: {
2167                token.data += '<';
2168                this.state = State.COMMENT_LESS_THAN_SIGN;
2169                break;
2170            }
2171            case $.NULL: {
2172                this._err(ERR.unexpectedNullCharacter);
2173                token.data += REPLACEMENT_CHARACTER;
2174                break;
2175            }
2176            case $.EOF: {
2177                this._err(ERR.eofInComment);
2178                this.emitCurrentComment(token);
2179                this._emitEOFToken();
2180                break;
2181            }
2182            default: {
2183                token.data += String.fromCodePoint(cp);
2184            }
2185        }
2186    }
2187
2188    // Comment less-than sign state
2189    //------------------------------------------------------------------
2190    private _stateCommentLessThanSign(cp: number): void {
2191        const token = this.currentToken as CommentToken;
2192
2193        switch (cp) {
2194            case $.EXCLAMATION_MARK: {
2195                token.data += '!';
2196                this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
2197                break;
2198            }
2199            case $.LESS_THAN_SIGN: {
2200                token.data += '<';
2201                break;
2202            }
2203            default: {
2204                this.state = State.COMMENT;
2205                this._stateComment(cp);
2206            }
2207        }
2208    }
2209
2210    // Comment less-than sign bang state
2211    //------------------------------------------------------------------
2212    private _stateCommentLessThanSignBang(cp: number): void {
2213        if (cp === $.HYPHEN_MINUS) {
2214            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
2215        } else {
2216            this.state = State.COMMENT;
2217            this._stateComment(cp);
2218        }
2219    }
2220
2221    // Comment less-than sign bang dash state
2222    //------------------------------------------------------------------
2223    private _stateCommentLessThanSignBangDash(cp: number): void {
2224        if (cp === $.HYPHEN_MINUS) {
2225            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
2226        } else {
2227            this.state = State.COMMENT_END_DASH;
2228            this._stateCommentEndDash(cp);
2229        }
2230    }
2231
2232    // Comment less-than sign bang dash dash state
2233    //------------------------------------------------------------------
2234    private _stateCommentLessThanSignBangDashDash(cp: number): void {
2235        if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) {
2236            this._err(ERR.nestedComment);
2237        }
2238
2239        this.state = State.COMMENT_END;
2240        this._stateCommentEnd(cp);
2241    }
2242
2243    // Comment end dash state
2244    //------------------------------------------------------------------
2245    private _stateCommentEndDash(cp: number): void {
2246        const token = this.currentToken as CommentToken;
2247        switch (cp) {
2248            case $.HYPHEN_MINUS: {
2249                this.state = State.COMMENT_END;
2250                break;
2251            }
2252            case $.EOF: {
2253                this._err(ERR.eofInComment);
2254                this.emitCurrentComment(token);
2255                this._emitEOFToken();
2256                break;
2257            }
2258            default: {
2259                token.data += '-';
2260                this.state = State.COMMENT;
2261                this._stateComment(cp);
2262            }
2263        }
2264    }
2265
2266    // Comment end state
2267    //------------------------------------------------------------------
2268    private _stateCommentEnd(cp: number): void {
2269        const token = this.currentToken as CommentToken;
2270
2271        switch (cp) {
2272            case $.GREATER_THAN_SIGN: {
2273                this.state = State.DATA;
2274                this.emitCurrentComment(token);
2275                break;
2276            }
2277            case $.EXCLAMATION_MARK: {
2278                this.state = State.COMMENT_END_BANG;
2279                break;
2280            }
2281            case $.HYPHEN_MINUS: {
2282                token.data += '-';
2283                break;
2284            }
2285            case $.EOF: {
2286                this._err(ERR.eofInComment);
2287                this.emitCurrentComment(token);
2288                this._emitEOFToken();
2289                break;
2290            }
2291            default: {
2292                token.data += '--';
2293                this.state = State.COMMENT;
2294                this._stateComment(cp);
2295            }
2296        }
2297    }
2298
2299    // Comment end bang state
2300    //------------------------------------------------------------------
2301    private _stateCommentEndBang(cp: number): void {
2302        const token = this.currentToken as CommentToken;
2303
2304        switch (cp) {
2305            case $.HYPHEN_MINUS: {
2306                token.data += '--!';
2307                this.state = State.COMMENT_END_DASH;
2308                break;
2309            }
2310            case $.GREATER_THAN_SIGN: {
2311                this._err(ERR.incorrectlyClosedComment);
2312                this.state = State.DATA;
2313                this.emitCurrentComment(token);
2314                break;
2315            }
2316            case $.EOF: {
2317                this._err(ERR.eofInComment);
2318                this.emitCurrentComment(token);
2319                this._emitEOFToken();
2320                break;
2321            }
2322            default: {
2323                token.data += '--!';
2324                this.state = State.COMMENT;
2325                this._stateComment(cp);
2326            }
2327        }
2328    }
2329
2330    // DOCTYPE state
2331    //------------------------------------------------------------------
2332    private _stateDoctype(cp: number): void {
2333        switch (cp) {
2334            case $.SPACE:
2335            case $.LINE_FEED:
2336            case $.TABULATION:
2337            case $.FORM_FEED: {
2338                this.state = State.BEFORE_DOCTYPE_NAME;
2339                break;
2340            }
2341            case $.GREATER_THAN_SIGN: {
2342                this.state = State.BEFORE_DOCTYPE_NAME;
2343                this._stateBeforeDoctypeName(cp);
2344                break;
2345            }
2346            case $.EOF: {
2347                this._err(ERR.eofInDoctype);
2348                this._createDoctypeToken(null);
2349                const token = this.currentToken as DoctypeToken;
2350                token.forceQuirks = true;
2351                this.emitCurrentDoctype(token);
2352                this._emitEOFToken();
2353                break;
2354            }
2355            default: {
2356                this._err(ERR.missingWhitespaceBeforeDoctypeName);
2357                this.state = State.BEFORE_DOCTYPE_NAME;
2358                this._stateBeforeDoctypeName(cp);
2359            }
2360        }
2361    }
2362
2363    // Before DOCTYPE name state
2364    //------------------------------------------------------------------
2365    private _stateBeforeDoctypeName(cp: number): void {
2366        if (isAsciiUpper(cp)) {
2367            this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
2368            this.state = State.DOCTYPE_NAME;
2369        } else
2370            switch (cp) {
2371                case $.SPACE:
2372                case $.LINE_FEED:
2373                case $.TABULATION:
2374                case $.FORM_FEED: {
2375                    // Ignore whitespace
2376                    break;
2377                }
2378                case $.NULL: {
2379                    this._err(ERR.unexpectedNullCharacter);
2380                    this._createDoctypeToken(REPLACEMENT_CHARACTER);
2381                    this.state = State.DOCTYPE_NAME;
2382                    break;
2383                }
2384                case $.GREATER_THAN_SIGN: {
2385                    this._err(ERR.missingDoctypeName);
2386                    this._createDoctypeToken(null);
2387                    const token = this.currentToken as DoctypeToken;
2388                    token.forceQuirks = true;
2389                    this.emitCurrentDoctype(token);
2390                    this.state = State.DATA;
2391                    break;
2392                }
2393                case $.EOF: {
2394                    this._err(ERR.eofInDoctype);
2395                    this._createDoctypeToken(null);
2396                    const token = this.currentToken as DoctypeToken;
2397                    token.forceQuirks = true;
2398                    this.emitCurrentDoctype(token);
2399                    this._emitEOFToken();
2400                    break;
2401                }
2402                default: {
2403                    this._createDoctypeToken(String.fromCodePoint(cp));
2404                    this.state = State.DOCTYPE_NAME;
2405                }
2406            }
2407    }
2408
2409    // DOCTYPE name state
2410    //------------------------------------------------------------------
2411    private _stateDoctypeName(cp: number): void {
2412        const token = this.currentToken as DoctypeToken;
2413
2414        switch (cp) {
2415            case $.SPACE:
2416            case $.LINE_FEED:
2417            case $.TABULATION:
2418            case $.FORM_FEED: {
2419                this.state = State.AFTER_DOCTYPE_NAME;
2420                break;
2421            }
2422            case $.GREATER_THAN_SIGN: {
2423                this.state = State.DATA;
2424                this.emitCurrentDoctype(token);
2425                break;
2426            }
2427            case $.NULL: {
2428                this._err(ERR.unexpectedNullCharacter);
2429                token.name += REPLACEMENT_CHARACTER;
2430                break;
2431            }
2432            case $.EOF: {
2433                this._err(ERR.eofInDoctype);
2434                token.forceQuirks = true;
2435                this.emitCurrentDoctype(token);
2436                this._emitEOFToken();
2437                break;
2438            }
2439            default: {
2440                token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
2441            }
2442        }
2443    }
2444
2445    // After DOCTYPE name state
2446    //------------------------------------------------------------------
2447    private _stateAfterDoctypeName(cp: number): void {
2448        const token = this.currentToken as DoctypeToken;
2449
2450        switch (cp) {
2451            case $.SPACE:
2452            case $.LINE_FEED:
2453            case $.TABULATION:
2454            case $.FORM_FEED: {
2455                // Ignore whitespace
2456                break;
2457            }
2458            case $.GREATER_THAN_SIGN: {
2459                this.state = State.DATA;
2460                this.emitCurrentDoctype(token);
2461                break;
2462            }
2463            case $.EOF: {
2464                this._err(ERR.eofInDoctype);
2465                token.forceQuirks = true;
2466                this.emitCurrentDoctype(token);
2467                this._emitEOFToken();
2468                break;
2469            }
2470            default:
2471                if (this._consumeSequenceIfMatch($$.PUBLIC, false)) {
2472                    this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
2473                } else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) {
2474                    this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
2475                }
2476                //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
2477                //results are no longer valid and we will need to start over.
2478                else if (!this._ensureHibernation()) {
2479                    this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
2480                    token.forceQuirks = true;
2481                    this.state = State.BOGUS_DOCTYPE;
2482                    this._stateBogusDoctype(cp);
2483                }
2484        }
2485    }
2486
2487    // After DOCTYPE public keyword state
2488    //------------------------------------------------------------------
2489    private _stateAfterDoctypePublicKeyword(cp: number): void {
2490        const token = this.currentToken as DoctypeToken;
2491
2492        switch (cp) {
2493            case $.SPACE:
2494            case $.LINE_FEED:
2495            case $.TABULATION:
2496            case $.FORM_FEED: {
2497                this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
2498                break;
2499            }
2500            case $.QUOTATION_MARK: {
2501                this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
2502                token.publicId = '';
2503                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
2504                break;
2505            }
2506            case $.APOSTROPHE: {
2507                this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
2508                token.publicId = '';
2509                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
2510                break;
2511            }
2512            case $.GREATER_THAN_SIGN: {
2513                this._err(ERR.missingDoctypePublicIdentifier);
2514                token.forceQuirks = true;
2515                this.state = State.DATA;
2516                this.emitCurrentDoctype(token);
2517                break;
2518            }
2519            case $.EOF: {
2520                this._err(ERR.eofInDoctype);
2521                token.forceQuirks = true;
2522                this.emitCurrentDoctype(token);
2523                this._emitEOFToken();
2524                break;
2525            }
2526            default: {
2527                this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
2528                token.forceQuirks = true;
2529                this.state = State.BOGUS_DOCTYPE;
2530                this._stateBogusDoctype(cp);
2531            }
2532        }
2533    }
2534
2535    // Before DOCTYPE public identifier state
2536    //------------------------------------------------------------------
2537    private _stateBeforeDoctypePublicIdentifier(cp: number): void {
2538        const token = this.currentToken as DoctypeToken;
2539
2540        switch (cp) {
2541            case $.SPACE:
2542            case $.LINE_FEED:
2543            case $.TABULATION:
2544            case $.FORM_FEED: {
2545                // Ignore whitespace
2546                break;
2547            }
2548            case $.QUOTATION_MARK: {
2549                token.publicId = '';
2550                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
2551                break;
2552            }
2553            case $.APOSTROPHE: {
2554                token.publicId = '';
2555                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
2556                break;
2557            }
2558            case $.GREATER_THAN_SIGN: {
2559                this._err(ERR.missingDoctypePublicIdentifier);
2560                token.forceQuirks = true;
2561                this.state = State.DATA;
2562                this.emitCurrentDoctype(token);
2563                break;
2564            }
2565            case $.EOF: {
2566                this._err(ERR.eofInDoctype);
2567                token.forceQuirks = true;
2568                this.emitCurrentDoctype(token);
2569                this._emitEOFToken();
2570                break;
2571            }
2572            default: {
2573                this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
2574                token.forceQuirks = true;
2575                this.state = State.BOGUS_DOCTYPE;
2576                this._stateBogusDoctype(cp);
2577            }
2578        }
2579    }
2580
2581    // DOCTYPE public identifier (double-quoted) state
2582    //------------------------------------------------------------------
2583    private _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void {
2584        const token = this.currentToken as DoctypeToken;
2585
2586        switch (cp) {
2587            case $.QUOTATION_MARK: {
2588                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
2589                break;
2590            }
2591            case $.NULL: {
2592                this._err(ERR.unexpectedNullCharacter);
2593                token.publicId += REPLACEMENT_CHARACTER;
2594                break;
2595            }
2596            case $.GREATER_THAN_SIGN: {
2597                this._err(ERR.abruptDoctypePublicIdentifier);
2598                token.forceQuirks = true;
2599                this.emitCurrentDoctype(token);
2600                this.state = State.DATA;
2601                break;
2602            }
2603            case $.EOF: {
2604                this._err(ERR.eofInDoctype);
2605                token.forceQuirks = true;
2606                this.emitCurrentDoctype(token);
2607                this._emitEOFToken();
2608                break;
2609            }
2610            default: {
2611                token.publicId += String.fromCodePoint(cp);
2612            }
2613        }
2614    }
2615
2616    // DOCTYPE public identifier (single-quoted) state
2617    //------------------------------------------------------------------
2618    private _stateDoctypePublicIdentifierSingleQuoted(cp: number): void {
2619        const token = this.currentToken as DoctypeToken;
2620
2621        switch (cp) {
2622            case $.APOSTROPHE: {
2623                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
2624                break;
2625            }
2626            case $.NULL: {
2627                this._err(ERR.unexpectedNullCharacter);
2628                token.publicId += REPLACEMENT_CHARACTER;
2629                break;
2630            }
2631            case $.GREATER_THAN_SIGN: {
2632                this._err(ERR.abruptDoctypePublicIdentifier);
2633                token.forceQuirks = true;
2634                this.emitCurrentDoctype(token);
2635                this.state = State.DATA;
2636                break;
2637            }
2638            case $.EOF: {
2639                this._err(ERR.eofInDoctype);
2640                token.forceQuirks = true;
2641                this.emitCurrentDoctype(token);
2642                this._emitEOFToken();
2643                break;
2644            }
2645            default: {
2646                token.publicId += String.fromCodePoint(cp);
2647            }
2648        }
2649    }
2650
2651    // After DOCTYPE public identifier state
2652    //------------------------------------------------------------------
2653    private _stateAfterDoctypePublicIdentifier(cp: number): void {
2654        const token = this.currentToken as DoctypeToken;
2655
2656        switch (cp) {
2657            case $.SPACE:
2658            case $.LINE_FEED:
2659            case $.TABULATION:
2660            case $.FORM_FEED: {
2661                this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
2662                break;
2663            }
2664            case $.GREATER_THAN_SIGN: {
2665                this.state = State.DATA;
2666                this.emitCurrentDoctype(token);
2667                break;
2668            }
2669            case $.QUOTATION_MARK: {
2670                this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
2671                token.systemId = '';
2672                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2673                break;
2674            }
2675            case $.APOSTROPHE: {
2676                this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
2677                token.systemId = '';
2678                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2679                break;
2680            }
2681            case $.EOF: {
2682                this._err(ERR.eofInDoctype);
2683                token.forceQuirks = true;
2684                this.emitCurrentDoctype(token);
2685                this._emitEOFToken();
2686                break;
2687            }
2688            default: {
2689                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2690                token.forceQuirks = true;
2691                this.state = State.BOGUS_DOCTYPE;
2692                this._stateBogusDoctype(cp);
2693            }
2694        }
2695    }
2696
2697    // Between DOCTYPE public and system identifiers state
2698    //------------------------------------------------------------------
2699    private _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void {
2700        const token = this.currentToken as DoctypeToken;
2701
2702        switch (cp) {
2703            case $.SPACE:
2704            case $.LINE_FEED:
2705            case $.TABULATION:
2706            case $.FORM_FEED: {
2707                // Ignore whitespace
2708                break;
2709            }
2710            case $.GREATER_THAN_SIGN: {
2711                this.emitCurrentDoctype(token);
2712                this.state = State.DATA;
2713                break;
2714            }
2715            case $.QUOTATION_MARK: {
2716                token.systemId = '';
2717                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2718                break;
2719            }
2720            case $.APOSTROPHE: {
2721                token.systemId = '';
2722                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2723                break;
2724            }
2725            case $.EOF: {
2726                this._err(ERR.eofInDoctype);
2727                token.forceQuirks = true;
2728                this.emitCurrentDoctype(token);
2729                this._emitEOFToken();
2730                break;
2731            }
2732            default: {
2733                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2734                token.forceQuirks = true;
2735                this.state = State.BOGUS_DOCTYPE;
2736                this._stateBogusDoctype(cp);
2737            }
2738        }
2739    }
2740
2741    // After DOCTYPE system keyword state
2742    //------------------------------------------------------------------
2743    private _stateAfterDoctypeSystemKeyword(cp: number): void {
2744        const token = this.currentToken as DoctypeToken;
2745
2746        switch (cp) {
2747            case $.SPACE:
2748            case $.LINE_FEED:
2749            case $.TABULATION:
2750            case $.FORM_FEED: {
2751                this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
2752                break;
2753            }
2754            case $.QUOTATION_MARK: {
2755                this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
2756                token.systemId = '';
2757                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2758                break;
2759            }
2760            case $.APOSTROPHE: {
2761                this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
2762                token.systemId = '';
2763                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2764                break;
2765            }
2766            case $.GREATER_THAN_SIGN: {
2767                this._err(ERR.missingDoctypeSystemIdentifier);
2768                token.forceQuirks = true;
2769                this.state = State.DATA;
2770                this.emitCurrentDoctype(token);
2771                break;
2772            }
2773            case $.EOF: {
2774                this._err(ERR.eofInDoctype);
2775                token.forceQuirks = true;
2776                this.emitCurrentDoctype(token);
2777                this._emitEOFToken();
2778                break;
2779            }
2780            default: {
2781                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2782                token.forceQuirks = true;
2783                this.state = State.BOGUS_DOCTYPE;
2784                this._stateBogusDoctype(cp);
2785            }
2786        }
2787    }
2788
2789    // Before DOCTYPE system identifier state
2790    //------------------------------------------------------------------
2791    private _stateBeforeDoctypeSystemIdentifier(cp: number): void {
2792        const token = this.currentToken as DoctypeToken;
2793
2794        switch (cp) {
2795            case $.SPACE:
2796            case $.LINE_FEED:
2797            case $.TABULATION:
2798            case $.FORM_FEED: {
2799                // Ignore whitespace
2800                break;
2801            }
2802            case $.QUOTATION_MARK: {
2803                token.systemId = '';
2804                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2805                break;
2806            }
2807            case $.APOSTROPHE: {
2808                token.systemId = '';
2809                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2810                break;
2811            }
2812            case $.GREATER_THAN_SIGN: {
2813                this._err(ERR.missingDoctypeSystemIdentifier);
2814                token.forceQuirks = true;
2815                this.state = State.DATA;
2816                this.emitCurrentDoctype(token);
2817                break;
2818            }
2819            case $.EOF: {
2820                this._err(ERR.eofInDoctype);
2821                token.forceQuirks = true;
2822                this.emitCurrentDoctype(token);
2823                this._emitEOFToken();
2824                break;
2825            }
2826            default: {
2827                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2828                token.forceQuirks = true;
2829                this.state = State.BOGUS_DOCTYPE;
2830                this._stateBogusDoctype(cp);
2831            }
2832        }
2833    }
2834
2835    // DOCTYPE system identifier (double-quoted) state
2836    //------------------------------------------------------------------
2837    private _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void {
2838        const token = this.currentToken as DoctypeToken;
2839
2840        switch (cp) {
2841            case $.QUOTATION_MARK: {
2842                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
2843                break;
2844            }
2845            case $.NULL: {
2846                this._err(ERR.unexpectedNullCharacter);
2847                token.systemId += REPLACEMENT_CHARACTER;
2848                break;
2849            }
2850            case $.GREATER_THAN_SIGN: {
2851                this._err(ERR.abruptDoctypeSystemIdentifier);
2852                token.forceQuirks = true;
2853                this.emitCurrentDoctype(token);
2854                this.state = State.DATA;
2855                break;
2856            }
2857            case $.EOF: {
2858                this._err(ERR.eofInDoctype);
2859                token.forceQuirks = true;
2860                this.emitCurrentDoctype(token);
2861                this._emitEOFToken();
2862                break;
2863            }
2864            default: {
2865                token.systemId += String.fromCodePoint(cp);
2866            }
2867        }
2868    }
2869
2870    // DOCTYPE system identifier (single-quoted) state
2871    //------------------------------------------------------------------
2872    private _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void {
2873        const token = this.currentToken as DoctypeToken;
2874
2875        switch (cp) {
2876            case $.APOSTROPHE: {
2877                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
2878                break;
2879            }
2880            case $.NULL: {
2881                this._err(ERR.unexpectedNullCharacter);
2882                token.systemId += REPLACEMENT_CHARACTER;
2883                break;
2884            }
2885            case $.GREATER_THAN_SIGN: {
2886                this._err(ERR.abruptDoctypeSystemIdentifier);
2887                token.forceQuirks = true;
2888                this.emitCurrentDoctype(token);
2889                this.state = State.DATA;
2890                break;
2891            }
2892            case $.EOF: {
2893                this._err(ERR.eofInDoctype);
2894                token.forceQuirks = true;
2895                this.emitCurrentDoctype(token);
2896                this._emitEOFToken();
2897                break;
2898            }
2899            default: {
2900                token.systemId += String.fromCodePoint(cp);
2901            }
2902        }
2903    }
2904
2905    // After DOCTYPE system identifier state
2906    //------------------------------------------------------------------
2907    private _stateAfterDoctypeSystemIdentifier(cp: number): void {
2908        const token = this.currentToken as DoctypeToken;
2909
2910        switch (cp) {
2911            case $.SPACE:
2912            case $.LINE_FEED:
2913            case $.TABULATION:
2914            case $.FORM_FEED: {
2915                // Ignore whitespace
2916                break;
2917            }
2918            case $.GREATER_THAN_SIGN: {
2919                this.emitCurrentDoctype(token);
2920                this.state = State.DATA;
2921                break;
2922            }
2923            case $.EOF: {
2924                this._err(ERR.eofInDoctype);
2925                token.forceQuirks = true;
2926                this.emitCurrentDoctype(token);
2927                this._emitEOFToken();
2928                break;
2929            }
2930            default: {
2931                this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
2932                this.state = State.BOGUS_DOCTYPE;
2933                this._stateBogusDoctype(cp);
2934            }
2935        }
2936    }
2937
2938    // Bogus DOCTYPE state
2939    //------------------------------------------------------------------
2940    private _stateBogusDoctype(cp: number): void {
2941        const token = this.currentToken as DoctypeToken;
2942
2943        switch (cp) {
2944            case $.GREATER_THAN_SIGN: {
2945                this.emitCurrentDoctype(token);
2946                this.state = State.DATA;
2947                break;
2948            }
2949            case $.NULL: {
2950                this._err(ERR.unexpectedNullCharacter);
2951                break;
2952            }
2953            case $.EOF: {
2954                this.emitCurrentDoctype(token);
2955                this._emitEOFToken();
2956                break;
2957            }
2958            default:
2959            // Do nothing
2960        }
2961    }
2962
2963    // CDATA section state
2964    //------------------------------------------------------------------
2965    private _stateCdataSection(cp: number): void {
2966        switch (cp) {
2967            case $.RIGHT_SQUARE_BRACKET: {
2968                this.state = State.CDATA_SECTION_BRACKET;
2969                break;
2970            }
2971            case $.EOF: {
2972                this._err(ERR.eofInCdata);
2973                this._emitEOFToken();
2974                break;
2975            }
2976            default: {
2977                this._emitCodePoint(cp);
2978            }
2979        }
2980    }
2981
2982    // CDATA section bracket state
2983    //------------------------------------------------------------------
2984    private _stateCdataSectionBracket(cp: number): void {
2985        if (cp === $.RIGHT_SQUARE_BRACKET) {
2986            this.state = State.CDATA_SECTION_END;
2987        } else {
2988            this._emitChars(']');
2989            this.state = State.CDATA_SECTION;
2990            this._stateCdataSection(cp);
2991        }
2992    }
2993
2994    // CDATA section end state
2995    //------------------------------------------------------------------
2996    private _stateCdataSectionEnd(cp: number): void {
2997        switch (cp) {
2998            case $.GREATER_THAN_SIGN: {
2999                this.state = State.DATA;
3000                break;
3001            }
3002            case $.RIGHT_SQUARE_BRACKET: {
3003                this._emitChars(']');
3004                break;
3005            }
3006            default: {
3007                this._emitChars(']]');
3008                this.state = State.CDATA_SECTION;
3009                this._stateCdataSection(cp);
3010            }
3011        }
3012    }
3013
3014    // Character reference state
3015    //------------------------------------------------------------------
3016    private _stateCharacterReference(cp: number): void {
3017        if (cp === $.NUMBER_SIGN) {
3018            this.state = State.NUMERIC_CHARACTER_REFERENCE;
3019        } else if (isAsciiAlphaNumeric(cp)) {
3020            this.state = State.NAMED_CHARACTER_REFERENCE;
3021            this._stateNamedCharacterReference(cp);
3022        } else {
3023            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3024            this._reconsumeInState(this.returnState);
3025        }
3026    }
3027
3028    // Named character reference state
3029    //------------------------------------------------------------------
3030    private _stateNamedCharacterReference(cp: number): void {
3031        const matchResult = this._matchNamedCharacterReference(cp);
3032
3033        //NOTE: Matching can be abrupted by hibernation. In that case, match
3034        //results are no longer valid and we will need to start over.
3035        if (this._ensureHibernation()) {
3036            // Stay in the state, try again.
3037        } else if (matchResult) {
3038            for (let i = 0; i < matchResult.length; i++) {
3039                this._flushCodePointConsumedAsCharacterReference(matchResult[i]);
3040            }
3041            this.state = this.returnState;
3042        } else {
3043            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3044            this.state = State.AMBIGUOUS_AMPERSAND;
3045        }
3046    }
3047
    // Ambiguous ampersand state
3049    //------------------------------------------------------------------
3050    private _stateAmbiguousAmpersand(cp: number): void {
3051        if (isAsciiAlphaNumeric(cp)) {
3052            this._flushCodePointConsumedAsCharacterReference(cp);
3053        } else {
3054            if (cp === $.SEMICOLON) {
3055                this._err(ERR.unknownNamedCharacterReference);
3056            }
3057
3058            this._reconsumeInState(this.returnState);
3059        }
3060    }
3061
3062    // Numeric character reference state
3063    //------------------------------------------------------------------
3064    private _stateNumericCharacterReference(cp: number): void {
3065        this.charRefCode = 0;
3066
3067        if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) {
3068            this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
3069        } else {
3070            this.state = State.DECIMAL_CHARACTER_REFERENCE_START;
3071            this._stateDecimalCharacterReferenceStart(cp);
3072        }
3073    }
3074
    // Hexadecimal character reference start state
3076    //------------------------------------------------------------------
3077    private _stateHexademicalCharacterReferenceStart(cp: number): void {
3078        if (isAsciiHexDigit(cp)) {
3079            this.state = State.HEXADEMICAL_CHARACTER_REFERENCE;
3080            this._stateHexademicalCharacterReference(cp);
3081        } else {
3082            this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3083            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3084            this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
3085            this._unconsume(2);
3086            this.state = this.returnState;
3087        }
3088    }
3089
3090    // Decimal character reference start state
3091    //------------------------------------------------------------------
3092    private _stateDecimalCharacterReferenceStart(cp: number): void {
3093        if (isAsciiDigit(cp)) {
3094            this.state = State.DECIMAL_CHARACTER_REFERENCE;
3095            this._stateDecimalCharacterReference(cp);
3096        } else {
3097            this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3098            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3099            this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
3100            this._reconsumeInState(this.returnState);
3101        }
3102    }
3103
    // Hexadecimal character reference state
3105    //------------------------------------------------------------------
3106    private _stateHexademicalCharacterReference(cp: number): void {
3107        if (isAsciiUpperHexDigit(cp)) {
3108            this.charRefCode = this.charRefCode * 16 + cp - 0x37;
3109        } else if (isAsciiLowerHexDigit(cp)) {
3110            this.charRefCode = this.charRefCode * 16 + cp - 0x57;
3111        } else if (isAsciiDigit(cp)) {
3112            this.charRefCode = this.charRefCode * 16 + cp - 0x30;
3113        } else if (cp === $.SEMICOLON) {
3114            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3115        } else {
3116            this._err(ERR.missingSemicolonAfterCharacterReference);
3117            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3118            this._stateNumericCharacterReferenceEnd();
3119        }
3120    }
3121
3122    // Decimal character reference state
3123    //------------------------------------------------------------------
3124    private _stateDecimalCharacterReference(cp: number): void {
3125        if (isAsciiDigit(cp)) {
3126            this.charRefCode = this.charRefCode * 10 + cp - 0x30;
3127        } else if (cp === $.SEMICOLON) {
3128            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3129        } else {
3130            this._err(ERR.missingSemicolonAfterCharacterReference);
3131            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3132            this._stateNumericCharacterReferenceEnd();
3133        }
3134    }
3135
3136    // Numeric character reference end state
3137    //------------------------------------------------------------------
3138    private _stateNumericCharacterReferenceEnd(): void {
3139        if (this.charRefCode === $.NULL) {
3140            this._err(ERR.nullCharacterReference);
3141            this.charRefCode = $.REPLACEMENT_CHARACTER;
3142        } else if (this.charRefCode > 0x10_ff_ff) {
3143            this._err(ERR.characterReferenceOutsideUnicodeRange);
3144            this.charRefCode = $.REPLACEMENT_CHARACTER;
3145        } else if (isSurrogate(this.charRefCode)) {
3146            this._err(ERR.surrogateCharacterReference);
3147            this.charRefCode = $.REPLACEMENT_CHARACTER;
3148        } else if (isUndefinedCodePoint(this.charRefCode)) {
3149            this._err(ERR.noncharacterCharacterReference);
3150        } else if (isControlCodePoint(this.charRefCode) || this.charRefCode === $.CARRIAGE_RETURN) {
3151            this._err(ERR.controlCharacterReference);
3152
3153            const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode);
3154
3155            if (replacement !== undefined) {
3156                this.charRefCode = replacement;
3157            }
3158        }
3159
3160        this._flushCodePointConsumedAsCharacterReference(this.charRefCode);
3161        this._reconsumeInState(this.returnState);
3162    }
3163}
3164
/**
 * Checks how a tag token is closed and appends error entries to
 * `parse.compileResult.log`.
 *
 * `parse.nodeInfo` remembers the latest non-self-closing start tag for which
 * `parse.validator.isSupportedSelfClosing` returned a truthy value: `tn` is
 * the lower-cased tag name, `pos` its "startLine,startCol" string and `sc`
 * tells whether a matching end tag has been seen since.
 *
 * @param parse Tokenizer providing `validator`, `nodeInfo` and `compileResult`.
 * @param token Tag token to check.
 */
function checkselfClosingNode(parse: Tokenizer, token: TagToken) {
    const name = (token.tagName || '').toLowerCase();
    const writtenSelfClosing = token.selfClosing;
    const supported = parse.validator.isSupportedSelfClosing(name);
    const isStartTag = token.type === TokenType.START_TAG;
    const isEndTag = token.type === TokenType.END_TAG;
    const here = String(token.location?.startLine) + ',' + String(token.location?.startCol);

    // 1. A remembered tag is still open. It is reported when the current tag
    //    is not supported, or is a start tag at a different position.
    const pending = parse.nodeInfo;
    const hasOpenPending = Boolean(pending.tn && name && !pending.sc);

    if (hasOpenPending && (!supported || (here !== pending.pos && isStartTag))) {
        const [lineText, columnText] = pending.pos.split(',');

        parse.compileResult.log.push({
            line: Number(lineText) || 1,
            column: Number(columnText) || 1,
            reason: 'ERROR: tag `' + pending.tn + '` must be closed, please follow norm',
        });
        parse.nodeInfo = { tn: '', sc: false, pos: '' };
    }

    // 2. Update the remembered tag (read again: step 1 may have replaced it).
    if (name && supported) {
        const info = parse.nodeInfo;

        if (isStartTag && !writtenSelfClosing) {
            info.tn = name;
            info.sc = false;
            info.pos = here;
        }
        if (isEndTag && name === info.tn) {
            info.sc = true;
        }
    }

    // 3. Self-closing syntax used on a start tag that does not support it.
    if (!supported && writtenSelfClosing && isStartTag) {
        parse.compileResult.log.push({
            line: token.location?.startLine || 1,
            column: token.location?.startCol || 1,
            reason: 'ERROR: tag `' + name + '` can not use selfClosing',
        });
    }
}