• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import { Preprocessor } from './preprocessor.js';
2import {
3    CODE_POINTS as $,
4    SEQUENCES as $$,
5    REPLACEMENT_CHARACTER,
6    isSurrogate,
7    isUndefinedCodePoint,
8    isControlCodePoint,
9} from '../common/unicode.js';
10import {
11    TokenType,
12    getTokenAttr,
13    type Token,
14    type CharacterToken,
15    type DoctypeToken,
16    type TagToken,
17    type EOFToken,
18    type CommentToken,
19    type Attribute,
20    type Location,
21} from '../common/token.js';
22import { htmlDecodeTree, BinTrieFlags, determineBranch } from 'entities/lib/decode.js';
23import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
24import { TAG_ID, getTagID } from '../common/html.js';
25
/**
 * C1 Unicode control character reference replacements.
 *
 * Maps a code point in the 0x80-0x9F range to the code point that should be used in its place.
 * Code points of that range without an entry (0x81, 0x8D, 0x8F, 0x90, 0x9D) have no replacement.
 */
const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map<number, number>([
    [0x80, 0x20ac],
    [0x82, 0x201a],
    [0x83, 0x0192],
    [0x84, 0x201e],
    [0x85, 0x2026],
    [0x86, 0x2020],
    [0x87, 0x2021],
    [0x88, 0x02c6],
    [0x89, 0x2030],
    [0x8a, 0x0160],
    [0x8b, 0x2039],
    [0x8c, 0x0152],
    [0x8e, 0x017d],
    [0x91, 0x2018],
    [0x92, 0x2019],
    [0x93, 0x201c],
    [0x94, 0x201d],
    [0x95, 0x2022],
    [0x96, 0x2013],
    [0x97, 0x2014],
    [0x98, 0x02dc],
    [0x99, 0x2122],
    [0x9a, 0x0161],
    [0x9b, 0x203a],
    [0x9c, 0x0153],
    [0x9e, 0x017e],
    [0x9f, 0x0178],
]);
56
57//States
58const enum State {
    // Identifiers for every tokenizer state. No member has an explicit initializer, so each
    // one takes the next ordinal starting at 0: inserting or reordering members changes the
    // value of every member after it. `_callState` switches on these values and
    // `TokenizerMode` exposes a handful of them to other modules.

    // Content states
59    DATA,
60    RCDATA,
61    RAWTEXT,
62    SCRIPT_DATA,
63    PLAINTEXT,
    // Tags
64    TAG_OPEN,
65    END_TAG_OPEN,
66    TAG_NAME,
    // RCDATA / RAWTEXT end-tag detection
67    RCDATA_LESS_THAN_SIGN,
68    RCDATA_END_TAG_OPEN,
69    RCDATA_END_TAG_NAME,
70    RAWTEXT_LESS_THAN_SIGN,
71    RAWTEXT_END_TAG_OPEN,
72    RAWTEXT_END_TAG_NAME,
    // Script data, including the escaped and double-escaped variants
73    SCRIPT_DATA_LESS_THAN_SIGN,
74    SCRIPT_DATA_END_TAG_OPEN,
75    SCRIPT_DATA_END_TAG_NAME,
76    SCRIPT_DATA_ESCAPE_START,
77    SCRIPT_DATA_ESCAPE_START_DASH,
78    SCRIPT_DATA_ESCAPED,
79    SCRIPT_DATA_ESCAPED_DASH,
80    SCRIPT_DATA_ESCAPED_DASH_DASH,
81    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
82    SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
83    SCRIPT_DATA_ESCAPED_END_TAG_NAME,
84    SCRIPT_DATA_DOUBLE_ESCAPE_START,
85    SCRIPT_DATA_DOUBLE_ESCAPED,
86    SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
87    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
88    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
89    SCRIPT_DATA_DOUBLE_ESCAPE_END,
    // Attributes
90    BEFORE_ATTRIBUTE_NAME,
91    ATTRIBUTE_NAME,
92    AFTER_ATTRIBUTE_NAME,
93    BEFORE_ATTRIBUTE_VALUE,
94    ATTRIBUTE_VALUE_DOUBLE_QUOTED,
95    ATTRIBUTE_VALUE_SINGLE_QUOTED,
96    ATTRIBUTE_VALUE_UNQUOTED,
97    AFTER_ATTRIBUTE_VALUE_QUOTED,
98    SELF_CLOSING_START_TAG,
    // Comments and markup declarations
99    BOGUS_COMMENT,
100    MARKUP_DECLARATION_OPEN,
101    COMMENT_START,
102    COMMENT_START_DASH,
103    COMMENT,
104    COMMENT_LESS_THAN_SIGN,
105    COMMENT_LESS_THAN_SIGN_BANG,
106    COMMENT_LESS_THAN_SIGN_BANG_DASH,
107    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH,
108    COMMENT_END_DASH,
109    COMMENT_END,
110    COMMENT_END_BANG,
    // DOCTYPE
111    DOCTYPE,
112    BEFORE_DOCTYPE_NAME,
113    DOCTYPE_NAME,
114    AFTER_DOCTYPE_NAME,
115    AFTER_DOCTYPE_PUBLIC_KEYWORD,
116    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
117    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
118    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
119    AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
120    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
121    AFTER_DOCTYPE_SYSTEM_KEYWORD,
122    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
123    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
124    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
125    AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
126    BOGUS_DOCTYPE,
    // CDATA sections
127    CDATA_SECTION,
128    CDATA_SECTION_BRACKET,
129    CDATA_SECTION_END,
    // Character references. NOTE(review): "HEXADEMICAL" is a misspelling of "hexadecimal";
    // fixing it would require renaming every use of these two members.
130    CHARACTER_REFERENCE,
131    NAMED_CHARACTER_REFERENCE,
132    AMBIGUOUS_AMPERSAND,
133    NUMERIC_CHARACTER_REFERENCE,
134    HEXADEMICAL_CHARACTER_REFERENCE_START,
135    HEXADEMICAL_CHARACTER_REFERENCE,
136    DECIMAL_CHARACTER_REFERENCE,
137    NUMERIC_CHARACTER_REFERENCE_END,
138}
139
140//Tokenizer initial states for different modes
141export const TokenizerMode = {
    // `State` is a non-exported const enum, so this object is how other modules name a
    // tokenizer state. Only these six states are exposed; `as const` keeps each property
    // typed as its specific enum member rather than widening it.
142    DATA: State.DATA,
143    RCDATA: State.RCDATA,
144    RAWTEXT: State.RAWTEXT,
145    SCRIPT_DATA: State.SCRIPT_DATA,
146    PLAINTEXT: State.PLAINTEXT,
147    CDATA_SECTION: State.CDATA_SECTION,
148} as const;
149
150//Utils
151
152//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
153//these functions if they are located in another module, due to the context switch.
154//Always perform an inlining check before modifying these functions ('node --trace-inlining').
155
/** Returns true when `cp` lies between `$.DIGIT_0` and `$.DIGIT_9`, inclusive. */
function isAsciiDigit(cp: number): boolean {
    const atLeastZero = cp >= $.DIGIT_0;

    return atLeastZero && cp <= $.DIGIT_9;
}
159
/** Returns true when `cp` lies between `$.LATIN_CAPITAL_A` and `$.LATIN_CAPITAL_Z`, inclusive. */
function isAsciiUpper(cp: number): boolean {
    const atLeastA = cp >= $.LATIN_CAPITAL_A;

    return atLeastA && cp <= $.LATIN_CAPITAL_Z;
}
163
/** Returns true when `cp` lies between `$.LATIN_SMALL_A` and `$.LATIN_SMALL_Z`, inclusive. */
function isAsciiLower(cp: number): boolean {
    const atLeastA = cp >= $.LATIN_SMALL_A;

    return atLeastA && cp <= $.LATIN_SMALL_Z;
}
167
/** Returns true when `cp` is accepted by `isAsciiLower` or by `isAsciiUpper`. */
function isAsciiLetter(cp: number): boolean {
    if (isAsciiLower(cp)) {
        return true;
    }

    return isAsciiUpper(cp);
}
171
/** Returns true when `cp` is accepted by `isAsciiLetter` or by `isAsciiDigit`. */
function isAsciiAlphaNumeric(cp: number): boolean {
    if (isAsciiLetter(cp)) {
        return true;
    }

    return isAsciiDigit(cp);
}
175
/** Returns true when `cp` lies between `$.LATIN_CAPITAL_A` and `$.LATIN_CAPITAL_F`, inclusive. */
function isAsciiUpperHexDigit(cp: number): boolean {
    const atLeastA = cp >= $.LATIN_CAPITAL_A;

    return atLeastA && cp <= $.LATIN_CAPITAL_F;
}
179
/** Returns true when `cp` lies between `$.LATIN_SMALL_A` and `$.LATIN_SMALL_F`, inclusive. */
function isAsciiLowerHexDigit(cp: number): boolean {
    const atLeastA = cp >= $.LATIN_SMALL_A;

    return atLeastA && cp <= $.LATIN_SMALL_F;
}
183
/**
 * Returns true when `cp` is a decimal digit or one of the letters A-F / a-f,
 * as decided by `isAsciiDigit`, `isAsciiUpperHexDigit` and `isAsciiLowerHexDigit`.
 */
function isAsciiHexDigit(cp: number): boolean {
    if (isAsciiDigit(cp)) {
        return true;
    }

    if (isAsciiUpperHexDigit(cp)) {
        return true;
    }

    return isAsciiLowerHexDigit(cp);
}
187
/**
 * Adds 0x20 to `cp` unconditionally.
 *
 * The function does not check its input: the result is the lowercase counterpart only when
 * `cp` is an ASCII uppercase letter, so callers are expected to test that first.
 */
function toAsciiLower(cp: number): number {
    const caseOffset = 0x20;

    return cp + caseOffset;
}
191
/**
 * Returns true when `cp` is one of `$.SPACE`, `$.LINE_FEED`, `$.TABULATION` or `$.FORM_FEED`.
 * No other code point is treated as whitespace by this helper.
 */
function isWhitespace(cp: number): boolean {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            return true;
        }
        default: {
            return false;
        }
    }
}
195
/**
 * Returns true when `nextCp` is `$.EQUALS_SIGN` or an ASCII letter/digit.
 * `_matchNamedCharacterReference` applies it to the code point that follows a matched
 * entity name inside an attribute value.
 */
function isEntityInAttributeInvalidEnd(nextCp: number): boolean {
    if (nextCp === $.EQUALS_SIGN) {
        return true;
    }

    return isAsciiAlphaNumeric(nextCp);
}
199
/** Returns true when `cp` is whitespace (per `isWhitespace`), `$.SOLIDUS` or `$.GREATER_THAN_SIGN`. */
function isScriptDataDoubleEscapeSequenceEnd(cp: number): boolean {
    if (isWhitespace(cp)) {
        return true;
    }

    return cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN;
}
203
/**
 * Default validator used by `Tokenizer` when the options do not supply one:
 * it reports that no tag supports self-closing.
 */
const componentValidator = {
    isSupportedSelfClosing(): boolean {
        return false;
    },
};
205
206interface Validator {
    // Contract for the object stored in `Tokenizer.validator`. The module-level default
    // (`componentValidator`) answers false for every tag; callers can supply their own
    // implementation through `TokenizerOptions.componentValidator`.
    // Returns whether the element named `tagName` may be written in self-closing form.
207    isSupportedSelfClosing(tagName: string): boolean;
208}
209
/**
 * Result object that a caller can hand to the tokenizer through
 * `TokenizerOptions.compileResult`; the tokenizer keeps a reference to it in
 * `Tokenizer.compileResult`.
 *
 * NOTE(review): nothing in this part of the file reads or writes these fields, so the
 * descriptions below are limited to what the declared types show.
 */
interface CompileResult {
    /** Object-typed slot; its shape is not constrained here. */
    jsonTemplate: {};
    /**
     * List of dependencies. Previously declared as `[]`, which is the empty-tuple type and
     * therefore rejected every attempt to add an element. The element type is not visible
     * from this module, hence `unknown` — TODO narrow once the producer is known.
     */
    deps: unknown[];
    /** Diagnostics, each carrying a source position and a human-readable reason. */
    log: {
        line: number;
        column: number;
        reason: string;
    }[];
}
219
/**
 * Shape of `Tokenizer.nodeInfo`.
 *
 * NOTE(review): the field names are abbreviations and nothing in this part of the file
 * reads them; the meanings below are presumed from the names — confirm against the code
 * that fills them in.
 */
interface NodeInfo {
    /** Presumably a tag name. */
    tn: string;
    /** Presumably a self-closing flag. */
    sc: boolean;
    /** Presumably a source position rendered as text. */
    pos: string;
}
225
226export interface TokenizerOptions {
    // Options accepted by the `Tokenizer` constructor. Every field is optional.

    // Replaces the default validator (which reports that no tag supports self-closing).
227    componentValidator?: Validator;
    // Replaces the empty result object the tokenizer would otherwise create for itself.
228    compileResult?: CompileResult;
    // When truthy, `getCurrentLocation` produces `Location` objects; otherwise it returns null
    // and tokens carry no location data.
229    sourceCodeLocationInfo?: boolean;
230}
231
232export interface TokenHandler {
    // Callbacks through which the tokenizer delivers its output. One handler object is passed
    // to the `Tokenizer` constructor, which also forwards it to the `Preprocessor`.

    // Called by `emitCurrentComment`.
233    onComment(token: CommentToken): void;
    // Called by `emitCurrentDoctype`.
234    onDoctype(token: DoctypeToken): void;
    // Called by `emitCurrentTagToken` for START_TAG tokens.
235    onStartTag(token: TagToken): void;
    // Called by `emitCurrentTagToken` for every other tag token (end tags).
236    onEndTag(token: TagToken): void;
    // Called by `_emitEOFToken`, after any pending character token has been flushed.
237    onEof(token: EOFToken): void;
    // The three character callbacks below are selected by the pending token's `type`
    // in `_emitCurrentCharacterToken`.
238    onCharacter(token: CharacterToken): void;
239    onNullCharacter(token: CharacterToken): void;
240    onWhitespaceCharacter(token: CharacterToken): void;
241
    // Optional error sink; `_err` skips reporting entirely when this is null or undefined.
242    onParseError?: ParserErrorHandler | null;
243}
244
245//Tokenizer
246export class Tokenizer {
247    public preprocessor: Preprocessor;
    // ^ Input source: created in the constructor; every code point is read through it.
248
    // Set by `pause()` and cleared by `resume()`; the parsing loop stops while it is true.
249    private paused = false;
250    /** Ensures that the parsing loop isn't run multiple times at once. */
251    private inLoop = false;
252
253    /**
254     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
255     * and that it is not an integration point for either MathML or HTML.
256     *
257     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
258     */
259    public inForeignNode = false;
    // Tag name of the most recently emitted start tag (written in `emitCurrentTagToken`).
260    public lastStartTagName = '';
    // True while there is input to process: set by `write()` / `insertHtmlAtCurrentPos()`,
    // cleared when the end of the chunk is hit and after the EOF token is emitted.
261    public active = false;
262
263    public nodeInfo: NodeInfo = { tn: '', sc: false, pos: '' };
    // Both defaults below are replaced in the constructor when the options provide a value.
264    public validator: Validator = componentValidator;
265    public compileResult: CompileResult = { jsonTemplate: {}, deps: [], log: [] };
266
    // State the next code point is dispatched to by `_callState`.
267    public state = State.DATA;
    // Inspected by `_isCharacterReferenceInAttribute` to tell whether a character reference
    // is being read inside an attribute value.
268    private returnState = State.DATA;
269
270    private charRefCode = -1;
271
    // Number of code points consumed since the current loop iteration began; used to rewind
    // the preprocessor when the end of the chunk is hit mid-token.
272    private consumedAfterSnapshot = -1;
273
    // Assigned in the constructor; null whenever location tracking is disabled.
274    private currentLocation: Location | null;
    // Character data accumulated but not yet handed to the handler.
275    private currentCharacterToken: CharacterToken | null = null;
    // Tag, comment or doctype token currently under construction.
276    private currentToken: Token | null = null;
277    private currentAttr: Attribute = { name: '', value: '' };
278
/**
 * @param options Tokenizer options; a supplied `componentValidator` / `compileResult`
 *     replaces the corresponding default field value.
 * @param handler Receives the emitted tokens; it is also passed to the `Preprocessor`.
 */
constructor(private options: TokenizerOptions, private handler: TokenHandler) {
    this.preprocessor = new Preprocessor(handler);
    this.currentLocation = this.getCurrentLocation(-1);

    const customValidator = options.componentValidator;
    if (customValidator) {
        this.validator = customValidator;
    }

    const sharedResult = options.compileResult;
    if (sharedResult) {
        this.compileResult = sharedResult;
    }
}
290
291    //Errors
/**
 * Reports a parse error to the handler.
 * When the handler has no `onParseError` callback, the error object is not even built.
 */
private _err(code: ERR): void {
    if (this.handler.onParseError != null) {
        this.handler.onParseError(this.preprocessor.getError(code));
    }
}
295
296    // NOTE: `offset` may never run across line boundaries.
/**
 * Builds a `Location` whose start is the preprocessor's current position moved back by
 * `offset` columns/characters. The end fields are initialised to -1 and filled in later.
 *
 * @returns The new location, or null when `sourceCodeLocationInfo` is not enabled.
 */
private getCurrentLocation(offset: number): Location | null {
    if (this.options.sourceCodeLocationInfo) {
        const source = this.preprocessor;

        return {
            startLine: source.line,
            startCol: source.col - offset,
            startOffset: source.offset - offset,
            endLine: -1,
            endCol: -1,
            endOffset: -1,
        };
    }

    return null;
}
311
/**
 * Drives the tokenizer: while it is active and not paused, consumes one code point and
 * dispatches it to the handler of the current state.
 *
 * `inLoop` guards against re-entrancy — a call made while the loop is already running
 * (for example from a token callback) returns immediately and lets the outer loop continue.
 */
private _runParsingLoop(): void {
    if (this.inLoop) return;

    this.inLoop = true;

    try {
        while (this.active && !this.paused) {
            // Start a fresh snapshot so hibernation can rewind exactly this iteration.
            this.consumedAfterSnapshot = 0;

            const cp = this._consume();

            if (!this._ensureHibernation()) {
                this._callState(cp);
            }
        }
    } finally {
        // Release the guard even when a state handler or a token callback throws.
        // Otherwise `inLoop` would stay true forever and every later write() would
        // return without tokenizing anything.
        this.inLoop = false;
    }
}
329
330    //API
331    public pause(): void {
        // Only raises the flag; the parsing loop checks it before consuming each code point
        // and stops there. Use `resume()` to continue.
332        this.paused = true;
333    }
334
/**
 * Clears the paused flag and continues tokenizing.
 *
 * @param writeCallback Invoked once the loop has run, unless the tokenizer was paused again
 *     in the meantime. It is not invoked when resume() is called from inside the running loop.
 * @throws Error when the tokenizer is not currently paused.
 */
public resume(writeCallback?: () => void): void {
    if (!this.paused) {
        throw new Error('Parser was already resumed');
    }

    this.paused = false;

    // Necessary for synchronous resume: the loop that is already running carries on by itself.
    if (!this.inLoop) {
        this._runParsingLoop();

        if (!this.paused) {
            writeCallback?.();
        }
    }
}
351
/**
 * Feeds a chunk of input to the preprocessor and runs the parsing loop over it.
 *
 * @param chunk Text to tokenize.
 * @param isLastChunk Passed through to the preprocessor.
 * @param writeCallback Invoked after the loop returns, unless the tokenizer is paused.
 */
public write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void {
    this.active = true;
    this.preprocessor.write(chunk, isLastChunk);
    this._runParsingLoop();

    if (this.paused) {
        return;
    }

    writeCallback?.();
}
361
362    public insertHtmlAtCurrentPos(chunk: string): void {
        // Re-activates the tokenizer, asks the preprocessor to insert `chunk` at its current
        // position, and runs the parsing loop (a no-op when the loop is already running).
363        this.active = true;
364        this.preprocessor.insertHtmlAtCurrentPos(chunk);
365        this._runParsingLoop();
366    }
367
368    //Hibernation
/**
 * Checks whether the preprocessor ran out of input. If it did, everything consumed since
 * the last snapshot is given back and the tokenizer is deactivated until more input arrives.
 *
 * @returns true when the tokenizer went into hibernation, false otherwise.
 */
private _ensureHibernation(): boolean {
    if (!this.preprocessor.endOfChunkHit) {
        return false;
    }

    this._unconsume(this.consumedAfterSnapshot);
    this.active = false;

    return true;
}
379
380    //Consumption
/** Advances the preprocessor by one code point, counts it, and returns what was read. */
private _consume(): number {
    this.consumedAfterSnapshot += 1;

    const cp = this.preprocessor.advance();

    return cp;
}
385
386    private _unconsume(count: number): void {
        // Gives `count` code points back: lowers the consumed-since-snapshot counter and
        // moves the preprocessor back by the same amount.
387        this.consumedAfterSnapshot -= count;
388        this.preprocessor.retreat(count);
389    }
390
391    private _reconsumeInState(state: State, cp: number): void {
        // Switches to `state` and immediately dispatches the same code point to it, without
        // reading anything new from the preprocessor.
392        this.state = state;
393        this._callState(cp);
394    }
395
/** Advances the preprocessor `count` times, discarding the code points, and counts them as consumed. */
private _advanceBy(count: number): void {
    this.consumedAfterSnapshot += count;

    let remaining = count;
    while (remaining > 0) {
        this.preprocessor.advance();
        remaining -= 1;
    }
}
402
/**
 * Consumes `pattern` when the preprocessor's `startsWith` check reports a match.
 *
 * @returns true when the pattern matched and was consumed; false when nothing was consumed.
 */
private _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean {
    if (!this.preprocessor.startsWith(pattern, caseSensitive)) {
        return false;
    }

    // We will already have consumed one character before calling this method.
    const remaining = pattern.length - 1;
    this._advanceBy(remaining);

    return true;
}
411
412    //Token creation
/** Makes an empty START_TAG token current; its location starts one character back. */
private _createStartTagToken(): void {
    const startTag: Token = {
        type: TokenType.START_TAG,
        tagName: '',
        tagID: TAG_ID.UNKNOWN,
        selfClosing: false,
        ackSelfClosing: false,
        attrs: [],
        location: this.getCurrentLocation(1),
    };

    this.currentToken = startTag;
}
424
/** Makes an empty END_TAG token current; its location starts two characters back. */
private _createEndTagToken(): void {
    const endTag: Token = {
        type: TokenType.END_TAG,
        tagName: '',
        tagID: TAG_ID.UNKNOWN,
        selfClosing: false,
        ackSelfClosing: false,
        attrs: [],
        location: this.getCurrentLocation(2),
    };

    this.currentToken = endTag;
}
436
/**
 * Makes an empty COMMENT token current.
 *
 * @param offset How far back from the current position the comment's location starts.
 */
private _createCommentToken(offset: number): void {
    const comment: Token = {
        type: TokenType.COMMENT,
        data: '',
        location: this.getCurrentLocation(offset),
    };

    this.currentToken = comment;
}
444
/**
 * Makes a DOCTYPE token current, with no identifiers and `forceQuirks` off.
 * Unlike the tag and comment tokens it reuses `this.currentLocation` instead of
 * creating a new location.
 *
 * @param initialName Initial value of the token's `name` (may be null).
 */
private _createDoctypeToken(initialName: string | null): void {
    const doctype: Token = {
        type: TokenType.DOCTYPE,
        name: initialName,
        forceQuirks: false,
        publicId: null,
        systemId: null,
        location: this.currentLocation,
    };

    this.currentToken = doctype;
}
455
/** Starts a new pending character token of the given type, located at `this.currentLocation`. */
private _createCharacterToken(type: CharacterToken['type'], chars: string): void {
    const pending: CharacterToken = {
        type,
        chars,
        location: this.currentLocation,
    };

    this.currentCharacterToken = pending;
}
463
464    //Tag attributes
/**
 * Starts a new attribute whose name begins with `attrNameFirstCh` and whose value is empty,
 * and points `currentLocation` at the current position so the attribute's location can be recorded.
 */
private _createAttr(attrNameFirstCh: string): void {
    const attr: Attribute = { name: attrNameFirstCh, value: '' };

    this.currentAttr = attr;
    this.currentLocation = this.getCurrentLocation(0);
}
472
/**
 * Called when an attribute name is complete. Adds the attribute to the current tag token
 * unless one with the same name is already present, in which case a duplicate-attribute
 * error is reported and the attribute is not added.
 */
private _leaveAttrName(): void {
    const tag = this.currentToken as TagToken;
    const attr = this.currentAttr;

    if (getTokenAttr(tag, attr.name) !== null) {
        this._err(ERR.duplicateAttribute);
        return;
    }

    tag.attrs.push(attr);

    if (tag.location && this.currentLocation) {
        // The per-attribute location table is created lazily, without a prototype.
        const attrLocations = (tag.location.attrs ??= Object.create(null));
        attrLocations[attr.name] = this.currentLocation;

        // Set end location
        this._leaveAttrValue();
    }
}
490
/** Stamps the preprocessor's current position as the end of `currentLocation`, if there is one. */
private _leaveAttrValue(): void {
    const location = this.currentLocation;

    if (!location) {
        return;
    }

    const { line, col, offset } = this.preprocessor;

    location.endLine = line;
    location.endCol = col;
    location.endOffset = offset;
}
498
499    //Token emission
/**
 * Common work done before a tag, comment or doctype token is handed to the handler:
 * flushes pending character data (ending it where `ct` starts), clears the current token,
 * closes `ct`'s location one past the current position, and starts a new current location.
 */
private prepareToken(ct: Token): void {
    const tokenLocation = ct.location;

    this._emitCurrentCharacterToken(tokenLocation);
    this.currentToken = null;

    if (tokenLocation) {
        const source = this.preprocessor;

        tokenLocation.endLine = source.line;
        tokenLocation.endCol = source.col + 1;
        tokenLocation.endOffset = source.offset + 1;
    }

    this.currentLocation = this.getCurrentLocation(-1);
}
512
/**
 * Finalises the current tag token and hands it to the handler.
 *
 * The token is first passed to `checkselfClosingNode`, then prepared and given its tag ID.
 * Start tags are remembered in `lastStartTagName`; other tag tokens are checked for
 * attributes and a trailing solidus, both of which are reported as parse errors.
 */
private emitCurrentTagToken(): void {
    const tag = this.currentToken as TagToken;

    checkselfClosingNode(this, tag);
    this.prepareToken(tag);

    tag.tagID = getTagID(tag.tagName);

    if (tag.type !== TokenType.START_TAG) {
        if (tag.attrs.length > 0) {
            this._err(ERR.endTagWithAttributes);
        }

        if (tag.selfClosing) {
            this._err(ERR.endTagWithTrailingSolidus);
        }

        this.handler.onEndTag(tag);
    } else {
        this.lastStartTagName = tag.tagName;
        this.handler.onStartTag(tag);
    }

    this.preprocessor.dropParsedChunk();
}
537
538    private emitCurrentComment(ct: CommentToken): void {
        // Finalises `ct` (pending character data is flushed and the end location stamped by
        // `prepareToken`) and hands it to the handler.
539        this.prepareToken(ct);
540        this.handler.onComment(ct);
541
        // NOTE(review): presumably lets the preprocessor release input that has already been
        // tokenized — confirm against the Preprocessor implementation.
542        this.preprocessor.dropParsedChunk();
543    }
544
545    private emitCurrentDoctype(ct: DoctypeToken): void {
        // Finalises `ct` (pending character data is flushed and the end location stamped by
        // `prepareToken`) and hands it to the handler.
546        this.prepareToken(ct);
547        this.handler.onDoctype(ct);
548
        // NOTE(review): presumably lets the preprocessor release input that has already been
        // tokenized — confirm against the Preprocessor implementation.
549        this.preprocessor.dropParsedChunk();
550    }
551
/**
 * Hands the pending character token, if any, to the handler callback that matches its type
 * and then clears it. Does nothing when no character token is pending.
 *
 * @param nextLocation Location of whatever follows the character data; when both it and the
 *     pending token's location are available, its start becomes the pending token's end.
 */
private _emitCurrentCharacterToken(nextLocation: Location | null): void {
    const pending = this.currentCharacterToken;

    if (!pending) {
        return;
    }

    //NOTE: if we have a pending character token, make its end location equal to the
    //current token's start location.
    const pendingLocation = pending.location;
    if (nextLocation && pendingLocation) {
        pendingLocation.endLine = nextLocation.startLine;
        pendingLocation.endCol = nextLocation.startCol;
        pendingLocation.endOffset = nextLocation.startOffset;
    }

    if (pending.type === TokenType.CHARACTER) {
        this.handler.onCharacter(pending);
    } else if (pending.type === TokenType.NULL_CHARACTER) {
        this.handler.onNullCharacter(pending);
    } else if (pending.type === TokenType.WHITESPACE_CHARACTER) {
        this.handler.onWhitespaceCharacter(pending);
    }

    this.currentCharacterToken = null;
}
580
/**
 * Flushes pending character data, emits the EOF token and deactivates the tokenizer.
 * The EOF token's location, when tracked, is zero-length: its end equals its start.
 */
private _emitEOFToken(): void {
    const eofLocation = this.getCurrentLocation(0);

    if (eofLocation) {
        const { startLine, startCol, startOffset } = eofLocation;

        eofLocation.endLine = startLine;
        eofLocation.endCol = startCol;
        eofLocation.endOffset = startOffset;
    }

    this._emitCurrentCharacterToken(eofLocation);
    this.handler.onEof({ type: TokenType.EOF, location: eofLocation });
    this.active = false;
}
594
595    //Characters emission
596
/**
 * Adds `ch` to the pending character token, starting a new one when necessary.
 *
 * OPTIMIZATION: the specification uses only one type of character token (one token per
 * character). This causes a huge memory overhead and a lot of unnecessary parser loops.
 * parse5 uses 3 groups of characters instead; a run of characters that belong to the same
 * group is processed as a single character token:
 * 1) TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
 * 2) TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n  \r\t   \f')
 * 3) TokenType.CHARACTER - any character sequence which doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
 *
 * When the pending token belongs to a different group it is emitted first, ending at the
 * current position, and a new token of the requested group is started.
 */
private _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void {
    const pending = this.currentCharacterToken;

    if (pending) {
        if (pending.type === type) {
            pending.chars += ch;
            return;
        }

        this.currentLocation = this.getCurrentLocation(0);
        this._emitCurrentCharacterToken(this.currentLocation);
        this.preprocessor.dropParsedChunk();
    }

    this._createCharacterToken(type, ch);
}
619
/**
 * Emits a single code point as character data, classifying it as whitespace, null,
 * or ordinary character so it joins the right kind of character token.
 */
private _emitCodePoint(cp: number): void {
    let type: CharacterToken['type'];

    if (isWhitespace(cp)) {
        type = TokenType.WHITESPACE_CHARACTER;
    } else if (cp === $.NULL) {
        type = TokenType.NULL_CHARACTER;
    } else {
        type = TokenType.CHARACTER;
    }

    this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
}
629
630    //NOTE: used when we emit characters explicitly.
631    //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
632    private _emitChars(ch: string): void {
        // Appends `ch` as ordinary character data without classifying it: the type is always
        // TokenType.CHARACTER, so callers must not pass whitespace or null characters.
633        this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
634    }
635
636    // Character reference helpers
637    private _matchNamedCharacterReference(cp: number): number[] | null {
        // Walks `htmlDecodeTree` one code point at a time — first `cp`, then whatever
        // `_consume()` returns — and remembers the most recent entry that carried a value.
        // Returns the decoded code point(s) of that entry, `[$.AMPERSAND]` for the
        // unterminated-in-attribute case handled below, or null when nothing matched.
        // Code points read past the remembered match are given back before returning.
638        let result: number[] | null = null;
        // Code points consumed since the last decoded match; reset to 0 whenever a value is
        // stored and handed back to the preprocessor after the loop.
639        let excess = 0;
        // True when the stored match was not followed by `;` (reported as a parse error below).
640        let withoutSemicolon = false;
641
642        for (let i = 0, current = htmlDecodeTree[0]; i >= 0; cp = this._consume()) {
643            i = determineBranch(htmlDecodeTree, current, i + 1, cp);
644
            // A negative index ends the walk: no branch was found for this code point.
645            if (i < 0) break;
646
647            excess += 1;
648
649            current = htmlDecodeTree[i];
650
651            const masked = current & BinTrieFlags.VALUE_LENGTH;
652
653            // If the branch is a value, store it and continue
654            if (masked) {
655                // The mask is the number of bytes of the value, including the current byte.
656                const valueLength = (masked >> 14) - 1;
657
658                // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
659                // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
660                if (
661                    cp !== $.SEMICOLON &&
662                    this._isCharacterReferenceInAttribute() &&
663                    isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))
664                ) {
665                    //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after
666                    //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes.
667                    result = [$.AMPERSAND];
668
669                    // Skip over the value.
670                    i += valueLength;
671                } else {
672                    // If this is a surrogate pair, consume the next two bytes.
673                    result =
674                        valueLength === 0
675                            ? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH]
676                            : valueLength === 1
677                            ? [htmlDecodeTree[++i]]
678                            : [htmlDecodeTree[++i], htmlDecodeTree[++i]];
679                    excess = 0;
680                    withoutSemicolon = cp !== $.SEMICOLON;
681                }
682
683                if (valueLength === 0) {
684                    // If the value is zero-length, we're done.
                    // The extra _consume() keeps the "one code point too many" invariant that
                    // the final _unconsume(1) below relies on, since `break` skips the loop's
                    // own update expression.
685                    this._consume();
686                    break;
687                }
688            }
689        }
690
691        this._unconsume(excess);
692
693        if (withoutSemicolon && !this.preprocessor.endOfChunkHit) {
694            this._err(ERR.missingSemicolonAfterCharacterReference);
695        }
696
697        // We want to emit the error above on the code point after the entity.
698        // We always consume one code point too many in the loop, and we wait to
699        // unconsume it until after the error is emitted.
700        this._unconsume(1);
701
702        return result;
703    }
704
/** Returns true when `returnState` is one of the three attribute-value states. */
private _isCharacterReferenceInAttribute(): boolean {
    switch (this.returnState) {
        case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED:
        case State.ATTRIBUTE_VALUE_SINGLE_QUOTED:
        case State.ATTRIBUTE_VALUE_UNQUOTED: {
            return true;
        }
        default: {
            return false;
        }
    }
}
712
/**
 * Outputs a code point that was consumed as part of a character reference: it is emitted
 * as character data, unless the reference occurred inside an attribute value, in which
 * case it is appended to the current attribute's value instead.
 */
private _flushCodePointConsumedAsCharacterReference(cp: number): void {
    if (!this._isCharacterReferenceInAttribute()) {
        this._emitCodePoint(cp);
        return;
    }

    this.currentAttr.value += String.fromCodePoint(cp);
}
720
721    // Calling states this way turns out to be much faster than any other approach.
722    private _callState(cp: number): void {
723        switch (this.state) {
724            case State.DATA: {
725                this._stateData(cp);
726                break;
727            }
728            case State.RCDATA: {
729                this._stateRcdata(cp);
730                break;
731            }
732            case State.RAWTEXT: {
733                this._stateRawtext(cp);
734                break;
735            }
736            case State.SCRIPT_DATA: {
737                this._stateScriptData(cp);
738                break;
739            }
740            case State.PLAINTEXT: {
741                this._statePlaintext(cp);
742                break;
743            }
744            case State.TAG_OPEN: {
745                this._stateTagOpen(cp);
746                break;
747            }
748            case State.END_TAG_OPEN: {
749                this._stateEndTagOpen(cp);
750                break;
751            }
752            case State.TAG_NAME: {
753                this._stateTagName(cp);
754                break;
755            }
756            case State.RCDATA_LESS_THAN_SIGN: {
757                this._stateRcdataLessThanSign(cp);
758                break;
759            }
760            case State.RCDATA_END_TAG_OPEN: {
761                this._stateRcdataEndTagOpen(cp);
762                break;
763            }
764            case State.RCDATA_END_TAG_NAME: {
765                this._stateRcdataEndTagName(cp);
766                break;
767            }
768            case State.RAWTEXT_LESS_THAN_SIGN: {
769                this._stateRawtextLessThanSign(cp);
770                break;
771            }
772            case State.RAWTEXT_END_TAG_OPEN: {
773                this._stateRawtextEndTagOpen(cp);
774                break;
775            }
776            case State.RAWTEXT_END_TAG_NAME: {
777                this._stateRawtextEndTagName(cp);
778                break;
779            }
780            case State.SCRIPT_DATA_LESS_THAN_SIGN: {
781                this._stateScriptDataLessThanSign(cp);
782                break;
783            }
784            case State.SCRIPT_DATA_END_TAG_OPEN: {
785                this._stateScriptDataEndTagOpen(cp);
786                break;
787            }
788            case State.SCRIPT_DATA_END_TAG_NAME: {
789                this._stateScriptDataEndTagName(cp);
790                break;
791            }
792            case State.SCRIPT_DATA_ESCAPE_START: {
793                this._stateScriptDataEscapeStart(cp);
794                break;
795            }
796            case State.SCRIPT_DATA_ESCAPE_START_DASH: {
797                this._stateScriptDataEscapeStartDash(cp);
798                break;
799            }
800            case State.SCRIPT_DATA_ESCAPED: {
801                this._stateScriptDataEscaped(cp);
802                break;
803            }
804            case State.SCRIPT_DATA_ESCAPED_DASH: {
805                this._stateScriptDataEscapedDash(cp);
806                break;
807            }
808            case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
809                this._stateScriptDataEscapedDashDash(cp);
810                break;
811            }
812            case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
813                this._stateScriptDataEscapedLessThanSign(cp);
814                break;
815            }
816            case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
817                this._stateScriptDataEscapedEndTagOpen(cp);
818                break;
819            }
820            case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
821                this._stateScriptDataEscapedEndTagName(cp);
822                break;
823            }
824            case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
825                this._stateScriptDataDoubleEscapeStart(cp);
826                break;
827            }
828            case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
829                this._stateScriptDataDoubleEscaped(cp);
830                break;
831            }
832            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
833                this._stateScriptDataDoubleEscapedDash(cp);
834                break;
835            }
836            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
837                this._stateScriptDataDoubleEscapedDashDash(cp);
838                break;
839            }
840            case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
841                this._stateScriptDataDoubleEscapedLessThanSign(cp);
842                break;
843            }
844            case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
845                this._stateScriptDataDoubleEscapeEnd(cp);
846                break;
847            }
848            case State.BEFORE_ATTRIBUTE_NAME: {
849                this._stateBeforeAttributeName(cp);
850                break;
851            }
852            case State.ATTRIBUTE_NAME: {
853                this._stateAttributeName(cp);
854                break;
855            }
856            case State.AFTER_ATTRIBUTE_NAME: {
857                this._stateAfterAttributeName(cp);
858                break;
859            }
860            case State.BEFORE_ATTRIBUTE_VALUE: {
861                this._stateBeforeAttributeValue(cp);
862                break;
863            }
864            case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
865                this._stateAttributeValueDoubleQuoted(cp);
866                break;
867            }
868            case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
869                this._stateAttributeValueSingleQuoted(cp);
870                break;
871            }
872            case State.ATTRIBUTE_VALUE_UNQUOTED: {
873                this._stateAttributeValueUnquoted(cp);
874                break;
875            }
876            case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
877                this._stateAfterAttributeValueQuoted(cp);
878                break;
879            }
880            case State.SELF_CLOSING_START_TAG: {
881                this._stateSelfClosingStartTag(cp);
882                break;
883            }
884            case State.BOGUS_COMMENT: {
885                this._stateBogusComment(cp);
886                break;
887            }
888            case State.MARKUP_DECLARATION_OPEN: {
889                this._stateMarkupDeclarationOpen(cp);
890                break;
891            }
892            case State.COMMENT_START: {
893                this._stateCommentStart(cp);
894                break;
895            }
896            case State.COMMENT_START_DASH: {
897                this._stateCommentStartDash(cp);
898                break;
899            }
900            case State.COMMENT: {
901                this._stateComment(cp);
902                break;
903            }
904            case State.COMMENT_LESS_THAN_SIGN: {
905                this._stateCommentLessThanSign(cp);
906                break;
907            }
908            case State.COMMENT_LESS_THAN_SIGN_BANG: {
909                this._stateCommentLessThanSignBang(cp);
910                break;
911            }
912            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
913                this._stateCommentLessThanSignBangDash(cp);
914                break;
915            }
916            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
917                this._stateCommentLessThanSignBangDashDash(cp);
918                break;
919            }
920            case State.COMMENT_END_DASH: {
921                this._stateCommentEndDash(cp);
922                break;
923            }
924            case State.COMMENT_END: {
925                this._stateCommentEnd(cp);
926                break;
927            }
928            case State.COMMENT_END_BANG: {
929                this._stateCommentEndBang(cp);
930                break;
931            }
932            case State.DOCTYPE: {
933                this._stateDoctype(cp);
934                break;
935            }
936            case State.BEFORE_DOCTYPE_NAME: {
937                this._stateBeforeDoctypeName(cp);
938                break;
939            }
940            case State.DOCTYPE_NAME: {
941                this._stateDoctypeName(cp);
942                break;
943            }
944            case State.AFTER_DOCTYPE_NAME: {
945                this._stateAfterDoctypeName(cp);
946                break;
947            }
948            case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
949                this._stateAfterDoctypePublicKeyword(cp);
950                break;
951            }
952            case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
953                this._stateBeforeDoctypePublicIdentifier(cp);
954                break;
955            }
956            case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
957                this._stateDoctypePublicIdentifierDoubleQuoted(cp);
958                break;
959            }
960            case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
961                this._stateDoctypePublicIdentifierSingleQuoted(cp);
962                break;
963            }
964            case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
965                this._stateAfterDoctypePublicIdentifier(cp);
966                break;
967            }
968            case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
969                this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
970                break;
971            }
972            case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
973                this._stateAfterDoctypeSystemKeyword(cp);
974                break;
975            }
976            case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
977                this._stateBeforeDoctypeSystemIdentifier(cp);
978                break;
979            }
980            case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
981                this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
982                break;
983            }
984            case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
985                this._stateDoctypeSystemIdentifierSingleQuoted(cp);
986                break;
987            }
988            case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
989                this._stateAfterDoctypeSystemIdentifier(cp);
990                break;
991            }
992            case State.BOGUS_DOCTYPE: {
993                this._stateBogusDoctype(cp);
994                break;
995            }
996            case State.CDATA_SECTION: {
997                this._stateCdataSection(cp);
998                break;
999            }
1000            case State.CDATA_SECTION_BRACKET: {
1001                this._stateCdataSectionBracket(cp);
1002                break;
1003            }
1004            case State.CDATA_SECTION_END: {
1005                this._stateCdataSectionEnd(cp);
1006                break;
1007            }
1008            case State.CHARACTER_REFERENCE: {
1009                this._stateCharacterReference(cp);
1010                break;
1011            }
1012            case State.NAMED_CHARACTER_REFERENCE: {
1013                this._stateNamedCharacterReference(cp);
1014                break;
1015            }
1016            case State.AMBIGUOUS_AMPERSAND: {
1017                this._stateAmbiguousAmpersand(cp);
1018                break;
1019            }
1020            case State.NUMERIC_CHARACTER_REFERENCE: {
1021                this._stateNumericCharacterReference(cp);
1022                break;
1023            }
1024            case State.HEXADEMICAL_CHARACTER_REFERENCE_START: {
1025                this._stateHexademicalCharacterReferenceStart(cp);
1026                break;
1027            }
1028            case State.HEXADEMICAL_CHARACTER_REFERENCE: {
1029                this._stateHexademicalCharacterReference(cp);
1030                break;
1031            }
1032            case State.DECIMAL_CHARACTER_REFERENCE: {
1033                this._stateDecimalCharacterReference(cp);
1034                break;
1035            }
1036            case State.NUMERIC_CHARACTER_REFERENCE_END: {
1037                this._stateNumericCharacterReferenceEnd(cp);
1038                break;
1039            }
1040            default: {
1041                throw new Error('Unknown state');
1042            }
1043        }
1044    }
1045
1046    // State machine
1047
1048    // Data state
1049    //------------------------------------------------------------------
1050    private _stateData(cp: number): void {
1051        switch (cp) {
1052            case $.LESS_THAN_SIGN: {
1053                this.state = State.TAG_OPEN;
1054                break;
1055            }
1056            case $.AMPERSAND: {
1057                this.returnState = State.DATA;
1058                this.state = State.CHARACTER_REFERENCE;
1059                break;
1060            }
1061            case $.NULL: {
1062                this._err(ERR.unexpectedNullCharacter);
1063                this._emitCodePoint(cp);
1064                break;
1065            }
1066            case $.EOF: {
1067                this._emitEOFToken();
1068                break;
1069            }
1070            default: {
1071                this._emitCodePoint(cp);
1072            }
1073        }
1074    }
1075
1076    //  RCDATA state
1077    //------------------------------------------------------------------
1078    private _stateRcdata(cp: number): void {
1079        switch (cp) {
1080            case $.AMPERSAND: {
1081                this.returnState = State.RCDATA;
1082                this.state = State.CHARACTER_REFERENCE;
1083                break;
1084            }
1085            case $.LESS_THAN_SIGN: {
1086                this.state = State.RCDATA_LESS_THAN_SIGN;
1087                break;
1088            }
1089            case $.NULL: {
1090                this._err(ERR.unexpectedNullCharacter);
1091                this._emitChars(REPLACEMENT_CHARACTER);
1092                break;
1093            }
1094            case $.EOF: {
1095                this._emitEOFToken();
1096                break;
1097            }
1098            default: {
1099                this._emitCodePoint(cp);
1100            }
1101        }
1102    }
1103
1104    // RAWTEXT state
1105    //------------------------------------------------------------------
1106    private _stateRawtext(cp: number): void {
1107        switch (cp) {
1108            case $.LESS_THAN_SIGN: {
1109                this.state = State.RAWTEXT_LESS_THAN_SIGN;
1110                break;
1111            }
1112            case $.NULL: {
1113                this._err(ERR.unexpectedNullCharacter);
1114                this._emitChars(REPLACEMENT_CHARACTER);
1115                break;
1116            }
1117            case $.EOF: {
1118                this._emitEOFToken();
1119                break;
1120            }
1121            default: {
1122                this._emitCodePoint(cp);
1123            }
1124        }
1125    }
1126
1127    // Script data state
1128    //------------------------------------------------------------------
1129    private _stateScriptData(cp: number): void {
1130        switch (cp) {
1131            case $.LESS_THAN_SIGN: {
1132                this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
1133                break;
1134            }
1135            case $.NULL: {
1136                this._err(ERR.unexpectedNullCharacter);
1137                this._emitChars(REPLACEMENT_CHARACTER);
1138                break;
1139            }
1140            case $.EOF: {
1141                this._emitEOFToken();
1142                break;
1143            }
1144            default: {
1145                this._emitCodePoint(cp);
1146            }
1147        }
1148    }
1149
1150    // PLAINTEXT state
1151    //------------------------------------------------------------------
1152    private _statePlaintext(cp: number): void {
1153        switch (cp) {
1154            case $.NULL: {
1155                this._err(ERR.unexpectedNullCharacter);
1156                this._emitChars(REPLACEMENT_CHARACTER);
1157                break;
1158            }
1159            case $.EOF: {
1160                this._emitEOFToken();
1161                break;
1162            }
1163            default: {
1164                this._emitCodePoint(cp);
1165            }
1166        }
1167    }
1168
1169    // Tag open state
1170    //------------------------------------------------------------------
1171    private _stateTagOpen(cp: number): void {
1172        if (isAsciiLetter(cp)) {
1173            this._createStartTagToken();
1174            this.state = State.TAG_NAME;
1175            this._stateTagName(cp);
1176        } else
1177            switch (cp) {
1178                case $.EXCLAMATION_MARK: {
1179                    this.state = State.MARKUP_DECLARATION_OPEN;
1180                    break;
1181                }
1182                case $.SOLIDUS: {
1183                    this.state = State.END_TAG_OPEN;
1184                    break;
1185                }
1186                case $.QUESTION_MARK: {
1187                    this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
1188                    this._createCommentToken(1);
1189                    this.state = State.BOGUS_COMMENT;
1190                    this._stateBogusComment(cp);
1191                    break;
1192                }
1193                case $.EOF: {
1194                    this._err(ERR.eofBeforeTagName);
1195                    this._emitChars('<');
1196                    this._emitEOFToken();
1197                    break;
1198                }
1199                default: {
1200                    this._err(ERR.invalidFirstCharacterOfTagName);
1201                    this._emitChars('<');
1202                    this.state = State.DATA;
1203                    this._stateData(cp);
1204                }
1205            }
1206    }
1207
1208    // End tag open state
1209    //------------------------------------------------------------------
1210    private _stateEndTagOpen(cp: number): void {
1211        if (isAsciiLetter(cp)) {
1212            this._createEndTagToken();
1213            this.state = State.TAG_NAME;
1214            this._stateTagName(cp);
1215        } else
1216            switch (cp) {
1217                case $.GREATER_THAN_SIGN: {
1218                    this._err(ERR.missingEndTagName);
1219                    this.state = State.DATA;
1220                    break;
1221                }
1222                case $.EOF: {
1223                    this._err(ERR.eofBeforeTagName);
1224                    this._emitChars('</');
1225                    this._emitEOFToken();
1226                    break;
1227                }
1228                default: {
1229                    this._err(ERR.invalidFirstCharacterOfTagName);
1230                    this._createCommentToken(2);
1231                    this.state = State.BOGUS_COMMENT;
1232                    this._stateBogusComment(cp);
1233                }
1234            }
1235    }
1236
1237    // Tag name state
1238    //------------------------------------------------------------------
1239    private _stateTagName(cp: number): void {
1240        const token = this.currentToken as TagToken;
1241
1242        switch (cp) {
1243            case $.SPACE:
1244            case $.LINE_FEED:
1245            case $.TABULATION:
1246            case $.FORM_FEED: {
1247                this.state = State.BEFORE_ATTRIBUTE_NAME;
1248                break;
1249            }
1250            case $.SOLIDUS: {
1251                this.state = State.SELF_CLOSING_START_TAG;
1252                break;
1253            }
1254            case $.GREATER_THAN_SIGN: {
1255                this.state = State.DATA;
1256                this.emitCurrentTagToken();
1257                break;
1258            }
1259            case $.NULL: {
1260                this._err(ERR.unexpectedNullCharacter);
1261                token.tagName += REPLACEMENT_CHARACTER;
1262                break;
1263            }
1264            case $.EOF: {
1265                this._err(ERR.eofInTag);
1266                this._emitEOFToken();
1267                break;
1268            }
1269            default: {
1270                token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
1271            }
1272        }
1273    }
1274
1275    // RCDATA less-than sign state
1276    //------------------------------------------------------------------
1277    private _stateRcdataLessThanSign(cp: number): void {
1278        if (cp === $.SOLIDUS) {
1279            this.state = State.RCDATA_END_TAG_OPEN;
1280        } else {
1281            this._emitChars('<');
1282            this.state = State.RCDATA;
1283            this._stateRcdata(cp);
1284        }
1285    }
1286
1287    // RCDATA end tag open state
1288    //------------------------------------------------------------------
1289    private _stateRcdataEndTagOpen(cp: number): void {
1290        if (isAsciiLetter(cp)) {
1291            this.state = State.RCDATA_END_TAG_NAME;
1292            this._stateRcdataEndTagName(cp);
1293        } else {
1294            this._emitChars('</');
1295            this.state = State.RCDATA;
1296            this._stateRcdata(cp);
1297        }
1298    }
1299
1300    private handleSpecialEndTag(_cp: number): boolean {
1301        if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
1302            return !this._ensureHibernation();
1303        }
1304
1305        this._createEndTagToken();
1306        const token = this.currentToken as TagToken;
1307        token.tagName = this.lastStartTagName;
1308
1309        const cp = this.preprocessor.peek(this.lastStartTagName.length);
1310
1311        switch (cp) {
1312            case $.SPACE:
1313            case $.LINE_FEED:
1314            case $.TABULATION:
1315            case $.FORM_FEED: {
1316                this._advanceBy(this.lastStartTagName.length);
1317                this.state = State.BEFORE_ATTRIBUTE_NAME;
1318                return false;
1319            }
1320            case $.SOLIDUS: {
1321                this._advanceBy(this.lastStartTagName.length);
1322                this.state = State.SELF_CLOSING_START_TAG;
1323                return false;
1324            }
1325            case $.GREATER_THAN_SIGN: {
1326                this._advanceBy(this.lastStartTagName.length);
1327                this.emitCurrentTagToken();
1328                this.state = State.DATA;
1329                return false;
1330            }
1331            default: {
1332                return !this._ensureHibernation();
1333            }
1334        }
1335    }
1336
1337    // RCDATA end tag name state
1338    //------------------------------------------------------------------
1339    private _stateRcdataEndTagName(cp: number): void {
1340        if (this.handleSpecialEndTag(cp)) {
1341            this._emitChars('</');
1342            this.state = State.RCDATA;
1343            this._stateRcdata(cp);
1344        }
1345    }
1346
1347    // RAWTEXT less-than sign state
1348    //------------------------------------------------------------------
1349    private _stateRawtextLessThanSign(cp: number): void {
1350        if (cp === $.SOLIDUS) {
1351            this.state = State.RAWTEXT_END_TAG_OPEN;
1352        } else {
1353            this._emitChars('<');
1354            this.state = State.RAWTEXT;
1355            this._stateRawtext(cp);
1356        }
1357    }
1358
1359    // RAWTEXT end tag open state
1360    //------------------------------------------------------------------
1361    private _stateRawtextEndTagOpen(cp: number): void {
1362        if (isAsciiLetter(cp)) {
1363            this.state = State.RAWTEXT_END_TAG_NAME;
1364            this._stateRawtextEndTagName(cp);
1365        } else {
1366            this._emitChars('</');
1367            this.state = State.RAWTEXT;
1368            this._stateRawtext(cp);
1369        }
1370    }
1371
1372    // RAWTEXT end tag name state
1373    //------------------------------------------------------------------
1374    private _stateRawtextEndTagName(cp: number): void {
1375        if (this.handleSpecialEndTag(cp)) {
1376            this._emitChars('</');
1377            this.state = State.RAWTEXT;
1378            this._stateRawtext(cp);
1379        }
1380    }
1381
1382    // Script data less-than sign state
1383    //------------------------------------------------------------------
1384    private _stateScriptDataLessThanSign(cp: number): void {
1385        switch (cp) {
1386            case $.SOLIDUS: {
1387                this.state = State.SCRIPT_DATA_END_TAG_OPEN;
1388                break;
1389            }
1390            case $.EXCLAMATION_MARK: {
1391                this.state = State.SCRIPT_DATA_ESCAPE_START;
1392                this._emitChars('<!');
1393                break;
1394            }
1395            default: {
1396                this._emitChars('<');
1397                this.state = State.SCRIPT_DATA;
1398                this._stateScriptData(cp);
1399            }
1400        }
1401    }
1402
1403    // Script data end tag open state
1404    //------------------------------------------------------------------
1405    private _stateScriptDataEndTagOpen(cp: number): void {
1406        if (isAsciiLetter(cp)) {
1407            this.state = State.SCRIPT_DATA_END_TAG_NAME;
1408            this._stateScriptDataEndTagName(cp);
1409        } else {
1410            this._emitChars('</');
1411            this.state = State.SCRIPT_DATA;
1412            this._stateScriptData(cp);
1413        }
1414    }
1415
1416    // Script data end tag name state
1417    //------------------------------------------------------------------
1418    private _stateScriptDataEndTagName(cp: number): void {
1419        if (this.handleSpecialEndTag(cp)) {
1420            this._emitChars('</');
1421            this.state = State.SCRIPT_DATA;
1422            this._stateScriptData(cp);
1423        }
1424    }
1425
1426    // Script data escape start state
1427    //------------------------------------------------------------------
1428    private _stateScriptDataEscapeStart(cp: number): void {
1429        if (cp === $.HYPHEN_MINUS) {
1430            this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
1431            this._emitChars('-');
1432        } else {
1433            this.state = State.SCRIPT_DATA;
1434            this._stateScriptData(cp);
1435        }
1436    }
1437
1438    // Script data escape start dash state
1439    //------------------------------------------------------------------
1440    private _stateScriptDataEscapeStartDash(cp: number): void {
1441        if (cp === $.HYPHEN_MINUS) {
1442            this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
1443            this._emitChars('-');
1444        } else {
1445            this.state = State.SCRIPT_DATA;
1446            this._stateScriptData(cp);
1447        }
1448    }
1449
1450    // Script data escaped state
1451    //------------------------------------------------------------------
1452    private _stateScriptDataEscaped(cp: number): void {
1453        switch (cp) {
1454            case $.HYPHEN_MINUS: {
1455                this.state = State.SCRIPT_DATA_ESCAPED_DASH;
1456                this._emitChars('-');
1457                break;
1458            }
1459            case $.LESS_THAN_SIGN: {
1460                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1461                break;
1462            }
1463            case $.NULL: {
1464                this._err(ERR.unexpectedNullCharacter);
1465                this._emitChars(REPLACEMENT_CHARACTER);
1466                break;
1467            }
1468            case $.EOF: {
1469                this._err(ERR.eofInScriptHtmlCommentLikeText);
1470                this._emitEOFToken();
1471                break;
1472            }
1473            default: {
1474                this._emitCodePoint(cp);
1475            }
1476        }
1477    }
1478
1479    // Script data escaped dash state
1480    //------------------------------------------------------------------
1481    private _stateScriptDataEscapedDash(cp: number): void {
1482        switch (cp) {
1483            case $.HYPHEN_MINUS: {
1484                this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
1485                this._emitChars('-');
1486                break;
1487            }
1488            case $.LESS_THAN_SIGN: {
1489                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1490                break;
1491            }
1492            case $.NULL: {
1493                this._err(ERR.unexpectedNullCharacter);
1494                this.state = State.SCRIPT_DATA_ESCAPED;
1495                this._emitChars(REPLACEMENT_CHARACTER);
1496                break;
1497            }
1498            case $.EOF: {
1499                this._err(ERR.eofInScriptHtmlCommentLikeText);
1500                this._emitEOFToken();
1501                break;
1502            }
1503            default: {
1504                this.state = State.SCRIPT_DATA_ESCAPED;
1505                this._emitCodePoint(cp);
1506            }
1507        }
1508    }
1509
1510    // Script data escaped dash dash state
1511    //------------------------------------------------------------------
1512    private _stateScriptDataEscapedDashDash(cp: number): void {
1513        switch (cp) {
1514            case $.HYPHEN_MINUS: {
1515                this._emitChars('-');
1516                break;
1517            }
1518            case $.LESS_THAN_SIGN: {
1519                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1520                break;
1521            }
1522            case $.GREATER_THAN_SIGN: {
1523                this.state = State.SCRIPT_DATA;
1524                this._emitChars('>');
1525                break;
1526            }
1527            case $.NULL: {
1528                this._err(ERR.unexpectedNullCharacter);
1529                this.state = State.SCRIPT_DATA_ESCAPED;
1530                this._emitChars(REPLACEMENT_CHARACTER);
1531                break;
1532            }
1533            case $.EOF: {
1534                this._err(ERR.eofInScriptHtmlCommentLikeText);
1535                this._emitEOFToken();
1536                break;
1537            }
1538            default: {
1539                this.state = State.SCRIPT_DATA_ESCAPED;
1540                this._emitCodePoint(cp);
1541            }
1542        }
1543    }
1544
1545    // Script data escaped less-than sign state
1546    //------------------------------------------------------------------
1547    private _stateScriptDataEscapedLessThanSign(cp: number): void {
1548        if (cp === $.SOLIDUS) {
1549            this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
1550        } else if (isAsciiLetter(cp)) {
1551            this._emitChars('<');
1552            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
1553            this._stateScriptDataDoubleEscapeStart(cp);
1554        } else {
1555            this._emitChars('<');
1556            this.state = State.SCRIPT_DATA_ESCAPED;
1557            this._stateScriptDataEscaped(cp);
1558        }
1559    }
1560
1561    // Script data escaped end tag open state
1562    //------------------------------------------------------------------
1563    private _stateScriptDataEscapedEndTagOpen(cp: number): void {
1564        if (isAsciiLetter(cp)) {
1565            this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
1566            this._stateScriptDataEscapedEndTagName(cp);
1567        } else {
1568            this._emitChars('</');
1569            this.state = State.SCRIPT_DATA_ESCAPED;
1570            this._stateScriptDataEscaped(cp);
1571        }
1572    }
1573
1574    // Script data escaped end tag name state
1575    //------------------------------------------------------------------
1576    private _stateScriptDataEscapedEndTagName(cp: number): void {
1577        if (this.handleSpecialEndTag(cp)) {
1578            this._emitChars('</');
1579            this.state = State.SCRIPT_DATA_ESCAPED;
1580            this._stateScriptDataEscaped(cp);
1581        }
1582    }
1583
1584    // Script data double escape start state
1585    //------------------------------------------------------------------
1586    private _stateScriptDataDoubleEscapeStart(cp: number): void {
1587        if (
1588            this.preprocessor.startsWith($$.SCRIPT, false) &&
1589            isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))
1590        ) {
1591            this._emitCodePoint(cp);
1592            for (let i = 0; i < $$.SCRIPT.length; i++) {
1593                this._emitCodePoint(this._consume());
1594            }
1595
1596            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1597        } else if (!this._ensureHibernation()) {
1598            this.state = State.SCRIPT_DATA_ESCAPED;
1599            this._stateScriptDataEscaped(cp);
1600        }
1601    }
1602
1603    // Script data double escaped state
1604    //------------------------------------------------------------------
1605    private _stateScriptDataDoubleEscaped(cp: number): void {
1606        switch (cp) {
1607            case $.HYPHEN_MINUS: {
1608                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
1609                this._emitChars('-');
1610                break;
1611            }
1612            case $.LESS_THAN_SIGN: {
1613                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1614                this._emitChars('<');
1615                break;
1616            }
1617            case $.NULL: {
1618                this._err(ERR.unexpectedNullCharacter);
1619                this._emitChars(REPLACEMENT_CHARACTER);
1620                break;
1621            }
1622            case $.EOF: {
1623                this._err(ERR.eofInScriptHtmlCommentLikeText);
1624                this._emitEOFToken();
1625                break;
1626            }
1627            default: {
1628                this._emitCodePoint(cp);
1629            }
1630        }
1631    }
1632
1633    // Script data double escaped dash state
1634    //------------------------------------------------------------------
1635    private _stateScriptDataDoubleEscapedDash(cp: number): void {
1636        switch (cp) {
1637            case $.HYPHEN_MINUS: {
1638                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
1639                this._emitChars('-');
1640                break;
1641            }
1642            case $.LESS_THAN_SIGN: {
1643                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1644                this._emitChars('<');
1645                break;
1646            }
1647            case $.NULL: {
1648                this._err(ERR.unexpectedNullCharacter);
1649                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1650                this._emitChars(REPLACEMENT_CHARACTER);
1651                break;
1652            }
1653            case $.EOF: {
1654                this._err(ERR.eofInScriptHtmlCommentLikeText);
1655                this._emitEOFToken();
1656                break;
1657            }
1658            default: {
1659                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1660                this._emitCodePoint(cp);
1661            }
1662        }
1663    }
1664
1665    // Script data double escaped dash dash state
1666    //------------------------------------------------------------------
1667    private _stateScriptDataDoubleEscapedDashDash(cp: number): void {
1668        switch (cp) {
1669            case $.HYPHEN_MINUS: {
1670                this._emitChars('-');
1671                break;
1672            }
1673            case $.LESS_THAN_SIGN: {
1674                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1675                this._emitChars('<');
1676                break;
1677            }
1678            case $.GREATER_THAN_SIGN: {
1679                this.state = State.SCRIPT_DATA;
1680                this._emitChars('>');
1681                break;
1682            }
1683            case $.NULL: {
1684                this._err(ERR.unexpectedNullCharacter);
1685                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1686                this._emitChars(REPLACEMENT_CHARACTER);
1687                break;
1688            }
1689            case $.EOF: {
1690                this._err(ERR.eofInScriptHtmlCommentLikeText);
1691                this._emitEOFToken();
1692                break;
1693            }
1694            default: {
1695                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1696                this._emitCodePoint(cp);
1697            }
1698        }
1699    }
1700
1701    // Script data double escaped less-than sign state
1702    //------------------------------------------------------------------
1703    private _stateScriptDataDoubleEscapedLessThanSign(cp: number): void {
1704        if (cp === $.SOLIDUS) {
1705            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
1706            this._emitChars('/');
1707        } else {
1708            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1709            this._stateScriptDataDoubleEscaped(cp);
1710        }
1711    }
1712
1713    // Script data double escape end state
1714    //------------------------------------------------------------------
1715    private _stateScriptDataDoubleEscapeEnd(cp: number): void {
1716        if (
1717            this.preprocessor.startsWith($$.SCRIPT, false) &&
1718            isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))
1719        ) {
1720            this._emitCodePoint(cp);
1721            for (let i = 0; i < $$.SCRIPT.length; i++) {
1722                this._emitCodePoint(this._consume());
1723            }
1724
1725            this.state = State.SCRIPT_DATA_ESCAPED;
1726        } else if (!this._ensureHibernation()) {
1727            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1728            this._stateScriptDataDoubleEscaped(cp);
1729        }
1730    }
1731
1732    // Before attribute name state
1733    //------------------------------------------------------------------
1734    private _stateBeforeAttributeName(cp: number): void {
1735        switch (cp) {
1736            case $.SPACE:
1737            case $.LINE_FEED:
1738            case $.TABULATION:
1739            case $.FORM_FEED: {
1740                // Ignore whitespace
1741                break;
1742            }
1743            case $.SOLIDUS:
1744            case $.GREATER_THAN_SIGN:
1745            case $.EOF: {
1746                this.state = State.AFTER_ATTRIBUTE_NAME;
1747                this._stateAfterAttributeName(cp);
1748                break;
1749            }
1750            case $.EQUALS_SIGN: {
1751                this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
1752                this._createAttr('=');
1753                this.state = State.ATTRIBUTE_NAME;
1754                break;
1755            }
1756            default: {
1757                this._createAttr('');
1758                this.state = State.ATTRIBUTE_NAME;
1759                this._stateAttributeName(cp);
1760            }
1761        }
1762    }
1763
1764    // Attribute name state
1765    //------------------------------------------------------------------
1766    private _stateAttributeName(cp: number): void {
1767        switch (cp) {
1768            case $.SPACE:
1769            case $.LINE_FEED:
1770            case $.TABULATION:
1771            case $.FORM_FEED:
1772            case $.SOLIDUS:
1773            case $.GREATER_THAN_SIGN:
1774            case $.EOF: {
1775                this._leaveAttrName();
1776                this.state = State.AFTER_ATTRIBUTE_NAME;
1777                this._stateAfterAttributeName(cp);
1778                break;
1779            }
1780            case $.EQUALS_SIGN: {
1781                this._leaveAttrName();
1782                this.state = State.BEFORE_ATTRIBUTE_VALUE;
1783                break;
1784            }
1785            case $.QUOTATION_MARK:
1786            case $.APOSTROPHE:
1787            case $.LESS_THAN_SIGN: {
1788                this._err(ERR.unexpectedCharacterInAttributeName);
1789                this.currentAttr.name += String.fromCodePoint(cp);
1790                break;
1791            }
1792            case $.NULL: {
1793                this._err(ERR.unexpectedNullCharacter);
1794                this.currentAttr.name += REPLACEMENT_CHARACTER;
1795                break;
1796            }
1797            default: {
1798                this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
1799            }
1800        }
1801    }
1802
1803    // After attribute name state
1804    //------------------------------------------------------------------
1805    private _stateAfterAttributeName(cp: number): void {
1806        switch (cp) {
1807            case $.SPACE:
1808            case $.LINE_FEED:
1809            case $.TABULATION:
1810            case $.FORM_FEED: {
1811                // Ignore whitespace
1812                break;
1813            }
1814            case $.SOLIDUS: {
1815                this.state = State.SELF_CLOSING_START_TAG;
1816                break;
1817            }
1818            case $.EQUALS_SIGN: {
1819                this.state = State.BEFORE_ATTRIBUTE_VALUE;
1820                break;
1821            }
1822            case $.GREATER_THAN_SIGN: {
1823                this.state = State.DATA;
1824                this.emitCurrentTagToken();
1825                break;
1826            }
1827            case $.EOF: {
1828                this._err(ERR.eofInTag);
1829                this._emitEOFToken();
1830                break;
1831            }
1832            default: {
1833                this._createAttr('');
1834                this.state = State.ATTRIBUTE_NAME;
1835                this._stateAttributeName(cp);
1836            }
1837        }
1838    }
1839
1840    // Before attribute value state
1841    //------------------------------------------------------------------
1842    private _stateBeforeAttributeValue(cp: number): void {
1843        switch (cp) {
1844            case $.SPACE:
1845            case $.LINE_FEED:
1846            case $.TABULATION:
1847            case $.FORM_FEED: {
1848                // Ignore whitespace
1849                break;
1850            }
1851            case $.QUOTATION_MARK: {
1852                this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
1853                break;
1854            }
1855            case $.APOSTROPHE: {
1856                this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
1857                break;
1858            }
1859            case $.GREATER_THAN_SIGN: {
1860                this._err(ERR.missingAttributeValue);
1861                this.state = State.DATA;
1862                this.emitCurrentTagToken();
1863                break;
1864            }
1865            default: {
1866                this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
1867                this._stateAttributeValueUnquoted(cp);
1868            }
1869        }
1870    }
1871
1872    // Attribute value (double-quoted) state
1873    //------------------------------------------------------------------
1874    private _stateAttributeValueDoubleQuoted(cp: number): void {
1875        switch (cp) {
1876            case $.QUOTATION_MARK: {
1877                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
1878                break;
1879            }
1880            case $.AMPERSAND: {
1881                this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
1882                this.state = State.CHARACTER_REFERENCE;
1883                break;
1884            }
1885            case $.NULL: {
1886                this._err(ERR.unexpectedNullCharacter);
1887                this.currentAttr.value += REPLACEMENT_CHARACTER;
1888                break;
1889            }
1890            case $.EOF: {
1891                this._err(ERR.eofInTag);
1892                this._emitEOFToken();
1893                break;
1894            }
1895            default: {
1896                this.currentAttr.value += String.fromCodePoint(cp);
1897            }
1898        }
1899    }
1900
1901    // Attribute value (single-quoted) state
1902    //------------------------------------------------------------------
1903    private _stateAttributeValueSingleQuoted(cp: number): void {
1904        switch (cp) {
1905            case $.APOSTROPHE: {
1906                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
1907                break;
1908            }
1909            case $.AMPERSAND: {
1910                this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
1911                this.state = State.CHARACTER_REFERENCE;
1912                break;
1913            }
1914            case $.NULL: {
1915                this._err(ERR.unexpectedNullCharacter);
1916                this.currentAttr.value += REPLACEMENT_CHARACTER;
1917                break;
1918            }
1919            case $.EOF: {
1920                this._err(ERR.eofInTag);
1921                this._emitEOFToken();
1922                break;
1923            }
1924            default: {
1925                this.currentAttr.value += String.fromCodePoint(cp);
1926            }
1927        }
1928    }
1929
1930    // Attribute value (unquoted) state
1931    //------------------------------------------------------------------
1932    private _stateAttributeValueUnquoted(cp: number): void {
1933        switch (cp) {
1934            case $.SPACE:
1935            case $.LINE_FEED:
1936            case $.TABULATION:
1937            case $.FORM_FEED: {
1938                this._leaveAttrValue();
1939                this.state = State.BEFORE_ATTRIBUTE_NAME;
1940                break;
1941            }
1942            case $.AMPERSAND: {
1943                this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED;
1944                this.state = State.CHARACTER_REFERENCE;
1945                break;
1946            }
1947            case $.GREATER_THAN_SIGN: {
1948                this._leaveAttrValue();
1949                this.state = State.DATA;
1950                this.emitCurrentTagToken();
1951                break;
1952            }
1953            case $.NULL: {
1954                this._err(ERR.unexpectedNullCharacter);
1955                this.currentAttr.value += REPLACEMENT_CHARACTER;
1956                break;
1957            }
1958            case $.QUOTATION_MARK:
1959            case $.APOSTROPHE:
1960            case $.LESS_THAN_SIGN:
1961            case $.EQUALS_SIGN:
1962            case $.GRAVE_ACCENT: {
1963                this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
1964                this.currentAttr.value += String.fromCodePoint(cp);
1965                break;
1966            }
1967            case $.EOF: {
1968                this._err(ERR.eofInTag);
1969                this._emitEOFToken();
1970                break;
1971            }
1972            default: {
1973                this.currentAttr.value += String.fromCodePoint(cp);
1974            }
1975        }
1976    }
1977
1978    // After attribute value (quoted) state
1979    //------------------------------------------------------------------
1980    private _stateAfterAttributeValueQuoted(cp: number): void {
1981        switch (cp) {
1982            case $.SPACE:
1983            case $.LINE_FEED:
1984            case $.TABULATION:
1985            case $.FORM_FEED: {
1986                this._leaveAttrValue();
1987                this.state = State.BEFORE_ATTRIBUTE_NAME;
1988                break;
1989            }
1990            case $.SOLIDUS: {
1991                this._leaveAttrValue();
1992                this.state = State.SELF_CLOSING_START_TAG;
1993                break;
1994            }
1995            case $.GREATER_THAN_SIGN: {
1996                this._leaveAttrValue();
1997                this.state = State.DATA;
1998                this.emitCurrentTagToken();
1999                break;
2000            }
2001            case $.EOF: {
2002                this._err(ERR.eofInTag);
2003                this._emitEOFToken();
2004                break;
2005            }
2006            default: {
2007                this._err(ERR.missingWhitespaceBetweenAttributes);
2008                this.state = State.BEFORE_ATTRIBUTE_NAME;
2009                this._stateBeforeAttributeName(cp);
2010            }
2011        }
2012    }
2013
2014    // Self-closing start tag state
2015    //------------------------------------------------------------------
2016    private _stateSelfClosingStartTag(cp: number): void {
2017        switch (cp) {
2018            case $.GREATER_THAN_SIGN: {
2019                const token = this.currentToken as TagToken;
2020                token.selfClosing = true;
2021                this.state = State.DATA;
2022                this.emitCurrentTagToken();
2023                break;
2024            }
2025            case $.EOF: {
2026                this._err(ERR.eofInTag);
2027                this._emitEOFToken();
2028                break;
2029            }
2030            default: {
2031                this._err(ERR.unexpectedSolidusInTag);
2032                this.state = State.BEFORE_ATTRIBUTE_NAME;
2033                this._stateBeforeAttributeName(cp);
2034            }
2035        }
2036    }
2037
2038    // Bogus comment state
2039    //------------------------------------------------------------------
2040    private _stateBogusComment(cp: number): void {
2041        const token = this.currentToken as CommentToken;
2042
2043        switch (cp) {
2044            case $.GREATER_THAN_SIGN: {
2045                this.state = State.DATA;
2046                this.emitCurrentComment(token);
2047                break;
2048            }
2049            case $.EOF: {
2050                this.emitCurrentComment(token);
2051                this._emitEOFToken();
2052                break;
2053            }
2054            case $.NULL: {
2055                this._err(ERR.unexpectedNullCharacter);
2056                token.data += REPLACEMENT_CHARACTER;
2057                break;
2058            }
2059            default: {
2060                token.data += String.fromCodePoint(cp);
2061            }
2062        }
2063    }
2064
2065    // Markup declaration open state
2066    //------------------------------------------------------------------
2067    private _stateMarkupDeclarationOpen(cp: number): void {
2068        if (this._consumeSequenceIfMatch($$.DASH_DASH, true)) {
2069            this._createCommentToken($$.DASH_DASH.length + 1);
2070            this.state = State.COMMENT_START;
2071        } else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) {
2072            // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
2073            this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1);
2074            this.state = State.DOCTYPE;
2075        } else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) {
2076            if (this.inForeignNode) {
2077                this.state = State.CDATA_SECTION;
2078            } else {
2079                this._err(ERR.cdataInHtmlContent);
2080                this._createCommentToken($$.CDATA_START.length + 1);
2081                (this.currentToken as CommentToken).data = '[CDATA[';
2082                this.state = State.BOGUS_COMMENT;
2083            }
2084        }
2085
2086        //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
2087        //results are no longer valid and we will need to start over.
2088        else if (!this._ensureHibernation()) {
2089            this._err(ERR.incorrectlyOpenedComment);
2090            this._createCommentToken(2);
2091            this.state = State.BOGUS_COMMENT;
2092            this._stateBogusComment(cp);
2093        }
2094    }
2095
2096    // Comment start state
2097    //------------------------------------------------------------------
2098    private _stateCommentStart(cp: number): void {
2099        switch (cp) {
2100            case $.HYPHEN_MINUS: {
2101                this.state = State.COMMENT_START_DASH;
2102                break;
2103            }
2104            case $.GREATER_THAN_SIGN: {
2105                this._err(ERR.abruptClosingOfEmptyComment);
2106                this.state = State.DATA;
2107                const token = this.currentToken as CommentToken;
2108                this.emitCurrentComment(token);
2109                break;
2110            }
2111            default: {
2112                this.state = State.COMMENT;
2113                this._stateComment(cp);
2114            }
2115        }
2116    }
2117
2118    // Comment start dash state
2119    //------------------------------------------------------------------
2120    private _stateCommentStartDash(cp: number): void {
2121        const token = this.currentToken as CommentToken;
2122        switch (cp) {
2123            case $.HYPHEN_MINUS: {
2124                this.state = State.COMMENT_END;
2125                break;
2126            }
2127            case $.GREATER_THAN_SIGN: {
2128                this._err(ERR.abruptClosingOfEmptyComment);
2129                this.state = State.DATA;
2130                this.emitCurrentComment(token);
2131                break;
2132            }
2133            case $.EOF: {
2134                this._err(ERR.eofInComment);
2135                this.emitCurrentComment(token);
2136                this._emitEOFToken();
2137                break;
2138            }
2139            default: {
2140                token.data += '-';
2141                this.state = State.COMMENT;
2142                this._stateComment(cp);
2143            }
2144        }
2145    }
2146
2147    // Comment state
2148    //------------------------------------------------------------------
2149    private _stateComment(cp: number): void {
2150        const token = this.currentToken as CommentToken;
2151
2152        switch (cp) {
2153            case $.HYPHEN_MINUS: {
2154                this.state = State.COMMENT_END_DASH;
2155                break;
2156            }
2157            case $.LESS_THAN_SIGN: {
2158                token.data += '<';
2159                this.state = State.COMMENT_LESS_THAN_SIGN;
2160                break;
2161            }
2162            case $.NULL: {
2163                this._err(ERR.unexpectedNullCharacter);
2164                token.data += REPLACEMENT_CHARACTER;
2165                break;
2166            }
2167            case $.EOF: {
2168                this._err(ERR.eofInComment);
2169                this.emitCurrentComment(token);
2170                this._emitEOFToken();
2171                break;
2172            }
2173            default: {
2174                token.data += String.fromCodePoint(cp);
2175            }
2176        }
2177    }
2178
2179    // Comment less-than sign state
2180    //------------------------------------------------------------------
2181    private _stateCommentLessThanSign(cp: number): void {
2182        const token = this.currentToken as CommentToken;
2183
2184        switch (cp) {
2185            case $.EXCLAMATION_MARK: {
2186                token.data += '!';
2187                this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
2188                break;
2189            }
2190            case $.LESS_THAN_SIGN: {
2191                token.data += '<';
2192                break;
2193            }
2194            default: {
2195                this.state = State.COMMENT;
2196                this._stateComment(cp);
2197            }
2198        }
2199    }
2200
2201    // Comment less-than sign bang state
2202    //------------------------------------------------------------------
2203    private _stateCommentLessThanSignBang(cp: number): void {
2204        if (cp === $.HYPHEN_MINUS) {
2205            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
2206        } else {
2207            this.state = State.COMMENT;
2208            this._stateComment(cp);
2209        }
2210    }
2211
2212    // Comment less-than sign bang dash state
2213    //------------------------------------------------------------------
2214    private _stateCommentLessThanSignBangDash(cp: number): void {
2215        if (cp === $.HYPHEN_MINUS) {
2216            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
2217        } else {
2218            this.state = State.COMMENT_END_DASH;
2219            this._stateCommentEndDash(cp);
2220        }
2221    }
2222
2223    // Comment less-than sign bang dash dash state
2224    //------------------------------------------------------------------
2225    private _stateCommentLessThanSignBangDashDash(cp: number): void {
2226        if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) {
2227            this._err(ERR.nestedComment);
2228        }
2229
2230        this.state = State.COMMENT_END;
2231        this._stateCommentEnd(cp);
2232    }
2233
2234    // Comment end dash state
2235    //------------------------------------------------------------------
2236    private _stateCommentEndDash(cp: number): void {
2237        const token = this.currentToken as CommentToken;
2238        switch (cp) {
2239            case $.HYPHEN_MINUS: {
2240                this.state = State.COMMENT_END;
2241                break;
2242            }
2243            case $.EOF: {
2244                this._err(ERR.eofInComment);
2245                this.emitCurrentComment(token);
2246                this._emitEOFToken();
2247                break;
2248            }
2249            default: {
2250                token.data += '-';
2251                this.state = State.COMMENT;
2252                this._stateComment(cp);
2253            }
2254        }
2255    }
2256
2257    // Comment end state
2258    //------------------------------------------------------------------
2259    private _stateCommentEnd(cp: number): void {
2260        const token = this.currentToken as CommentToken;
2261
2262        switch (cp) {
2263            case $.GREATER_THAN_SIGN: {
2264                this.state = State.DATA;
2265                this.emitCurrentComment(token);
2266                break;
2267            }
2268            case $.EXCLAMATION_MARK: {
2269                this.state = State.COMMENT_END_BANG;
2270                break;
2271            }
2272            case $.HYPHEN_MINUS: {
2273                token.data += '-';
2274                break;
2275            }
2276            case $.EOF: {
2277                this._err(ERR.eofInComment);
2278                this.emitCurrentComment(token);
2279                this._emitEOFToken();
2280                break;
2281            }
2282            default: {
2283                token.data += '--';
2284                this.state = State.COMMENT;
2285                this._stateComment(cp);
2286            }
2287        }
2288    }
2289
2290    // Comment end bang state
2291    //------------------------------------------------------------------
2292    private _stateCommentEndBang(cp: number): void {
2293        const token = this.currentToken as CommentToken;
2294
2295        switch (cp) {
2296            case $.HYPHEN_MINUS: {
2297                token.data += '--!';
2298                this.state = State.COMMENT_END_DASH;
2299                break;
2300            }
2301            case $.GREATER_THAN_SIGN: {
2302                this._err(ERR.incorrectlyClosedComment);
2303                this.state = State.DATA;
2304                this.emitCurrentComment(token);
2305                break;
2306            }
2307            case $.EOF: {
2308                this._err(ERR.eofInComment);
2309                this.emitCurrentComment(token);
2310                this._emitEOFToken();
2311                break;
2312            }
2313            default: {
2314                token.data += '--!';
2315                this.state = State.COMMENT;
2316                this._stateComment(cp);
2317            }
2318        }
2319    }
2320
2321    // DOCTYPE state
2322    //------------------------------------------------------------------
2323    private _stateDoctype(cp: number): void {
2324        switch (cp) {
2325            case $.SPACE:
2326            case $.LINE_FEED:
2327            case $.TABULATION:
2328            case $.FORM_FEED: {
2329                this.state = State.BEFORE_DOCTYPE_NAME;
2330                break;
2331            }
2332            case $.GREATER_THAN_SIGN: {
2333                this.state = State.BEFORE_DOCTYPE_NAME;
2334                this._stateBeforeDoctypeName(cp);
2335                break;
2336            }
2337            case $.EOF: {
2338                this._err(ERR.eofInDoctype);
2339                this._createDoctypeToken(null);
2340                const token = this.currentToken as DoctypeToken;
2341                token.forceQuirks = true;
2342                this.emitCurrentDoctype(token);
2343                this._emitEOFToken();
2344                break;
2345            }
2346            default: {
2347                this._err(ERR.missingWhitespaceBeforeDoctypeName);
2348                this.state = State.BEFORE_DOCTYPE_NAME;
2349                this._stateBeforeDoctypeName(cp);
2350            }
2351        }
2352    }
2353
2354    // Before DOCTYPE name state
2355    //------------------------------------------------------------------
2356    private _stateBeforeDoctypeName(cp: number): void {
2357        if (isAsciiUpper(cp)) {
2358            this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
2359            this.state = State.DOCTYPE_NAME;
2360        } else
2361            switch (cp) {
2362                case $.SPACE:
2363                case $.LINE_FEED:
2364                case $.TABULATION:
2365                case $.FORM_FEED: {
2366                    // Ignore whitespace
2367                    break;
2368                }
2369                case $.NULL: {
2370                    this._err(ERR.unexpectedNullCharacter);
2371                    this._createDoctypeToken(REPLACEMENT_CHARACTER);
2372                    this.state = State.DOCTYPE_NAME;
2373                    break;
2374                }
2375                case $.GREATER_THAN_SIGN: {
2376                    this._err(ERR.missingDoctypeName);
2377                    this._createDoctypeToken(null);
2378                    const token = this.currentToken as DoctypeToken;
2379                    token.forceQuirks = true;
2380                    this.emitCurrentDoctype(token);
2381                    this.state = State.DATA;
2382                    break;
2383                }
2384                case $.EOF: {
2385                    this._err(ERR.eofInDoctype);
2386                    this._createDoctypeToken(null);
2387                    const token = this.currentToken as DoctypeToken;
2388                    token.forceQuirks = true;
2389                    this.emitCurrentDoctype(token);
2390                    this._emitEOFToken();
2391                    break;
2392                }
2393                default: {
2394                    this._createDoctypeToken(String.fromCodePoint(cp));
2395                    this.state = State.DOCTYPE_NAME;
2396                }
2397            }
2398    }
2399
2400    // DOCTYPE name state
2401    //------------------------------------------------------------------
2402    private _stateDoctypeName(cp: number): void {
2403        const token = this.currentToken as DoctypeToken;
2404
2405        switch (cp) {
2406            case $.SPACE:
2407            case $.LINE_FEED:
2408            case $.TABULATION:
2409            case $.FORM_FEED: {
2410                this.state = State.AFTER_DOCTYPE_NAME;
2411                break;
2412            }
2413            case $.GREATER_THAN_SIGN: {
2414                this.state = State.DATA;
2415                this.emitCurrentDoctype(token);
2416                break;
2417            }
2418            case $.NULL: {
2419                this._err(ERR.unexpectedNullCharacter);
2420                token.name += REPLACEMENT_CHARACTER;
2421                break;
2422            }
2423            case $.EOF: {
2424                this._err(ERR.eofInDoctype);
2425                token.forceQuirks = true;
2426                this.emitCurrentDoctype(token);
2427                this._emitEOFToken();
2428                break;
2429            }
2430            default: {
2431                token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
2432            }
2433        }
2434    }
2435
2436    // After DOCTYPE name state
2437    //------------------------------------------------------------------
2438    private _stateAfterDoctypeName(cp: number): void {
2439        const token = this.currentToken as DoctypeToken;
2440
2441        switch (cp) {
2442            case $.SPACE:
2443            case $.LINE_FEED:
2444            case $.TABULATION:
2445            case $.FORM_FEED: {
2446                // Ignore whitespace
2447                break;
2448            }
2449            case $.GREATER_THAN_SIGN: {
2450                this.state = State.DATA;
2451                this.emitCurrentDoctype(token);
2452                break;
2453            }
2454            case $.EOF: {
2455                this._err(ERR.eofInDoctype);
2456                token.forceQuirks = true;
2457                this.emitCurrentDoctype(token);
2458                this._emitEOFToken();
2459                break;
2460            }
2461            default: {
2462                if (this._consumeSequenceIfMatch($$.PUBLIC, false)) {
2463                    this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
2464                } else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) {
2465                    this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
2466                }
2467                //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
2468                //results are no longer valid and we will need to start over.
2469                else if (!this._ensureHibernation()) {
2470                    this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
2471                    token.forceQuirks = true;
2472                    this.state = State.BOGUS_DOCTYPE;
2473                    this._stateBogusDoctype(cp);
2474                }
2475            }
2476        }
2477    }
2478
2479    // After DOCTYPE public keyword state
2480    //------------------------------------------------------------------
2481    private _stateAfterDoctypePublicKeyword(cp: number): void {
2482        const token = this.currentToken as DoctypeToken;
2483
2484        switch (cp) {
2485            case $.SPACE:
2486            case $.LINE_FEED:
2487            case $.TABULATION:
2488            case $.FORM_FEED: {
2489                this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
2490                break;
2491            }
2492            case $.QUOTATION_MARK: {
2493                this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
2494                token.publicId = '';
2495                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
2496                break;
2497            }
2498            case $.APOSTROPHE: {
2499                this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
2500                token.publicId = '';
2501                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
2502                break;
2503            }
2504            case $.GREATER_THAN_SIGN: {
2505                this._err(ERR.missingDoctypePublicIdentifier);
2506                token.forceQuirks = true;
2507                this.state = State.DATA;
2508                this.emitCurrentDoctype(token);
2509                break;
2510            }
2511            case $.EOF: {
2512                this._err(ERR.eofInDoctype);
2513                token.forceQuirks = true;
2514                this.emitCurrentDoctype(token);
2515                this._emitEOFToken();
2516                break;
2517            }
2518            default: {
2519                this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
2520                token.forceQuirks = true;
2521                this.state = State.BOGUS_DOCTYPE;
2522                this._stateBogusDoctype(cp);
2523            }
2524        }
2525    }
2526
2527    // Before DOCTYPE public identifier state
2528    //------------------------------------------------------------------
2529    private _stateBeforeDoctypePublicIdentifier(cp: number): void {
2530        const token = this.currentToken as DoctypeToken;
2531
2532        switch (cp) {
2533            case $.SPACE:
2534            case $.LINE_FEED:
2535            case $.TABULATION:
2536            case $.FORM_FEED: {
2537                // Ignore whitespace
2538                break;
2539            }
2540            case $.QUOTATION_MARK: {
2541                token.publicId = '';
2542                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
2543                break;
2544            }
2545            case $.APOSTROPHE: {
2546                token.publicId = '';
2547                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
2548                break;
2549            }
2550            case $.GREATER_THAN_SIGN: {
2551                this._err(ERR.missingDoctypePublicIdentifier);
2552                token.forceQuirks = true;
2553                this.state = State.DATA;
2554                this.emitCurrentDoctype(token);
2555                break;
2556            }
2557            case $.EOF: {
2558                this._err(ERR.eofInDoctype);
2559                token.forceQuirks = true;
2560                this.emitCurrentDoctype(token);
2561                this._emitEOFToken();
2562                break;
2563            }
2564            default: {
2565                this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
2566                token.forceQuirks = true;
2567                this.state = State.BOGUS_DOCTYPE;
2568                this._stateBogusDoctype(cp);
2569            }
2570        }
2571    }
2572
2573    // DOCTYPE public identifier (double-quoted) state
2574    //------------------------------------------------------------------
2575    private _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void {
2576        const token = this.currentToken as DoctypeToken;
2577
2578        switch (cp) {
2579            case $.QUOTATION_MARK: {
2580                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
2581                break;
2582            }
2583            case $.NULL: {
2584                this._err(ERR.unexpectedNullCharacter);
2585                token.publicId += REPLACEMENT_CHARACTER;
2586                break;
2587            }
2588            case $.GREATER_THAN_SIGN: {
2589                this._err(ERR.abruptDoctypePublicIdentifier);
2590                token.forceQuirks = true;
2591                this.emitCurrentDoctype(token);
2592                this.state = State.DATA;
2593                break;
2594            }
2595            case $.EOF: {
2596                this._err(ERR.eofInDoctype);
2597                token.forceQuirks = true;
2598                this.emitCurrentDoctype(token);
2599                this._emitEOFToken();
2600                break;
2601            }
2602            default: {
2603                token.publicId += String.fromCodePoint(cp);
2604            }
2605        }
2606    }
2607
2608    // DOCTYPE public identifier (single-quoted) state
2609    //------------------------------------------------------------------
2610    private _stateDoctypePublicIdentifierSingleQuoted(cp: number): void {
2611        const token = this.currentToken as DoctypeToken;
2612
2613        switch (cp) {
2614            case $.APOSTROPHE: {
2615                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
2616                break;
2617            }
2618            case $.NULL: {
2619                this._err(ERR.unexpectedNullCharacter);
2620                token.publicId += REPLACEMENT_CHARACTER;
2621                break;
2622            }
2623            case $.GREATER_THAN_SIGN: {
2624                this._err(ERR.abruptDoctypePublicIdentifier);
2625                token.forceQuirks = true;
2626                this.emitCurrentDoctype(token);
2627                this.state = State.DATA;
2628                break;
2629            }
2630            case $.EOF: {
2631                this._err(ERR.eofInDoctype);
2632                token.forceQuirks = true;
2633                this.emitCurrentDoctype(token);
2634                this._emitEOFToken();
2635                break;
2636            }
2637            default: {
2638                token.publicId += String.fromCodePoint(cp);
2639            }
2640        }
2641    }
2642
2643    // After DOCTYPE public identifier state
2644    //------------------------------------------------------------------
2645    private _stateAfterDoctypePublicIdentifier(cp: number): void {
2646        const token = this.currentToken as DoctypeToken;
2647
2648        switch (cp) {
2649            case $.SPACE:
2650            case $.LINE_FEED:
2651            case $.TABULATION:
2652            case $.FORM_FEED: {
2653                this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
2654                break;
2655            }
2656            case $.GREATER_THAN_SIGN: {
2657                this.state = State.DATA;
2658                this.emitCurrentDoctype(token);
2659                break;
2660            }
2661            case $.QUOTATION_MARK: {
2662                this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
2663                token.systemId = '';
2664                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2665                break;
2666            }
2667            case $.APOSTROPHE: {
2668                this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
2669                token.systemId = '';
2670                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2671                break;
2672            }
2673            case $.EOF: {
2674                this._err(ERR.eofInDoctype);
2675                token.forceQuirks = true;
2676                this.emitCurrentDoctype(token);
2677                this._emitEOFToken();
2678                break;
2679            }
2680            default: {
2681                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2682                token.forceQuirks = true;
2683                this.state = State.BOGUS_DOCTYPE;
2684                this._stateBogusDoctype(cp);
2685            }
2686        }
2687    }
2688
2689    // Between DOCTYPE public and system identifiers state
2690    //------------------------------------------------------------------
2691    private _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void {
2692        const token = this.currentToken as DoctypeToken;
2693
2694        switch (cp) {
2695            case $.SPACE:
2696            case $.LINE_FEED:
2697            case $.TABULATION:
2698            case $.FORM_FEED: {
2699                // Ignore whitespace
2700                break;
2701            }
2702            case $.GREATER_THAN_SIGN: {
2703                this.emitCurrentDoctype(token);
2704                this.state = State.DATA;
2705                break;
2706            }
2707            case $.QUOTATION_MARK: {
2708                token.systemId = '';
2709                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2710                break;
2711            }
2712            case $.APOSTROPHE: {
2713                token.systemId = '';
2714                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2715                break;
2716            }
2717            case $.EOF: {
2718                this._err(ERR.eofInDoctype);
2719                token.forceQuirks = true;
2720                this.emitCurrentDoctype(token);
2721                this._emitEOFToken();
2722                break;
2723            }
2724            default: {
2725                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2726                token.forceQuirks = true;
2727                this.state = State.BOGUS_DOCTYPE;
2728                this._stateBogusDoctype(cp);
2729            }
2730        }
2731    }
2732
2733    // After DOCTYPE system keyword state
2734    //------------------------------------------------------------------
2735    private _stateAfterDoctypeSystemKeyword(cp: number): void {
2736        const token = this.currentToken as DoctypeToken;
2737
2738        switch (cp) {
2739            case $.SPACE:
2740            case $.LINE_FEED:
2741            case $.TABULATION:
2742            case $.FORM_FEED: {
2743                this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
2744                break;
2745            }
2746            case $.QUOTATION_MARK: {
2747                this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
2748                token.systemId = '';
2749                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2750                break;
2751            }
2752            case $.APOSTROPHE: {
2753                this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
2754                token.systemId = '';
2755                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2756                break;
2757            }
2758            case $.GREATER_THAN_SIGN: {
2759                this._err(ERR.missingDoctypeSystemIdentifier);
2760                token.forceQuirks = true;
2761                this.state = State.DATA;
2762                this.emitCurrentDoctype(token);
2763                break;
2764            }
2765            case $.EOF: {
2766                this._err(ERR.eofInDoctype);
2767                token.forceQuirks = true;
2768                this.emitCurrentDoctype(token);
2769                this._emitEOFToken();
2770                break;
2771            }
2772            default: {
2773                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2774                token.forceQuirks = true;
2775                this.state = State.BOGUS_DOCTYPE;
2776                this._stateBogusDoctype(cp);
2777            }
2778        }
2779    }
2780
2781    // Before DOCTYPE system identifier state
2782    //------------------------------------------------------------------
2783    private _stateBeforeDoctypeSystemIdentifier(cp: number): void {
2784        const token = this.currentToken as DoctypeToken;
2785
2786        switch (cp) {
2787            case $.SPACE:
2788            case $.LINE_FEED:
2789            case $.TABULATION:
2790            case $.FORM_FEED: {
2791                // Ignore whitespace
2792                break;
2793            }
2794            case $.QUOTATION_MARK: {
2795                token.systemId = '';
2796                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
2797                break;
2798            }
2799            case $.APOSTROPHE: {
2800                token.systemId = '';
2801                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
2802                break;
2803            }
2804            case $.GREATER_THAN_SIGN: {
2805                this._err(ERR.missingDoctypeSystemIdentifier);
2806                token.forceQuirks = true;
2807                this.state = State.DATA;
2808                this.emitCurrentDoctype(token);
2809                break;
2810            }
2811            case $.EOF: {
2812                this._err(ERR.eofInDoctype);
2813                token.forceQuirks = true;
2814                this.emitCurrentDoctype(token);
2815                this._emitEOFToken();
2816                break;
2817            }
2818            default: {
2819                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
2820                token.forceQuirks = true;
2821                this.state = State.BOGUS_DOCTYPE;
2822                this._stateBogusDoctype(cp);
2823            }
2824        }
2825    }
2826
2827    // DOCTYPE system identifier (double-quoted) state
2828    //------------------------------------------------------------------
2829    private _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void {
2830        const token = this.currentToken as DoctypeToken;
2831
2832        switch (cp) {
2833            case $.QUOTATION_MARK: {
2834                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
2835                break;
2836            }
2837            case $.NULL: {
2838                this._err(ERR.unexpectedNullCharacter);
2839                token.systemId += REPLACEMENT_CHARACTER;
2840                break;
2841            }
2842            case $.GREATER_THAN_SIGN: {
2843                this._err(ERR.abruptDoctypeSystemIdentifier);
2844                token.forceQuirks = true;
2845                this.emitCurrentDoctype(token);
2846                this.state = State.DATA;
2847                break;
2848            }
2849            case $.EOF: {
2850                this._err(ERR.eofInDoctype);
2851                token.forceQuirks = true;
2852                this.emitCurrentDoctype(token);
2853                this._emitEOFToken();
2854                break;
2855            }
2856            default: {
2857                token.systemId += String.fromCodePoint(cp);
2858            }
2859        }
2860    }
2861
2862    // DOCTYPE system identifier (single-quoted) state
2863    //------------------------------------------------------------------
2864    private _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void {
2865        const token = this.currentToken as DoctypeToken;
2866
2867        switch (cp) {
2868            case $.APOSTROPHE: {
2869                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
2870                break;
2871            }
2872            case $.NULL: {
2873                this._err(ERR.unexpectedNullCharacter);
2874                token.systemId += REPLACEMENT_CHARACTER;
2875                break;
2876            }
2877            case $.GREATER_THAN_SIGN: {
2878                this._err(ERR.abruptDoctypeSystemIdentifier);
2879                token.forceQuirks = true;
2880                this.emitCurrentDoctype(token);
2881                this.state = State.DATA;
2882                break;
2883            }
2884            case $.EOF: {
2885                this._err(ERR.eofInDoctype);
2886                token.forceQuirks = true;
2887                this.emitCurrentDoctype(token);
2888                this._emitEOFToken();
2889                break;
2890            }
2891            default: {
2892                token.systemId += String.fromCodePoint(cp);
2893            }
2894        }
2895    }
2896
2897    // After DOCTYPE system identifier state
2898    //------------------------------------------------------------------
2899    private _stateAfterDoctypeSystemIdentifier(cp: number): void {
2900        const token = this.currentToken as DoctypeToken;
2901
2902        switch (cp) {
2903            case $.SPACE:
2904            case $.LINE_FEED:
2905            case $.TABULATION:
2906            case $.FORM_FEED: {
2907                // Ignore whitespace
2908                break;
2909            }
2910            case $.GREATER_THAN_SIGN: {
2911                this.emitCurrentDoctype(token);
2912                this.state = State.DATA;
2913                break;
2914            }
2915            case $.EOF: {
2916                this._err(ERR.eofInDoctype);
2917                token.forceQuirks = true;
2918                this.emitCurrentDoctype(token);
2919                this._emitEOFToken();
2920                break;
2921            }
2922            default: {
2923                this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
2924                this.state = State.BOGUS_DOCTYPE;
2925                this._stateBogusDoctype(cp);
2926            }
2927        }
2928    }
2929
2930    // Bogus DOCTYPE state
2931    //------------------------------------------------------------------
2932    private _stateBogusDoctype(cp: number): void {
2933        const token = this.currentToken as DoctypeToken;
2934
2935        switch (cp) {
2936            case $.GREATER_THAN_SIGN: {
2937                this.emitCurrentDoctype(token);
2938                this.state = State.DATA;
2939                break;
2940            }
2941            case $.NULL: {
2942                this._err(ERR.unexpectedNullCharacter);
2943                break;
2944            }
2945            case $.EOF: {
2946                this.emitCurrentDoctype(token);
2947                this._emitEOFToken();
2948                break;
2949            }
2950            default:
2951            // Do nothing
2952        }
2953    }
2954
2955    // CDATA section state
2956    //------------------------------------------------------------------
2957    private _stateCdataSection(cp: number): void {
2958        switch (cp) {
2959            case $.RIGHT_SQUARE_BRACKET: {
2960                this.state = State.CDATA_SECTION_BRACKET;
2961                break;
2962            }
2963            case $.EOF: {
2964                this._err(ERR.eofInCdata);
2965                this._emitEOFToken();
2966                break;
2967            }
2968            default: {
2969                this._emitCodePoint(cp);
2970            }
2971        }
2972    }
2973
2974    // CDATA section bracket state
2975    //------------------------------------------------------------------
2976    private _stateCdataSectionBracket(cp: number): void {
2977        if (cp === $.RIGHT_SQUARE_BRACKET) {
2978            this.state = State.CDATA_SECTION_END;
2979        } else {
2980            this._emitChars(']');
2981            this.state = State.CDATA_SECTION;
2982            this._stateCdataSection(cp);
2983        }
2984    }
2985
2986    // CDATA section end state
2987    //------------------------------------------------------------------
2988    private _stateCdataSectionEnd(cp: number): void {
2989        switch (cp) {
2990            case $.GREATER_THAN_SIGN: {
2991                this.state = State.DATA;
2992                break;
2993            }
2994            case $.RIGHT_SQUARE_BRACKET: {
2995                this._emitChars(']');
2996                break;
2997            }
2998            default: {
2999                this._emitChars(']]');
3000                this.state = State.CDATA_SECTION;
3001                this._stateCdataSection(cp);
3002            }
3003        }
3004    }
3005
3006    // Character reference state
3007    //------------------------------------------------------------------
3008    private _stateCharacterReference(cp: number): void {
3009        if (cp === $.NUMBER_SIGN) {
3010            this.state = State.NUMERIC_CHARACTER_REFERENCE;
3011        } else if (isAsciiAlphaNumeric(cp)) {
3012            this.state = State.NAMED_CHARACTER_REFERENCE;
3013            this._stateNamedCharacterReference(cp);
3014        } else {
3015            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3016            this._reconsumeInState(this.returnState, cp);
3017        }
3018    }
3019
3020    // Named character reference state
3021    //------------------------------------------------------------------
3022    private _stateNamedCharacterReference(cp: number): void {
3023        const matchResult = this._matchNamedCharacterReference(cp);
3024
3025        //NOTE: Matching can be abrupted by hibernation. In that case, match
3026        //results are no longer valid and we will need to start over.
3027        if (this._ensureHibernation()) {
3028            // Stay in the state, try again.
3029        } else if (matchResult) {
3030            for (let i = 0; i < matchResult.length; i++) {
3031                this._flushCodePointConsumedAsCharacterReference(matchResult[i]);
3032            }
3033            this.state = this.returnState;
3034        } else {
3035            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3036            this.state = State.AMBIGUOUS_AMPERSAND;
3037        }
3038    }
3039
    // Ambiguous ampersand state
3041    //------------------------------------------------------------------
3042    private _stateAmbiguousAmpersand(cp: number): void {
3043        if (isAsciiAlphaNumeric(cp)) {
3044            this._flushCodePointConsumedAsCharacterReference(cp);
3045        } else {
3046            if (cp === $.SEMICOLON) {
3047                this._err(ERR.unknownNamedCharacterReference);
3048            }
3049
3050            this._reconsumeInState(this.returnState, cp);
3051        }
3052    }
3053
3054    // Numeric character reference state
3055    //------------------------------------------------------------------
3056    private _stateNumericCharacterReference(cp: number): void {
3057        this.charRefCode = 0;
3058
3059        if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) {
3060            this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
3061        }
3062        // Inlined decimal character reference start state
3063        else if (isAsciiDigit(cp)) {
3064            this.state = State.DECIMAL_CHARACTER_REFERENCE;
3065            this._stateDecimalCharacterReference(cp);
3066        } else {
3067            this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3068            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3069            this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
3070            this._reconsumeInState(this.returnState, cp);
3071        }
3072    }
3073
    // Hexadecimal character reference start state
3075    //------------------------------------------------------------------
3076    private _stateHexademicalCharacterReferenceStart(cp: number): void {
3077        if (isAsciiHexDigit(cp)) {
3078            this.state = State.HEXADEMICAL_CHARACTER_REFERENCE;
3079            this._stateHexademicalCharacterReference(cp);
3080        } else {
3081            this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3082            this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
3083            this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
3084            this._unconsume(2);
3085            this.state = this.returnState;
3086        }
3087    }
3088
    // Hexadecimal character reference state
3090    //------------------------------------------------------------------
3091    private _stateHexademicalCharacterReference(cp: number): void {
3092        if (isAsciiUpperHexDigit(cp)) {
3093            this.charRefCode = this.charRefCode * 16 + cp - 0x37;
3094        } else if (isAsciiLowerHexDigit(cp)) {
3095            this.charRefCode = this.charRefCode * 16 + cp - 0x57;
3096        } else if (isAsciiDigit(cp)) {
3097            this.charRefCode = this.charRefCode * 16 + cp - 0x30;
3098        } else if (cp === $.SEMICOLON) {
3099            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3100        } else {
3101            this._err(ERR.missingSemicolonAfterCharacterReference);
3102            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3103            this._stateNumericCharacterReferenceEnd(cp);
3104        }
3105    }
3106
3107    // Decimal character reference state
3108    //------------------------------------------------------------------
3109    private _stateDecimalCharacterReference(cp: number): void {
3110        if (isAsciiDigit(cp)) {
3111            this.charRefCode = this.charRefCode * 10 + cp - 0x30;
3112        } else if (cp === $.SEMICOLON) {
3113            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3114        } else {
3115            this._err(ERR.missingSemicolonAfterCharacterReference);
3116            this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3117            this._stateNumericCharacterReferenceEnd(cp);
3118        }
3119    }
3120
3121    // Numeric character reference end state
3122    //------------------------------------------------------------------
3123    private _stateNumericCharacterReferenceEnd(cp: number): void {
3124        if (this.charRefCode === $.NULL) {
3125            this._err(ERR.nullCharacterReference);
3126            this.charRefCode = $.REPLACEMENT_CHARACTER;
3127        } else if (this.charRefCode > 0x10_ff_ff) {
3128            this._err(ERR.characterReferenceOutsideUnicodeRange);
3129            this.charRefCode = $.REPLACEMENT_CHARACTER;
3130        } else if (isSurrogate(this.charRefCode)) {
3131            this._err(ERR.surrogateCharacterReference);
3132            this.charRefCode = $.REPLACEMENT_CHARACTER;
3133        } else if (isUndefinedCodePoint(this.charRefCode)) {
3134            this._err(ERR.noncharacterCharacterReference);
3135        } else if (isControlCodePoint(this.charRefCode) || this.charRefCode === $.CARRIAGE_RETURN) {
3136            this._err(ERR.controlCharacterReference);
3137
3138            const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode);
3139
3140            if (replacement !== undefined) {
3141                this.charRefCode = replacement;
3142            }
3143        }
3144
3145        this._flushCodePointConsumedAsCharacterReference(this.charRefCode);
3146        this._reconsumeInState(this.returnState, cp);
3147    }
3148}
3149
/**
 * Tracks tags accepted by `parse.validator.isSupportedSelfClosing` and logs
 * two kinds of errors into `parse.compileResult.log`:
 *
 * - a previously opened supported tag (remembered in `parse.nodeInfo`) that
 *   was not closed before the current tag arrived;
 * - a start tag written as self-closing although the validator does not
 *   accept it.
 *
 * `parse.nodeInfo` holds the pending tag name (`tn`), whether its end tag has
 * been seen (`sc`) and its "line,col" start position (`pos`).
 *
 * @param parse Tokenizer whose `validator`, `nodeInfo` and `compileResult` are used.
 * @param token Start or end tag token being processed.
 */
function checkselfClosingNode(parse: Tokenizer, token: TagToken) {
    const name = (token.tagName || "").toLowerCase();
    const isSelfClosed = token.selfClosing;
    const supported: boolean = parse.validator.isSupportedSelfClosing(name);
    const isStartTag = token.type === TokenType.START_TAG;

    // "line,col" key of the current token; both parts read "undefined" when
    // the token carries no location.
    const positionKey = (): string =>
        String(token.location?.startLine) + ',' + String(token.location?.startCol);

    // A pending tag is still open. It is reported when the current tag is not
    // a supported one, or is a start tag at a different position.
    const pendingIsOpen = parse.nodeInfo.tn && name && !parse.nodeInfo.sc;
    if (pendingIsOpen && (!supported || (isStartTag && positionKey() !== parse.nodeInfo.pos))) {
        const [pendingLine, pendingCol] = parse.nodeInfo.pos.split(',');
        parse.compileResult.log.push({
            // Unparsable positions fall back to 1.
            line: Number(pendingLine) || 1,
            column: Number(pendingCol) || 1,
            reason: 'ERROR: tag `' + parse.nodeInfo.tn + '` must be closed, please follow norm',
        });
        parse.nodeInfo = { tn: '', sc: false, pos: '' };
    }

    if (name && supported) {
        // A supported tag opened without `/>` becomes the new pending tag.
        if (isStartTag && !isSelfClosed) {
            parse.nodeInfo.tn = name;
            parse.nodeInfo.sc = false;
            parse.nodeInfo.pos = positionKey();
        }
        // A matching end tag marks the pending tag as closed.
        if (token.type === TokenType.END_TAG && name === parse.nodeInfo.tn) {
            parse.nodeInfo.sc = true;
        }
    }

    // Self-closing syntax on a tag the validator does not accept.
    if (isStartTag && isSelfClosed && !supported) {
        parse.compileResult.log.push({
            line: token.location?.startLine || 1,
            column: token.location?.startCol || 1,
            reason: "ERROR: tag `" + name + "` can not use selfClosing",
        });
    }
}