• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import { Preprocessor } from './preprocessor.js';
2import {
3    CODE_POINTS as $,
4    SEQUENCES as $$,
5    REPLACEMENT_CHARACTER,
6    isSurrogate,
7    isUndefinedCodePoint,
8    isControlCodePoint,
9} from '../common/unicode.js';
10import {
11    TokenType,
12    getTokenAttr,
13    type Token,
14    type CharacterToken,
15    type DoctypeToken,
16    type TagToken,
17    type EOFToken,
18    type CommentToken,
19    type Attribute,
20    type Location,
21} from '../common/token.js';
22import { htmlDecodeTree, EntityDecoder, DecodingMode } from 'entities/lib/decode.js';
23import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
24import { TAG_ID, getTagID } from '../common/html.js';
25
26//States
/**
 * Tokenizer state machine states.
 *
 * There is one member per state dispatched by `Tokenizer._callState`. Members have no
 * explicit initializers, so they are numbered sequentially from 0 in declaration order;
 * do not reorder them if the numeric values are relied on anywhere.
 */
const enum State {
    DATA,
    RCDATA,
    RAWTEXT,
    SCRIPT_DATA,
    PLAINTEXT,
    TAG_OPEN,
    END_TAG_OPEN,
    TAG_NAME,
    RCDATA_LESS_THAN_SIGN,
    RCDATA_END_TAG_OPEN,
    RCDATA_END_TAG_NAME,
    RAWTEXT_LESS_THAN_SIGN,
    RAWTEXT_END_TAG_OPEN,
    RAWTEXT_END_TAG_NAME,
    SCRIPT_DATA_LESS_THAN_SIGN,
    SCRIPT_DATA_END_TAG_OPEN,
    SCRIPT_DATA_END_TAG_NAME,
    SCRIPT_DATA_ESCAPE_START,
    SCRIPT_DATA_ESCAPE_START_DASH,
    SCRIPT_DATA_ESCAPED,
    SCRIPT_DATA_ESCAPED_DASH,
    SCRIPT_DATA_ESCAPED_DASH_DASH,
    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME,
    SCRIPT_DATA_DOUBLE_ESCAPE_START,
    SCRIPT_DATA_DOUBLE_ESCAPED,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
    SCRIPT_DATA_DOUBLE_ESCAPE_END,
    BEFORE_ATTRIBUTE_NAME,
    ATTRIBUTE_NAME,
    AFTER_ATTRIBUTE_NAME,
    BEFORE_ATTRIBUTE_VALUE,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED,
    ATTRIBUTE_VALUE_SINGLE_QUOTED,
    ATTRIBUTE_VALUE_UNQUOTED,
    AFTER_ATTRIBUTE_VALUE_QUOTED,
    SELF_CLOSING_START_TAG,
    BOGUS_COMMENT,
    MARKUP_DECLARATION_OPEN,
    COMMENT_START,
    COMMENT_START_DASH,
    COMMENT,
    COMMENT_LESS_THAN_SIGN,
    COMMENT_LESS_THAN_SIGN_BANG,
    COMMENT_LESS_THAN_SIGN_BANG_DASH,
    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH,
    COMMENT_END_DASH,
    COMMENT_END,
    COMMENT_END_BANG,
    DOCTYPE,
    BEFORE_DOCTYPE_NAME,
    DOCTYPE_NAME,
    AFTER_DOCTYPE_NAME,
    AFTER_DOCTYPE_PUBLIC_KEYWORD,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
    AFTER_DOCTYPE_SYSTEM_KEYWORD,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
    BOGUS_DOCTYPE,
    CDATA_SECTION,
    CDATA_SECTION_BRACKET,
    CDATA_SECTION_END,
    // Entered by `_startCharacterReference`, which records the previous state in `returnState`.
    CHARACTER_REFERENCE,
    AMBIGUOUS_AMPERSAND,
}
102
103//Tokenizer initial states for different modes
/**
 * Exported aliases for the `State` members that can be used as a tokenizer's initial
 * state. Each value is the corresponding `State` member; `as const` keeps the
 * properties readonly and literally typed.
 */
export const TokenizerMode = {
    DATA: State.DATA,
    RCDATA: State.RCDATA,
    RAWTEXT: State.RAWTEXT,
    SCRIPT_DATA: State.SCRIPT_DATA,
    PLAINTEXT: State.PLAINTEXT,
    CDATA_SECTION: State.CDATA_SECTION,
} as const;
112
113//Utils
114
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
118
/** Returns `true` when `cp` lies in the inclusive range `$.DIGIT_0`..`$.DIGIT_9`. */
function isAsciiDigit(cp: number): boolean {
    const atLeastZero = cp >= $.DIGIT_0;
    return atLeastZero && cp <= $.DIGIT_9;
}
122
/** Returns `true` when `cp` lies in the inclusive range `$.LATIN_CAPITAL_A`..`$.LATIN_CAPITAL_Z`. */
function isAsciiUpper(cp: number): boolean {
    const atLeastA = cp >= $.LATIN_CAPITAL_A;
    return atLeastA && cp <= $.LATIN_CAPITAL_Z;
}
126
/** Returns `true` when `cp` lies in the inclusive range `$.LATIN_SMALL_A`..`$.LATIN_SMALL_Z`. */
function isAsciiLower(cp: number): boolean {
    const atLeastA = cp >= $.LATIN_SMALL_A;
    return atLeastA && cp <= $.LATIN_SMALL_Z;
}
130
/** Returns `true` when `cp` is an ASCII letter of either case. */
function isAsciiLetter(cp: number): boolean {
    if (isAsciiLower(cp)) {
        return true;
    }

    return isAsciiUpper(cp);
}
134
/** Returns `true` when `cp` is an ASCII letter or an ASCII digit. */
function isAsciiAlphaNumeric(cp: number): boolean {
    if (isAsciiLetter(cp)) {
        return true;
    }

    return isAsciiDigit(cp);
}
/**
 * Shifts a code point up by 0x20, the distance between an ASCII capital letter and its
 * small counterpart.
 *
 * NOTE: the shift is unconditional; no check is made that `cp` is an uppercase letter.
 *
 * @param cp Code point to shift.
 * @returns `cp + 0x20`.
 */
function toAsciiLower(cp: number): number {
    const asciiCaseDistance = 0x00_20;

    return cp + asciiCaseDistance;
}
141
/** Returns `true` when `cp` is a space, line feed, tabulation or form feed code point. */
function isWhitespace(cp: number): boolean {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            return true;
        }
        default: {
            return false;
        }
    }
}
145
/** Returns `true` when `cp` is whitespace (per `isWhitespace`), a solidus, or a greater-than sign. */
function isScriptDataDoubleEscapeSequenceEnd(cp: number): boolean {
    if (isWhitespace(cp)) {
        return true;
    }

    return cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN;
}
149
/**
 * Default validator used by `Tokenizer` when no `componentValidator` option is supplied.
 * It answers `false` for every query.
 */
const componentValidator = {
    isSupportedSelfClosing(): boolean {
        return false;
    },
};
151
/**
 * Pluggable validation hook held by `Tokenizer.validator`.
 */
interface Validator {
    /**
     * NOTE(review): presumably reports whether `tagName` may be written in self-closing
     * form; the consumer is not visible in this part of the file, so confirm there.
     */
    isSupportedSelfClosing(tagName: string): boolean;
}
155
/**
 * Result container held by `Tokenizer.compileResult`; the tokenizer's default value is
 * `{ jsonTemplate: {}, deps: [], log: [] }`.
 *
 * NOTE(review): `jsonTemplate` is typed `{}` and `deps` as the empty tuple `[]`, so
 * neither describes any content; confirm the intended element types with the producers.
 */
interface CompileResult {
    jsonTemplate: {};
    deps: [];
    /** Diagnostic entries, each carrying a line, a column and a textual reason. */
    log: Array<{
        line: number;
        column: number;
        reason: string;
    }>;
}
165
/**
 * Per-node bookkeeping record held by `Tokenizer.nodeInfo` (default
 * `{ tn: '', sc: false, pos: '' }`).
 *
 * NOTE(review): the abbreviations presumably stand for tag name, self-closing flag and
 * position; confirm against the code that fills them in.
 */
interface NodeInfo {
    tn: string;
    sc: boolean;
    pos: string;
}
171
/**
 * Maps a numeric character reference value to the parse error it should raise.
 *
 * The checks run in a fixed order and the first match wins: null, above 0x10FFFF,
 * surrogate, undefined code point (reported as a noncharacter), then control code point
 * or carriage return.
 *
 * @param code Numeric value of the character reference.
 * @returns The matching error code, or `null` when none of the checks match.
 */
function getErrorForNumericCharacterReference(code: number): ERR | null {
    if (code === $.NULL) {
        return ERR.nullCharacterReference;
    }

    if (code > 0x10_ff_ff) {
        return ERR.characterReferenceOutsideUnicodeRange;
    }

    if (isSurrogate(code)) {
        return ERR.surrogateCharacterReference;
    }

    if (isUndefinedCodePoint(code)) {
        return ERR.noncharacterCharacterReference;
    }

    if (isControlCodePoint(code) || code === $.CARRIAGE_RETURN) {
        return ERR.controlCharacterReference;
    }

    return null;
}
187
/** Options accepted by the `Tokenizer` constructor. */
export interface TokenizerOptions {
    /** When provided, replaces the tokenizer's default `validator`. */
    componentValidator?: Validator;
    /** When provided, replaces the tokenizer's default (empty) `compileResult` object. */
    compileResult?: CompileResult;
    /** When falsy, `getCurrentLocation` returns `null` and no location info is built. */
    sourceCodeLocationInfo?: boolean;
}
193
/**
 * Callbacks the tokenizer invokes as it emits tokens. The same object is also passed to
 * the `Preprocessor` created in the `Tokenizer` constructor.
 */
export interface TokenHandler {
    onComment(token: CommentToken): void;
    onDoctype(token: DoctypeToken): void;
    onStartTag(token: TagToken): void;
    onEndTag(token: TagToken): void;
    onEof(token: EOFToken): void;
    onCharacter(token: CharacterToken): void;
    onNullCharacter(token: CharacterToken): void;
    onWhitespaceCharacter(token: CharacterToken): void;

    /** Optional parse-error sink; when absent, `_err` reports nothing. */
    onParseError?: ParserErrorHandler | null;
}
206
207//Tokenizer
208export class Tokenizer {
209    public preprocessor: Preprocessor;
210
211    protected paused = false;
212    /** Ensures that the parsing loop isn't run multiple times at once. */
213    protected inLoop = false;
214
215    /**
216     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
217     * and that it is not an integration point for either MathML or HTML.
218     *
219     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
220     */
221    public inForeignNode = false;
222    public lastStartTagName = '';
223    public active = false;
224
225    public nodeInfo: NodeInfo = { tn: '', sc: false, pos: '' };
226    public validator: Validator = componentValidator;
227    public compileResult: CompileResult = { jsonTemplate: {}, deps: [], log: [] };
228
229    public state = State.DATA;
230    protected returnState = State.DATA;
231
232    /**
233     * We use `entities`' `EntityDecoder` to parse character references.
234     *
235     * All of the following states are handled by the `EntityDecoder`:
236     *
237     * - Named character reference state
238     * - Numeric character reference state
     * - Hexadecimal character reference start state
     * - Hexadecimal character reference state
241     * - Decimal character reference state
242     * - Numeric character reference end state
243     */
244    protected entityDecoder: EntityDecoder;
245    protected entityStartPos = 0;
246    protected consumedAfterSnapshot = -1;
247
248    protected currentLocation: Location | null;
249    protected currentCharacterToken: CharacterToken | null = null;
250    protected currentToken: Token | null = null;
251    protected currentAttr: Attribute = { name: '', value: '' };
252
253    constructor(
254        protected options: TokenizerOptions,
255        protected handler: TokenHandler,
256    ) {
257        this.preprocessor = new Preprocessor(handler);
258        this.currentLocation = this.getCurrentLocation(-1);
259        if(options.componentValidator) {
260            this.validator = options.componentValidator;
261        }
262        if(options.compileResult) {
263            this.compileResult = options.compileResult;
264        }
265        this.entityDecoder = new EntityDecoder(
266            htmlDecodeTree,
267            (cp: number, consumed: number) => {
268                // Note: Set `pos` _before_ flushing, as flushing might drop
269                // the current chunk and invalidate `entityStartPos`.
270                this.preprocessor.pos = this.entityStartPos + consumed - 1;
271                this._flushCodePointConsumedAsCharacterReference(cp);
272            },
273            handler.onParseError
274                ? {
275                      missingSemicolonAfterCharacterReference: (): void => {
276                          this._err(ERR.missingSemicolonAfterCharacterReference, 1);
277                      },
278                      absenceOfDigitsInNumericCharacterReference: (consumed: number): void => {
279                          this._err(
280                              ERR.absenceOfDigitsInNumericCharacterReference,
281                              this.entityStartPos - this.preprocessor.pos + consumed,
282                          );
283                      },
284                      validateNumericCharacterReference: (code: number): void => {
285                          const error = getErrorForNumericCharacterReference(code);
286                          if (error) this._err(error, 1);
287                      },
288                  }
289                : undefined,
290        );
291    }
292
293    //Errors
294    protected _err(code: ERR, cpOffset = 0): void {
295        this.handler.onParseError?.(this.preprocessor.getError(code, cpOffset));
296    }
297
298    // NOTE: `offset` may never run across line boundaries.
299    protected getCurrentLocation(offset: number): Location | null {
300        if (!this.options.sourceCodeLocationInfo) {
301            return null;
302        }
303
304        return {
305            startLine: this.preprocessor.line,
306            startCol: this.preprocessor.col - offset,
307            startOffset: this.preprocessor.offset - offset,
308            endLine: -1,
309            endCol: -1,
310            endOffset: -1,
311        };
312    }
313
314    protected _runParsingLoop(): void {
315        if (this.inLoop) return;
316
317        this.inLoop = true;
318
319        while (this.active && !this.paused) {
320            this.consumedAfterSnapshot = 0;
321
322            const cp = this._consume();
323
324            if (!this._ensureHibernation()) {
325                this._callState(cp);
326            }
327        }
328
329        this.inLoop = false;
330    }
331
332    //API
    /** Sets the `paused` flag, which makes the parsing loop stop before its next iteration. */
    public pause(): void {
        this.paused = true;
    }
336
337    public resume(writeCallback?: () => void): void {
338        if (!this.paused) {
339            throw new Error('Parser was already resumed');
340        }
341
342        this.paused = false;
343
344        // Necessary for synchronous resume.
345        if (this.inLoop) return;
346
347        this._runParsingLoop();
348
349        if (!this.paused) {
350            writeCallback?.();
351        }
352    }
353
354    public write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void {
355        this.active = true;
356        this.preprocessor.write(chunk, isLastChunk);
357        this._runParsingLoop();
358
359        if (!this.paused) {
360            writeCallback?.();
361        }
362    }
363
    /**
     * Activates the tokenizer, hands `chunk` to `preprocessor.insertHtmlAtCurrentPos` and
     * runs the parsing loop (a no-op if the loop is already running).
     *
     * @param chunk HTML text to insert.
     */
    public insertHtmlAtCurrentPos(chunk: string): void {
        this.active = true;
        this.preprocessor.insertHtmlAtCurrentPos(chunk);
        this._runParsingLoop();
    }
369
370    //Hibernation
371    protected _ensureHibernation(): boolean {
372        if (this.preprocessor.endOfChunkHit) {
373            this.preprocessor.retreat(this.consumedAfterSnapshot);
374            this.consumedAfterSnapshot = 0;
375            this.active = false;
376
377            return true;
378        }
379
380        return false;
381    }
382
383    //Consumption
384    protected _consume(): number {
385        this.consumedAfterSnapshot++;
386        return this.preprocessor.advance();
387    }
388
389    protected _advanceBy(count: number): void {
390        this.consumedAfterSnapshot += count;
391        for (let i = 0; i < count; i++) {
392            this.preprocessor.advance();
393        }
394    }
395
396    protected _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean {
397        if (this.preprocessor.startsWith(pattern, caseSensitive)) {
398            // We will already have consumed one character before calling this method.
399            this._advanceBy(pattern.length - 1);
400            return true;
401        }
402        return false;
403    }
404
405    //Token creation
406    protected _createStartTagToken(): void {
407        this.currentToken = {
408            type: TokenType.START_TAG,
409            tagName: '',
410            tagID: TAG_ID.UNKNOWN,
411            selfClosing: false,
412            ackSelfClosing: false,
413            attrs: [],
414            location: this.getCurrentLocation(1),
415        };
416    }
417
418    protected _createEndTagToken(): void {
419        this.currentToken = {
420            type: TokenType.END_TAG,
421            tagName: '',
422            tagID: TAG_ID.UNKNOWN,
423            selfClosing: false,
424            ackSelfClosing: false,
425            attrs: [],
426            location: this.getCurrentLocation(2),
427        };
428    }
429
430    protected _createCommentToken(offset: number): void {
431        this.currentToken = {
432            type: TokenType.COMMENT,
433            data: '',
434            location: this.getCurrentLocation(offset),
435        };
436    }
437
438    protected _createDoctypeToken(initialName: string | null): void {
439        this.currentToken = {
440            type: TokenType.DOCTYPE,
441            name: initialName,
442            forceQuirks: false,
443            publicId: null,
444            systemId: null,
445            location: this.currentLocation,
446        };
447    }
448
449    protected _createCharacterToken(type: CharacterToken['type'], chars: string): void {
450        this.currentCharacterToken = {
451            type,
452            chars,
453            location: this.currentLocation,
454        };
455    }
456
457    //Tag attributes
458    protected _createAttr(attrNameFirstCh: string): void {
459        this.currentAttr = {
460            name: attrNameFirstCh,
461            value: '',
462        };
463        this.currentLocation = this.getCurrentLocation(0);
464    }
465
466    protected _leaveAttrName(): void {
467        const token = this.currentToken as TagToken;
468
469        if (getTokenAttr(token, this.currentAttr.name) === null) {
470            token.attrs.push(this.currentAttr);
471
472            if (token.location && this.currentLocation) {
473                const attrLocations = (token.location.attrs ??= Object.create(null));
474                attrLocations[this.currentAttr.name] = this.currentLocation;
475
476                // Set end location
477                this._leaveAttrValue();
478            }
479        } else {
480            this._err(ERR.duplicateAttribute);
481        }
482    }
483
484    protected _leaveAttrValue(): void {
485        if (this.currentLocation) {
486            this.currentLocation.endLine = this.preprocessor.line;
487            this.currentLocation.endCol = this.preprocessor.col;
488            this.currentLocation.endOffset = this.preprocessor.offset;
489        }
490    }
491
492    //Token emission
493    protected prepareToken(ct: Token): void {
494        this._emitCurrentCharacterToken(ct.location);
495        this.currentToken = null;
496
497        if (ct.location) {
498            ct.location.endLine = this.preprocessor.line;
499            ct.location.endCol = this.preprocessor.col + 1;
500            ct.location.endOffset = this.preprocessor.offset + 1;
501        }
502
503        this.currentLocation = this.getCurrentLocation(-1);
504    }
505
506    protected emitCurrentTagToken(): void {
507        const ct = this.currentToken as TagToken;
508        checkselfClosingNode(this, ct);
509        this.prepareToken(ct);
510
511        ct.tagID = getTagID(ct.tagName);
512
513        if (ct.type === TokenType.START_TAG) {
514            this.lastStartTagName = ct.tagName;
515            this.handler.onStartTag(ct);
516        } else {
517            if (ct.attrs.length > 0) {
518                this._err(ERR.endTagWithAttributes);
519            }
520
521            if (ct.selfClosing) {
522                this._err(ERR.endTagWithTrailingSolidus);
523            }
524
525            this.handler.onEndTag(ct);
526        }
527
528        this.preprocessor.dropParsedChunk();
529    }
530
    /**
     * Prepares `ct`, passes it to `handler.onComment`, then drops the parsed chunk.
     *
     * @param ct Comment token to emit.
     */
    protected emitCurrentComment(ct: CommentToken): void {
        this.prepareToken(ct);
        this.handler.onComment(ct);

        this.preprocessor.dropParsedChunk();
    }
537
    /**
     * Prepares `ct`, passes it to `handler.onDoctype`, then drops the parsed chunk.
     *
     * @param ct Doctype token to emit.
     */
    protected emitCurrentDoctype(ct: DoctypeToken): void {
        this.prepareToken(ct);
        this.handler.onDoctype(ct);

        this.preprocessor.dropParsedChunk();
    }
544
545    protected _emitCurrentCharacterToken(nextLocation: Location | null): void {
546        if (this.currentCharacterToken) {
547            //NOTE: if we have a pending character token, make it's end location equal to the
548            //current token's start location.
549            if (nextLocation && this.currentCharacterToken.location) {
550                this.currentCharacterToken.location.endLine = nextLocation.startLine;
551                this.currentCharacterToken.location.endCol = nextLocation.startCol;
552                this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
553            }
554
555            switch (this.currentCharacterToken.type) {
556                case TokenType.CHARACTER: {
557                    this.handler.onCharacter(this.currentCharacterToken);
558                    break;
559                }
560                case TokenType.NULL_CHARACTER: {
561                    this.handler.onNullCharacter(this.currentCharacterToken);
562                    break;
563                }
564                case TokenType.WHITESPACE_CHARACTER: {
565                    this.handler.onWhitespaceCharacter(this.currentCharacterToken);
566                    break;
567                }
568            }
569
570            this.currentCharacterToken = null;
571        }
572    }
573
574    protected _emitEOFToken(): void {
575        const location = this.getCurrentLocation(0);
576
577        if (location) {
578            location.endLine = location.startLine;
579            location.endCol = location.startCol;
580            location.endOffset = location.startOffset;
581        }
582
583        this._emitCurrentCharacterToken(location);
584        this.handler.onEof({ type: TokenType.EOF, location });
585        this.active = false;
586    }
587
588    //Characters emission
589
590    //OPTIMIZATION: The specification uses only one type of character token (one token per character).
591    //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
592    //If we have a sequence of characters that belong to the same group, the parser can process it
593    //as a single solid character token.
594    //So, there are 3 types of character tokens in parse5:
595    //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
596    //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n  \r\t   \f')
    //3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
598    protected _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void {
599        if (this.currentCharacterToken) {
600            if (this.currentCharacterToken.type === type) {
601                this.currentCharacterToken.chars += ch;
602                return;
603            } else {
604                this.currentLocation = this.getCurrentLocation(0);
605                this._emitCurrentCharacterToken(this.currentLocation);
606                this.preprocessor.dropParsedChunk();
607            }
608        }
609
610        this._createCharacterToken(type, ch);
611    }
612
613    protected _emitCodePoint(cp: number): void {
614        const type = isWhitespace(cp)
615            ? TokenType.WHITESPACE_CHARACTER
616            : cp === $.NULL
617              ? TokenType.NULL_CHARACTER
618              : TokenType.CHARACTER;
619
620        this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
621    }
622
623    //NOTE: used when we emit characters explicitly.
624    //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
    /**
     * Appends `ch` to the pending character token as `TokenType.CHARACTER`, with no
     * whitespace/null classification.
     *
     * @param ch Text to emit.
     */
    protected _emitChars(ch: string): void {
        this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
    }
628
629    // Character reference helpers
630    protected _startCharacterReference(): void {
631        this.returnState = this.state;
632        this.state = State.CHARACTER_REFERENCE;
633        this.entityStartPos = this.preprocessor.pos;
634        this.entityDecoder.startEntity(
635            this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy,
636        );
637    }
638
639    protected _isCharacterReferenceInAttribute(): boolean {
640        return (
641            this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
642            this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
643            this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED
644        );
645    }
646
647    protected _flushCodePointConsumedAsCharacterReference(cp: number): void {
648        if (this._isCharacterReferenceInAttribute()) {
649            this.currentAttr.value += String.fromCodePoint(cp);
650        } else {
651            this._emitCodePoint(cp);
652        }
653    }
654
655    // Calling states this way turns out to be much faster than any other approach.
656    protected _callState(cp: number): void {
657        switch (this.state) {
658            case State.DATA: {
659                this._stateData(cp);
660                break;
661            }
662            case State.RCDATA: {
663                this._stateRcdata(cp);
664                break;
665            }
666            case State.RAWTEXT: {
667                this._stateRawtext(cp);
668                break;
669            }
670            case State.SCRIPT_DATA: {
671                this._stateScriptData(cp);
672                break;
673            }
674            case State.PLAINTEXT: {
675                this._statePlaintext(cp);
676                break;
677            }
678            case State.TAG_OPEN: {
679                this._stateTagOpen(cp);
680                break;
681            }
682            case State.END_TAG_OPEN: {
683                this._stateEndTagOpen(cp);
684                break;
685            }
686            case State.TAG_NAME: {
687                this._stateTagName(cp);
688                break;
689            }
690            case State.RCDATA_LESS_THAN_SIGN: {
691                this._stateRcdataLessThanSign(cp);
692                break;
693            }
694            case State.RCDATA_END_TAG_OPEN: {
695                this._stateRcdataEndTagOpen(cp);
696                break;
697            }
698            case State.RCDATA_END_TAG_NAME: {
699                this._stateRcdataEndTagName(cp);
700                break;
701            }
702            case State.RAWTEXT_LESS_THAN_SIGN: {
703                this._stateRawtextLessThanSign(cp);
704                break;
705            }
706            case State.RAWTEXT_END_TAG_OPEN: {
707                this._stateRawtextEndTagOpen(cp);
708                break;
709            }
710            case State.RAWTEXT_END_TAG_NAME: {
711                this._stateRawtextEndTagName(cp);
712                break;
713            }
714            case State.SCRIPT_DATA_LESS_THAN_SIGN: {
715                this._stateScriptDataLessThanSign(cp);
716                break;
717            }
718            case State.SCRIPT_DATA_END_TAG_OPEN: {
719                this._stateScriptDataEndTagOpen(cp);
720                break;
721            }
722            case State.SCRIPT_DATA_END_TAG_NAME: {
723                this._stateScriptDataEndTagName(cp);
724                break;
725            }
726            case State.SCRIPT_DATA_ESCAPE_START: {
727                this._stateScriptDataEscapeStart(cp);
728                break;
729            }
730            case State.SCRIPT_DATA_ESCAPE_START_DASH: {
731                this._stateScriptDataEscapeStartDash(cp);
732                break;
733            }
734            case State.SCRIPT_DATA_ESCAPED: {
735                this._stateScriptDataEscaped(cp);
736                break;
737            }
738            case State.SCRIPT_DATA_ESCAPED_DASH: {
739                this._stateScriptDataEscapedDash(cp);
740                break;
741            }
742            case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
743                this._stateScriptDataEscapedDashDash(cp);
744                break;
745            }
746            case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
747                this._stateScriptDataEscapedLessThanSign(cp);
748                break;
749            }
750            case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
751                this._stateScriptDataEscapedEndTagOpen(cp);
752                break;
753            }
754            case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
755                this._stateScriptDataEscapedEndTagName(cp);
756                break;
757            }
758            case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
759                this._stateScriptDataDoubleEscapeStart(cp);
760                break;
761            }
762            case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
763                this._stateScriptDataDoubleEscaped(cp);
764                break;
765            }
766            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
767                this._stateScriptDataDoubleEscapedDash(cp);
768                break;
769            }
770            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
771                this._stateScriptDataDoubleEscapedDashDash(cp);
772                break;
773            }
774            case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
775                this._stateScriptDataDoubleEscapedLessThanSign(cp);
776                break;
777            }
778            case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
779                this._stateScriptDataDoubleEscapeEnd(cp);
780                break;
781            }
782            case State.BEFORE_ATTRIBUTE_NAME: {
783                this._stateBeforeAttributeName(cp);
784                break;
785            }
786            case State.ATTRIBUTE_NAME: {
787                this._stateAttributeName(cp);
788                break;
789            }
790            case State.AFTER_ATTRIBUTE_NAME: {
791                this._stateAfterAttributeName(cp);
792                break;
793            }
794            case State.BEFORE_ATTRIBUTE_VALUE: {
795                this._stateBeforeAttributeValue(cp);
796                break;
797            }
798            case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
799                this._stateAttributeValueDoubleQuoted(cp);
800                break;
801            }
802            case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
803                this._stateAttributeValueSingleQuoted(cp);
804                break;
805            }
806            case State.ATTRIBUTE_VALUE_UNQUOTED: {
807                this._stateAttributeValueUnquoted(cp);
808                break;
809            }
810            case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
811                this._stateAfterAttributeValueQuoted(cp);
812                break;
813            }
814            case State.SELF_CLOSING_START_TAG: {
815                this._stateSelfClosingStartTag(cp);
816                break;
817            }
818            case State.BOGUS_COMMENT: {
819                this._stateBogusComment(cp);
820                break;
821            }
822            case State.MARKUP_DECLARATION_OPEN: {
823                this._stateMarkupDeclarationOpen(cp);
824                break;
825            }
826            case State.COMMENT_START: {
827                this._stateCommentStart(cp);
828                break;
829            }
830            case State.COMMENT_START_DASH: {
831                this._stateCommentStartDash(cp);
832                break;
833            }
834            case State.COMMENT: {
835                this._stateComment(cp);
836                break;
837            }
838            case State.COMMENT_LESS_THAN_SIGN: {
839                this._stateCommentLessThanSign(cp);
840                break;
841            }
842            case State.COMMENT_LESS_THAN_SIGN_BANG: {
843                this._stateCommentLessThanSignBang(cp);
844                break;
845            }
846            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
847                this._stateCommentLessThanSignBangDash(cp);
848                break;
849            }
850            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
851                this._stateCommentLessThanSignBangDashDash(cp);
852                break;
853            }
854            case State.COMMENT_END_DASH: {
855                this._stateCommentEndDash(cp);
856                break;
857            }
858            case State.COMMENT_END: {
859                this._stateCommentEnd(cp);
860                break;
861            }
862            case State.COMMENT_END_BANG: {
863                this._stateCommentEndBang(cp);
864                break;
865            }
866            case State.DOCTYPE: {
867                this._stateDoctype(cp);
868                break;
869            }
870            case State.BEFORE_DOCTYPE_NAME: {
871                this._stateBeforeDoctypeName(cp);
872                break;
873            }
874            case State.DOCTYPE_NAME: {
875                this._stateDoctypeName(cp);
876                break;
877            }
878            case State.AFTER_DOCTYPE_NAME: {
879                this._stateAfterDoctypeName(cp);
880                break;
881            }
882            case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
883                this._stateAfterDoctypePublicKeyword(cp);
884                break;
885            }
886            case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
887                this._stateBeforeDoctypePublicIdentifier(cp);
888                break;
889            }
890            case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
891                this._stateDoctypePublicIdentifierDoubleQuoted(cp);
892                break;
893            }
894            case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
895                this._stateDoctypePublicIdentifierSingleQuoted(cp);
896                break;
897            }
898            case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
899                this._stateAfterDoctypePublicIdentifier(cp);
900                break;
901            }
902            case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
903                this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
904                break;
905            }
906            case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
907                this._stateAfterDoctypeSystemKeyword(cp);
908                break;
909            }
910            case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
911                this._stateBeforeDoctypeSystemIdentifier(cp);
912                break;
913            }
914            case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
915                this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
916                break;
917            }
918            case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
919                this._stateDoctypeSystemIdentifierSingleQuoted(cp);
920                break;
921            }
922            case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
923                this._stateAfterDoctypeSystemIdentifier(cp);
924                break;
925            }
926            case State.BOGUS_DOCTYPE: {
927                this._stateBogusDoctype(cp);
928                break;
929            }
930            case State.CDATA_SECTION: {
931                this._stateCdataSection(cp);
932                break;
933            }
934            case State.CDATA_SECTION_BRACKET: {
935                this._stateCdataSectionBracket(cp);
936                break;
937            }
938            case State.CDATA_SECTION_END: {
939                this._stateCdataSectionEnd(cp);
940                break;
941            }
942            case State.CHARACTER_REFERENCE: {
943                this._stateCharacterReference();
944                break;
945            }
946            case State.AMBIGUOUS_AMPERSAND: {
947                this._stateAmbiguousAmpersand(cp);
948                break;
949            }
950            default: {
951                throw new Error('Unknown state');
952            }
953        }
954    }
955
956    // State machine
957
958    // Data state
959    //------------------------------------------------------------------
960    protected _stateData(cp: number): void {
961        switch (cp) {
962            case $.LESS_THAN_SIGN: {
963                this.state = State.TAG_OPEN;
964                break;
965            }
966            case $.AMPERSAND: {
967                this._startCharacterReference();
968                break;
969            }
970            case $.NULL: {
971                this._err(ERR.unexpectedNullCharacter);
972                this._emitCodePoint(cp);
973                break;
974            }
975            case $.EOF: {
976                this._emitEOFToken();
977                break;
978            }
979            default: {
980                this._emitCodePoint(cp);
981            }
982        }
983    }
984
985    //  RCDATA state
986    //------------------------------------------------------------------
987    protected _stateRcdata(cp: number): void {
988        switch (cp) {
989            case $.AMPERSAND: {
990                this._startCharacterReference();
991                break;
992            }
993            case $.LESS_THAN_SIGN: {
994                this.state = State.RCDATA_LESS_THAN_SIGN;
995                break;
996            }
997            case $.NULL: {
998                this._err(ERR.unexpectedNullCharacter);
999                this._emitChars(REPLACEMENT_CHARACTER);
1000                break;
1001            }
1002            case $.EOF: {
1003                this._emitEOFToken();
1004                break;
1005            }
1006            default: {
1007                this._emitCodePoint(cp);
1008            }
1009        }
1010    }
1011
1012    // RAWTEXT state
1013    //------------------------------------------------------------------
1014    protected _stateRawtext(cp: number): void {
1015        switch (cp) {
1016            case $.LESS_THAN_SIGN: {
1017                this.state = State.RAWTEXT_LESS_THAN_SIGN;
1018                break;
1019            }
1020            case $.NULL: {
1021                this._err(ERR.unexpectedNullCharacter);
1022                this._emitChars(REPLACEMENT_CHARACTER);
1023                break;
1024            }
1025            case $.EOF: {
1026                this._emitEOFToken();
1027                break;
1028            }
1029            default: {
1030                this._emitCodePoint(cp);
1031            }
1032        }
1033    }
1034
1035    // Script data state
1036    //------------------------------------------------------------------
1037    protected _stateScriptData(cp: number): void {
1038        switch (cp) {
1039            case $.LESS_THAN_SIGN: {
1040                this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
1041                break;
1042            }
1043            case $.NULL: {
1044                this._err(ERR.unexpectedNullCharacter);
1045                this._emitChars(REPLACEMENT_CHARACTER);
1046                break;
1047            }
1048            case $.EOF: {
1049                this._emitEOFToken();
1050                break;
1051            }
1052            default: {
1053                this._emitCodePoint(cp);
1054            }
1055        }
1056    }
1057
1058    // PLAINTEXT state
1059    //------------------------------------------------------------------
1060    protected _statePlaintext(cp: number): void {
1061        switch (cp) {
1062            case $.NULL: {
1063                this._err(ERR.unexpectedNullCharacter);
1064                this._emitChars(REPLACEMENT_CHARACTER);
1065                break;
1066            }
1067            case $.EOF: {
1068                this._emitEOFToken();
1069                break;
1070            }
1071            default: {
1072                this._emitCodePoint(cp);
1073            }
1074        }
1075    }
1076
1077    // Tag open state
1078    //------------------------------------------------------------------
1079    protected _stateTagOpen(cp: number): void {
1080        if (isAsciiLetter(cp)) {
1081            this._createStartTagToken();
1082            this.state = State.TAG_NAME;
1083            this._stateTagName(cp);
1084        } else
1085            switch (cp) {
1086                case $.EXCLAMATION_MARK: {
1087                    this.state = State.MARKUP_DECLARATION_OPEN;
1088                    break;
1089                }
1090                case $.SOLIDUS: {
1091                    this.state = State.END_TAG_OPEN;
1092                    break;
1093                }
1094                case $.QUESTION_MARK: {
1095                    this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
1096                    this._createCommentToken(1);
1097                    this.state = State.BOGUS_COMMENT;
1098                    this._stateBogusComment(cp);
1099                    break;
1100                }
1101                case $.EOF: {
1102                    this._err(ERR.eofBeforeTagName);
1103                    this._emitChars('<');
1104                    this._emitEOFToken();
1105                    break;
1106                }
1107                default: {
1108                    this._err(ERR.invalidFirstCharacterOfTagName);
1109                    this._emitChars('<');
1110                    this.state = State.DATA;
1111                    this._stateData(cp);
1112                }
1113            }
1114    }
1115
1116    // End tag open state
1117    //------------------------------------------------------------------
1118    protected _stateEndTagOpen(cp: number): void {
1119        if (isAsciiLetter(cp)) {
1120            this._createEndTagToken();
1121            this.state = State.TAG_NAME;
1122            this._stateTagName(cp);
1123        } else
1124            switch (cp) {
1125                case $.GREATER_THAN_SIGN: {
1126                    this._err(ERR.missingEndTagName);
1127                    this.state = State.DATA;
1128                    break;
1129                }
1130                case $.EOF: {
1131                    this._err(ERR.eofBeforeTagName);
1132                    this._emitChars('</');
1133                    this._emitEOFToken();
1134                    break;
1135                }
1136                default: {
1137                    this._err(ERR.invalidFirstCharacterOfTagName);
1138                    this._createCommentToken(2);
1139                    this.state = State.BOGUS_COMMENT;
1140                    this._stateBogusComment(cp);
1141                }
1142            }
1143    }
1144
1145    // Tag name state
1146    //------------------------------------------------------------------
1147    protected _stateTagName(cp: number): void {
1148        const token = this.currentToken as TagToken;
1149
1150        switch (cp) {
1151            case $.SPACE:
1152            case $.LINE_FEED:
1153            case $.TABULATION:
1154            case $.FORM_FEED: {
1155                this.state = State.BEFORE_ATTRIBUTE_NAME;
1156                break;
1157            }
1158            case $.SOLIDUS: {
1159                this.state = State.SELF_CLOSING_START_TAG;
1160                break;
1161            }
1162            case $.GREATER_THAN_SIGN: {
1163                this.state = State.DATA;
1164                this.emitCurrentTagToken();
1165                break;
1166            }
1167            case $.NULL: {
1168                this._err(ERR.unexpectedNullCharacter);
1169                token.tagName += REPLACEMENT_CHARACTER;
1170                break;
1171            }
1172            case $.EOF: {
1173                this._err(ERR.eofInTag);
1174                this._emitEOFToken();
1175                break;
1176            }
1177            default: {
1178                token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
1179            }
1180        }
1181    }
1182
1183    // RCDATA less-than sign state
1184    //------------------------------------------------------------------
1185    protected _stateRcdataLessThanSign(cp: number): void {
1186        if (cp === $.SOLIDUS) {
1187            this.state = State.RCDATA_END_TAG_OPEN;
1188        } else {
1189            this._emitChars('<');
1190            this.state = State.RCDATA;
1191            this._stateRcdata(cp);
1192        }
1193    }
1194
1195    // RCDATA end tag open state
1196    //------------------------------------------------------------------
1197    protected _stateRcdataEndTagOpen(cp: number): void {
1198        if (isAsciiLetter(cp)) {
1199            this.state = State.RCDATA_END_TAG_NAME;
1200            this._stateRcdataEndTagName(cp);
1201        } else {
1202            this._emitChars('</');
1203            this.state = State.RCDATA;
1204            this._stateRcdata(cp);
1205        }
1206    }
1207
1208    protected handleSpecialEndTag(_cp: number): boolean {
1209        if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
1210            return !this._ensureHibernation();
1211        }
1212
1213        this._createEndTagToken();
1214        const token = this.currentToken as TagToken;
1215        token.tagName = this.lastStartTagName;
1216
1217        const cp = this.preprocessor.peek(this.lastStartTagName.length);
1218
1219        switch (cp) {
1220            case $.SPACE:
1221            case $.LINE_FEED:
1222            case $.TABULATION:
1223            case $.FORM_FEED: {
1224                this._advanceBy(this.lastStartTagName.length);
1225                this.state = State.BEFORE_ATTRIBUTE_NAME;
1226                return false;
1227            }
1228            case $.SOLIDUS: {
1229                this._advanceBy(this.lastStartTagName.length);
1230                this.state = State.SELF_CLOSING_START_TAG;
1231                return false;
1232            }
1233            case $.GREATER_THAN_SIGN: {
1234                this._advanceBy(this.lastStartTagName.length);
1235                this.emitCurrentTagToken();
1236                this.state = State.DATA;
1237                return false;
1238            }
1239            default: {
1240                return !this._ensureHibernation();
1241            }
1242        }
1243    }
1244
1245    // RCDATA end tag name state
1246    //------------------------------------------------------------------
1247    protected _stateRcdataEndTagName(cp: number): void {
1248        if (this.handleSpecialEndTag(cp)) {
1249            this._emitChars('</');
1250            this.state = State.RCDATA;
1251            this._stateRcdata(cp);
1252        }
1253    }
1254
1255    // RAWTEXT less-than sign state
1256    //------------------------------------------------------------------
1257    protected _stateRawtextLessThanSign(cp: number): void {
1258        if (cp === $.SOLIDUS) {
1259            this.state = State.RAWTEXT_END_TAG_OPEN;
1260        } else {
1261            this._emitChars('<');
1262            this.state = State.RAWTEXT;
1263            this._stateRawtext(cp);
1264        }
1265    }
1266
1267    // RAWTEXT end tag open state
1268    //------------------------------------------------------------------
1269    protected _stateRawtextEndTagOpen(cp: number): void {
1270        if (isAsciiLetter(cp)) {
1271            this.state = State.RAWTEXT_END_TAG_NAME;
1272            this._stateRawtextEndTagName(cp);
1273        } else {
1274            this._emitChars('</');
1275            this.state = State.RAWTEXT;
1276            this._stateRawtext(cp);
1277        }
1278    }
1279
1280    // RAWTEXT end tag name state
1281    //------------------------------------------------------------------
1282    protected _stateRawtextEndTagName(cp: number): void {
1283        if (this.handleSpecialEndTag(cp)) {
1284            this._emitChars('</');
1285            this.state = State.RAWTEXT;
1286            this._stateRawtext(cp);
1287        }
1288    }
1289
1290    // Script data less-than sign state
1291    //------------------------------------------------------------------
1292    protected _stateScriptDataLessThanSign(cp: number): void {
1293        switch (cp) {
1294            case $.SOLIDUS: {
1295                this.state = State.SCRIPT_DATA_END_TAG_OPEN;
1296                break;
1297            }
1298            case $.EXCLAMATION_MARK: {
1299                this.state = State.SCRIPT_DATA_ESCAPE_START;
1300                this._emitChars('<!');
1301                break;
1302            }
1303            default: {
1304                this._emitChars('<');
1305                this.state = State.SCRIPT_DATA;
1306                this._stateScriptData(cp);
1307            }
1308        }
1309    }
1310
1311    // Script data end tag open state
1312    //------------------------------------------------------------------
1313    protected _stateScriptDataEndTagOpen(cp: number): void {
1314        if (isAsciiLetter(cp)) {
1315            this.state = State.SCRIPT_DATA_END_TAG_NAME;
1316            this._stateScriptDataEndTagName(cp);
1317        } else {
1318            this._emitChars('</');
1319            this.state = State.SCRIPT_DATA;
1320            this._stateScriptData(cp);
1321        }
1322    }
1323
1324    // Script data end tag name state
1325    //------------------------------------------------------------------
1326    protected _stateScriptDataEndTagName(cp: number): void {
1327        if (this.handleSpecialEndTag(cp)) {
1328            this._emitChars('</');
1329            this.state = State.SCRIPT_DATA;
1330            this._stateScriptData(cp);
1331        }
1332    }
1333
1334    // Script data escape start state
1335    //------------------------------------------------------------------
1336    protected _stateScriptDataEscapeStart(cp: number): void {
1337        if (cp === $.HYPHEN_MINUS) {
1338            this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
1339            this._emitChars('-');
1340        } else {
1341            this.state = State.SCRIPT_DATA;
1342            this._stateScriptData(cp);
1343        }
1344    }
1345
1346    // Script data escape start dash state
1347    //------------------------------------------------------------------
1348    protected _stateScriptDataEscapeStartDash(cp: number): void {
1349        if (cp === $.HYPHEN_MINUS) {
1350            this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
1351            this._emitChars('-');
1352        } else {
1353            this.state = State.SCRIPT_DATA;
1354            this._stateScriptData(cp);
1355        }
1356    }
1357
1358    // Script data escaped state
1359    //------------------------------------------------------------------
1360    protected _stateScriptDataEscaped(cp: number): void {
1361        switch (cp) {
1362            case $.HYPHEN_MINUS: {
1363                this.state = State.SCRIPT_DATA_ESCAPED_DASH;
1364                this._emitChars('-');
1365                break;
1366            }
1367            case $.LESS_THAN_SIGN: {
1368                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1369                break;
1370            }
1371            case $.NULL: {
1372                this._err(ERR.unexpectedNullCharacter);
1373                this._emitChars(REPLACEMENT_CHARACTER);
1374                break;
1375            }
1376            case $.EOF: {
1377                this._err(ERR.eofInScriptHtmlCommentLikeText);
1378                this._emitEOFToken();
1379                break;
1380            }
1381            default: {
1382                this._emitCodePoint(cp);
1383            }
1384        }
1385    }
1386
1387    // Script data escaped dash state
1388    //------------------------------------------------------------------
1389    protected _stateScriptDataEscapedDash(cp: number): void {
1390        switch (cp) {
1391            case $.HYPHEN_MINUS: {
1392                this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
1393                this._emitChars('-');
1394                break;
1395            }
1396            case $.LESS_THAN_SIGN: {
1397                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1398                break;
1399            }
1400            case $.NULL: {
1401                this._err(ERR.unexpectedNullCharacter);
1402                this.state = State.SCRIPT_DATA_ESCAPED;
1403                this._emitChars(REPLACEMENT_CHARACTER);
1404                break;
1405            }
1406            case $.EOF: {
1407                this._err(ERR.eofInScriptHtmlCommentLikeText);
1408                this._emitEOFToken();
1409                break;
1410            }
1411            default: {
1412                this.state = State.SCRIPT_DATA_ESCAPED;
1413                this._emitCodePoint(cp);
1414            }
1415        }
1416    }
1417
1418    // Script data escaped dash dash state
1419    //------------------------------------------------------------------
1420    protected _stateScriptDataEscapedDashDash(cp: number): void {
1421        switch (cp) {
1422            case $.HYPHEN_MINUS: {
1423                this._emitChars('-');
1424                break;
1425            }
1426            case $.LESS_THAN_SIGN: {
1427                this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
1428                break;
1429            }
1430            case $.GREATER_THAN_SIGN: {
1431                this.state = State.SCRIPT_DATA;
1432                this._emitChars('>');
1433                break;
1434            }
1435            case $.NULL: {
1436                this._err(ERR.unexpectedNullCharacter);
1437                this.state = State.SCRIPT_DATA_ESCAPED;
1438                this._emitChars(REPLACEMENT_CHARACTER);
1439                break;
1440            }
1441            case $.EOF: {
1442                this._err(ERR.eofInScriptHtmlCommentLikeText);
1443                this._emitEOFToken();
1444                break;
1445            }
1446            default: {
1447                this.state = State.SCRIPT_DATA_ESCAPED;
1448                this._emitCodePoint(cp);
1449            }
1450        }
1451    }
1452
1453    // Script data escaped less-than sign state
1454    //------------------------------------------------------------------
1455    protected _stateScriptDataEscapedLessThanSign(cp: number): void {
1456        if (cp === $.SOLIDUS) {
1457            this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
1458        } else if (isAsciiLetter(cp)) {
1459            this._emitChars('<');
1460            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
1461            this._stateScriptDataDoubleEscapeStart(cp);
1462        } else {
1463            this._emitChars('<');
1464            this.state = State.SCRIPT_DATA_ESCAPED;
1465            this._stateScriptDataEscaped(cp);
1466        }
1467    }
1468
1469    // Script data escaped end tag open state
1470    //------------------------------------------------------------------
1471    protected _stateScriptDataEscapedEndTagOpen(cp: number): void {
1472        if (isAsciiLetter(cp)) {
1473            this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
1474            this._stateScriptDataEscapedEndTagName(cp);
1475        } else {
1476            this._emitChars('</');
1477            this.state = State.SCRIPT_DATA_ESCAPED;
1478            this._stateScriptDataEscaped(cp);
1479        }
1480    }
1481
1482    // Script data escaped end tag name state
1483    //------------------------------------------------------------------
1484    protected _stateScriptDataEscapedEndTagName(cp: number): void {
1485        if (this.handleSpecialEndTag(cp)) {
1486            this._emitChars('</');
1487            this.state = State.SCRIPT_DATA_ESCAPED;
1488            this._stateScriptDataEscaped(cp);
1489        }
1490    }
1491
1492    // Script data double escape start state
1493    //------------------------------------------------------------------
1494    protected _stateScriptDataDoubleEscapeStart(cp: number): void {
1495        if (
1496            this.preprocessor.startsWith($$.SCRIPT, false) &&
1497            isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))
1498        ) {
1499            this._emitCodePoint(cp);
1500            for (let i = 0; i < $$.SCRIPT.length; i++) {
1501                this._emitCodePoint(this._consume());
1502            }
1503
1504            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1505        } else if (!this._ensureHibernation()) {
1506            this.state = State.SCRIPT_DATA_ESCAPED;
1507            this._stateScriptDataEscaped(cp);
1508        }
1509    }
1510
1511    // Script data double escaped state
1512    //------------------------------------------------------------------
1513    protected _stateScriptDataDoubleEscaped(cp: number): void {
1514        switch (cp) {
1515            case $.HYPHEN_MINUS: {
1516                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
1517                this._emitChars('-');
1518                break;
1519            }
1520            case $.LESS_THAN_SIGN: {
1521                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1522                this._emitChars('<');
1523                break;
1524            }
1525            case $.NULL: {
1526                this._err(ERR.unexpectedNullCharacter);
1527                this._emitChars(REPLACEMENT_CHARACTER);
1528                break;
1529            }
1530            case $.EOF: {
1531                this._err(ERR.eofInScriptHtmlCommentLikeText);
1532                this._emitEOFToken();
1533                break;
1534            }
1535            default: {
1536                this._emitCodePoint(cp);
1537            }
1538        }
1539    }
1540
1541    // Script data double escaped dash state
1542    //------------------------------------------------------------------
1543    protected _stateScriptDataDoubleEscapedDash(cp: number): void {
1544        switch (cp) {
1545            case $.HYPHEN_MINUS: {
1546                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
1547                this._emitChars('-');
1548                break;
1549            }
1550            case $.LESS_THAN_SIGN: {
1551                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1552                this._emitChars('<');
1553                break;
1554            }
1555            case $.NULL: {
1556                this._err(ERR.unexpectedNullCharacter);
1557                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1558                this._emitChars(REPLACEMENT_CHARACTER);
1559                break;
1560            }
1561            case $.EOF: {
1562                this._err(ERR.eofInScriptHtmlCommentLikeText);
1563                this._emitEOFToken();
1564                break;
1565            }
1566            default: {
1567                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1568                this._emitCodePoint(cp);
1569            }
1570        }
1571    }
1572
1573    // Script data double escaped dash dash state
1574    //------------------------------------------------------------------
1575    protected _stateScriptDataDoubleEscapedDashDash(cp: number): void {
1576        switch (cp) {
1577            case $.HYPHEN_MINUS: {
1578                this._emitChars('-');
1579                break;
1580            }
1581            case $.LESS_THAN_SIGN: {
1582                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
1583                this._emitChars('<');
1584                break;
1585            }
1586            case $.GREATER_THAN_SIGN: {
1587                this.state = State.SCRIPT_DATA;
1588                this._emitChars('>');
1589                break;
1590            }
1591            case $.NULL: {
1592                this._err(ERR.unexpectedNullCharacter);
1593                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1594                this._emitChars(REPLACEMENT_CHARACTER);
1595                break;
1596            }
1597            case $.EOF: {
1598                this._err(ERR.eofInScriptHtmlCommentLikeText);
1599                this._emitEOFToken();
1600                break;
1601            }
1602            default: {
1603                this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1604                this._emitCodePoint(cp);
1605            }
1606        }
1607    }
1608
1609    // Script data double escaped less-than sign state
1610    //------------------------------------------------------------------
1611    protected _stateScriptDataDoubleEscapedLessThanSign(cp: number): void {
1612        if (cp === $.SOLIDUS) {
1613            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
1614            this._emitChars('/');
1615        } else {
1616            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1617            this._stateScriptDataDoubleEscaped(cp);
1618        }
1619    }
1620
1621    // Script data double escape end state
1622    //------------------------------------------------------------------
1623    protected _stateScriptDataDoubleEscapeEnd(cp: number): void {
1624        if (
1625            this.preprocessor.startsWith($$.SCRIPT, false) &&
1626            isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))
1627        ) {
1628            this._emitCodePoint(cp);
1629            for (let i = 0; i < $$.SCRIPT.length; i++) {
1630                this._emitCodePoint(this._consume());
1631            }
1632
1633            this.state = State.SCRIPT_DATA_ESCAPED;
1634        } else if (!this._ensureHibernation()) {
1635            this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
1636            this._stateScriptDataDoubleEscaped(cp);
1637        }
1638    }
1639
1640    // Before attribute name state
1641    //------------------------------------------------------------------
1642    protected _stateBeforeAttributeName(cp: number): void {
1643        switch (cp) {
1644            case $.SPACE:
1645            case $.LINE_FEED:
1646            case $.TABULATION:
1647            case $.FORM_FEED: {
1648                // Ignore whitespace
1649                break;
1650            }
1651            case $.SOLIDUS:
1652            case $.GREATER_THAN_SIGN:
1653            case $.EOF: {
1654                this.state = State.AFTER_ATTRIBUTE_NAME;
1655                this._stateAfterAttributeName(cp);
1656                break;
1657            }
1658            case $.EQUALS_SIGN: {
1659                this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
1660                this._createAttr('=');
1661                this.state = State.ATTRIBUTE_NAME;
1662                break;
1663            }
1664            default: {
1665                this._createAttr('');
1666                this.state = State.ATTRIBUTE_NAME;
1667                this._stateAttributeName(cp);
1668            }
1669        }
1670    }
1671
1672    // Attribute name state
1673    //------------------------------------------------------------------
1674    protected _stateAttributeName(cp: number): void {
1675        switch (cp) {
1676            case $.SPACE:
1677            case $.LINE_FEED:
1678            case $.TABULATION:
1679            case $.FORM_FEED:
1680            case $.SOLIDUS:
1681            case $.GREATER_THAN_SIGN:
1682            case $.EOF: {
1683                this._leaveAttrName();
1684                this.state = State.AFTER_ATTRIBUTE_NAME;
1685                this._stateAfterAttributeName(cp);
1686                break;
1687            }
1688            case $.EQUALS_SIGN: {
1689                this._leaveAttrName();
1690                this.state = State.BEFORE_ATTRIBUTE_VALUE;
1691                break;
1692            }
1693            case $.QUOTATION_MARK:
1694            case $.APOSTROPHE:
1695            case $.LESS_THAN_SIGN: {
1696                this._err(ERR.unexpectedCharacterInAttributeName);
1697                this.currentAttr.name += String.fromCodePoint(cp);
1698                break;
1699            }
1700            case $.NULL: {
1701                this._err(ERR.unexpectedNullCharacter);
1702                this.currentAttr.name += REPLACEMENT_CHARACTER;
1703                break;
1704            }
1705            default: {
1706                this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
1707            }
1708        }
1709    }
1710
1711    // After attribute name state
1712    //------------------------------------------------------------------
1713    protected _stateAfterAttributeName(cp: number): void {
1714        switch (cp) {
1715            case $.SPACE:
1716            case $.LINE_FEED:
1717            case $.TABULATION:
1718            case $.FORM_FEED: {
1719                // Ignore whitespace
1720                break;
1721            }
1722            case $.SOLIDUS: {
1723                this.state = State.SELF_CLOSING_START_TAG;
1724                break;
1725            }
1726            case $.EQUALS_SIGN: {
1727                this.state = State.BEFORE_ATTRIBUTE_VALUE;
1728                break;
1729            }
1730            case $.GREATER_THAN_SIGN: {
1731                this.state = State.DATA;
1732                this.emitCurrentTagToken();
1733                break;
1734            }
1735            case $.EOF: {
1736                this._err(ERR.eofInTag);
1737                this._emitEOFToken();
1738                break;
1739            }
1740            default: {
1741                this._createAttr('');
1742                this.state = State.ATTRIBUTE_NAME;
1743                this._stateAttributeName(cp);
1744            }
1745        }
1746    }
1747
1748    // Before attribute value state
1749    //------------------------------------------------------------------
1750    protected _stateBeforeAttributeValue(cp: number): void {
1751        switch (cp) {
1752            case $.SPACE:
1753            case $.LINE_FEED:
1754            case $.TABULATION:
1755            case $.FORM_FEED: {
1756                // Ignore whitespace
1757                break;
1758            }
1759            case $.QUOTATION_MARK: {
1760                this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
1761                break;
1762            }
1763            case $.APOSTROPHE: {
1764                this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
1765                break;
1766            }
1767            case $.GREATER_THAN_SIGN: {
1768                this._err(ERR.missingAttributeValue);
1769                this.state = State.DATA;
1770                this.emitCurrentTagToken();
1771                break;
1772            }
1773            default: {
1774                this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
1775                this._stateAttributeValueUnquoted(cp);
1776            }
1777        }
1778    }
1779
1780    // Attribute value (double-quoted) state
1781    //------------------------------------------------------------------
1782    protected _stateAttributeValueDoubleQuoted(cp: number): void {
1783        switch (cp) {
1784            case $.QUOTATION_MARK: {
1785                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
1786                break;
1787            }
1788            case $.AMPERSAND: {
1789                this._startCharacterReference();
1790                break;
1791            }
1792            case $.NULL: {
1793                this._err(ERR.unexpectedNullCharacter);
1794                this.currentAttr.value += REPLACEMENT_CHARACTER;
1795                break;
1796            }
1797            case $.EOF: {
1798                this._err(ERR.eofInTag);
1799                this._emitEOFToken();
1800                break;
1801            }
1802            default: {
1803                this.currentAttr.value += String.fromCodePoint(cp);
1804            }
1805        }
1806    }
1807
1808    // Attribute value (single-quoted) state
1809    //------------------------------------------------------------------
1810    protected _stateAttributeValueSingleQuoted(cp: number): void {
1811        switch (cp) {
1812            case $.APOSTROPHE: {
1813                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
1814                break;
1815            }
1816            case $.AMPERSAND: {
1817                this._startCharacterReference();
1818                break;
1819            }
1820            case $.NULL: {
1821                this._err(ERR.unexpectedNullCharacter);
1822                this.currentAttr.value += REPLACEMENT_CHARACTER;
1823                break;
1824            }
1825            case $.EOF: {
1826                this._err(ERR.eofInTag);
1827                this._emitEOFToken();
1828                break;
1829            }
1830            default: {
1831                this.currentAttr.value += String.fromCodePoint(cp);
1832            }
1833        }
1834    }
1835
1836    // Attribute value (unquoted) state
1837    //------------------------------------------------------------------
1838    protected _stateAttributeValueUnquoted(cp: number): void {
1839        switch (cp) {
1840            case $.SPACE:
1841            case $.LINE_FEED:
1842            case $.TABULATION:
1843            case $.FORM_FEED: {
1844                this._leaveAttrValue();
1845                this.state = State.BEFORE_ATTRIBUTE_NAME;
1846                break;
1847            }
1848            case $.AMPERSAND: {
1849                this._startCharacterReference();
1850                break;
1851            }
1852            case $.GREATER_THAN_SIGN: {
1853                this._leaveAttrValue();
1854                this.state = State.DATA;
1855                this.emitCurrentTagToken();
1856                break;
1857            }
1858            case $.NULL: {
1859                this._err(ERR.unexpectedNullCharacter);
1860                this.currentAttr.value += REPLACEMENT_CHARACTER;
1861                break;
1862            }
1863            case $.QUOTATION_MARK:
1864            case $.APOSTROPHE:
1865            case $.LESS_THAN_SIGN:
1866            case $.EQUALS_SIGN:
1867            case $.GRAVE_ACCENT: {
1868                this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
1869                this.currentAttr.value += String.fromCodePoint(cp);
1870                break;
1871            }
1872            case $.EOF: {
1873                this._err(ERR.eofInTag);
1874                this._emitEOFToken();
1875                break;
1876            }
1877            default: {
1878                this.currentAttr.value += String.fromCodePoint(cp);
1879            }
1880        }
1881    }
1882
1883    // After attribute value (quoted) state
1884    //------------------------------------------------------------------
1885    protected _stateAfterAttributeValueQuoted(cp: number): void {
1886        switch (cp) {
1887            case $.SPACE:
1888            case $.LINE_FEED:
1889            case $.TABULATION:
1890            case $.FORM_FEED: {
1891                this._leaveAttrValue();
1892                this.state = State.BEFORE_ATTRIBUTE_NAME;
1893                break;
1894            }
1895            case $.SOLIDUS: {
1896                this._leaveAttrValue();
1897                this.state = State.SELF_CLOSING_START_TAG;
1898                break;
1899            }
1900            case $.GREATER_THAN_SIGN: {
1901                this._leaveAttrValue();
1902                this.state = State.DATA;
1903                this.emitCurrentTagToken();
1904                break;
1905            }
1906            case $.EOF: {
1907                this._err(ERR.eofInTag);
1908                this._emitEOFToken();
1909                break;
1910            }
1911            default: {
1912                this._err(ERR.missingWhitespaceBetweenAttributes);
1913                this.state = State.BEFORE_ATTRIBUTE_NAME;
1914                this._stateBeforeAttributeName(cp);
1915            }
1916        }
1917    }
1918
1919    // Self-closing start tag state
1920    //------------------------------------------------------------------
1921    protected _stateSelfClosingStartTag(cp: number): void {
1922        switch (cp) {
1923            case $.GREATER_THAN_SIGN: {
1924                const token = this.currentToken as TagToken;
1925                token.selfClosing = true;
1926                this.state = State.DATA;
1927                this.emitCurrentTagToken();
1928                break;
1929            }
1930            case $.EOF: {
1931                this._err(ERR.eofInTag);
1932                this._emitEOFToken();
1933                break;
1934            }
1935            default: {
1936                this._err(ERR.unexpectedSolidusInTag);
1937                this.state = State.BEFORE_ATTRIBUTE_NAME;
1938                this._stateBeforeAttributeName(cp);
1939            }
1940        }
1941    }
1942
1943    // Bogus comment state
1944    //------------------------------------------------------------------
1945    protected _stateBogusComment(cp: number): void {
1946        const token = this.currentToken as CommentToken;
1947
1948        switch (cp) {
1949            case $.GREATER_THAN_SIGN: {
1950                this.state = State.DATA;
1951                this.emitCurrentComment(token);
1952                break;
1953            }
1954            case $.EOF: {
1955                this.emitCurrentComment(token);
1956                this._emitEOFToken();
1957                break;
1958            }
1959            case $.NULL: {
1960                this._err(ERR.unexpectedNullCharacter);
1961                token.data += REPLACEMENT_CHARACTER;
1962                break;
1963            }
1964            default: {
1965                token.data += String.fromCodePoint(cp);
1966            }
1967        }
1968    }
1969
1970    // Markup declaration open state
1971    //------------------------------------------------------------------
1972    protected _stateMarkupDeclarationOpen(cp: number): void {
1973        if (this._consumeSequenceIfMatch($$.DASH_DASH, true)) {
1974            this._createCommentToken($$.DASH_DASH.length + 1);
1975            this.state = State.COMMENT_START;
1976        } else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) {
1977            // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
1978            this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1);
1979            this.state = State.DOCTYPE;
1980        } else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) {
1981            if (this.inForeignNode) {
1982                this.state = State.CDATA_SECTION;
1983            } else {
1984                this._err(ERR.cdataInHtmlContent);
1985                this._createCommentToken($$.CDATA_START.length + 1);
1986                (this.currentToken as CommentToken).data = '[CDATA[';
1987                this.state = State.BOGUS_COMMENT;
1988            }
1989        }
1990
1991        //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
1992        //results are no longer valid and we will need to start over.
1993        else if (!this._ensureHibernation()) {
1994            this._err(ERR.incorrectlyOpenedComment);
1995            this._createCommentToken(2);
1996            this.state = State.BOGUS_COMMENT;
1997            this._stateBogusComment(cp);
1998        }
1999    }
2000
2001    // Comment start state
2002    //------------------------------------------------------------------
2003    protected _stateCommentStart(cp: number): void {
2004        switch (cp) {
2005            case $.HYPHEN_MINUS: {
2006                this.state = State.COMMENT_START_DASH;
2007                break;
2008            }
2009            case $.GREATER_THAN_SIGN: {
2010                this._err(ERR.abruptClosingOfEmptyComment);
2011                this.state = State.DATA;
2012                const token = this.currentToken as CommentToken;
2013                this.emitCurrentComment(token);
2014                break;
2015            }
2016            default: {
2017                this.state = State.COMMENT;
2018                this._stateComment(cp);
2019            }
2020        }
2021    }
2022
2023    // Comment start dash state
2024    //------------------------------------------------------------------
2025    protected _stateCommentStartDash(cp: number): void {
2026        const token = this.currentToken as CommentToken;
2027        switch (cp) {
2028            case $.HYPHEN_MINUS: {
2029                this.state = State.COMMENT_END;
2030                break;
2031            }
2032            case $.GREATER_THAN_SIGN: {
2033                this._err(ERR.abruptClosingOfEmptyComment);
2034                this.state = State.DATA;
2035                this.emitCurrentComment(token);
2036                break;
2037            }
2038            case $.EOF: {
2039                this._err(ERR.eofInComment);
2040                this.emitCurrentComment(token);
2041                this._emitEOFToken();
2042                break;
2043            }
2044            default: {
2045                token.data += '-';
2046                this.state = State.COMMENT;
2047                this._stateComment(cp);
2048            }
2049        }
2050    }
2051
2052    // Comment state
2053    //------------------------------------------------------------------
2054    protected _stateComment(cp: number): void {
2055        const token = this.currentToken as CommentToken;
2056
2057        switch (cp) {
2058            case $.HYPHEN_MINUS: {
2059                this.state = State.COMMENT_END_DASH;
2060                break;
2061            }
2062            case $.LESS_THAN_SIGN: {
2063                token.data += '<';
2064                this.state = State.COMMENT_LESS_THAN_SIGN;
2065                break;
2066            }
2067            case $.NULL: {
2068                this._err(ERR.unexpectedNullCharacter);
2069                token.data += REPLACEMENT_CHARACTER;
2070                break;
2071            }
2072            case $.EOF: {
2073                this._err(ERR.eofInComment);
2074                this.emitCurrentComment(token);
2075                this._emitEOFToken();
2076                break;
2077            }
2078            default: {
2079                token.data += String.fromCodePoint(cp);
2080            }
2081        }
2082    }
2083
2084    // Comment less-than sign state
2085    //------------------------------------------------------------------
2086    protected _stateCommentLessThanSign(cp: number): void {
2087        const token = this.currentToken as CommentToken;
2088
2089        switch (cp) {
2090            case $.EXCLAMATION_MARK: {
2091                token.data += '!';
2092                this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
2093                break;
2094            }
2095            case $.LESS_THAN_SIGN: {
2096                token.data += '<';
2097                break;
2098            }
2099            default: {
2100                this.state = State.COMMENT;
2101                this._stateComment(cp);
2102            }
2103        }
2104    }
2105
2106    // Comment less-than sign bang state
2107    //------------------------------------------------------------------
2108    protected _stateCommentLessThanSignBang(cp: number): void {
2109        if (cp === $.HYPHEN_MINUS) {
2110            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
2111        } else {
2112            this.state = State.COMMENT;
2113            this._stateComment(cp);
2114        }
2115    }
2116
2117    // Comment less-than sign bang dash state
2118    //------------------------------------------------------------------
2119    protected _stateCommentLessThanSignBangDash(cp: number): void {
2120        if (cp === $.HYPHEN_MINUS) {
2121            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
2122        } else {
2123            this.state = State.COMMENT_END_DASH;
2124            this._stateCommentEndDash(cp);
2125        }
2126    }
2127
2128    // Comment less-than sign bang dash dash state
2129    //------------------------------------------------------------------
2130    protected _stateCommentLessThanSignBangDashDash(cp: number): void {
2131        if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) {
2132            this._err(ERR.nestedComment);
2133        }
2134
2135        this.state = State.COMMENT_END;
2136        this._stateCommentEnd(cp);
2137    }
2138
2139    // Comment end dash state
2140    //------------------------------------------------------------------
2141    protected _stateCommentEndDash(cp: number): void {
2142        const token = this.currentToken as CommentToken;
2143        switch (cp) {
2144            case $.HYPHEN_MINUS: {
2145                this.state = State.COMMENT_END;
2146                break;
2147            }
2148            case $.EOF: {
2149                this._err(ERR.eofInComment);
2150                this.emitCurrentComment(token);
2151                this._emitEOFToken();
2152                break;
2153            }
2154            default: {
2155                token.data += '-';
2156                this.state = State.COMMENT;
2157                this._stateComment(cp);
2158            }
2159        }
2160    }
2161
2162    // Comment end state
2163    //------------------------------------------------------------------
2164    protected _stateCommentEnd(cp: number): void {
2165        const token = this.currentToken as CommentToken;
2166
2167        switch (cp) {
2168            case $.GREATER_THAN_SIGN: {
2169                this.state = State.DATA;
2170                this.emitCurrentComment(token);
2171                break;
2172            }
2173            case $.EXCLAMATION_MARK: {
2174                this.state = State.COMMENT_END_BANG;
2175                break;
2176            }
2177            case $.HYPHEN_MINUS: {
2178                token.data += '-';
2179                break;
2180            }
2181            case $.EOF: {
2182                this._err(ERR.eofInComment);
2183                this.emitCurrentComment(token);
2184                this._emitEOFToken();
2185                break;
2186            }
2187            default: {
2188                token.data += '--';
2189                this.state = State.COMMENT;
2190                this._stateComment(cp);
2191            }
2192        }
2193    }
2194
2195    // Comment end bang state
2196    //------------------------------------------------------------------
2197    protected _stateCommentEndBang(cp: number): void {
2198        const token = this.currentToken as CommentToken;
2199
2200        switch (cp) {
2201            case $.HYPHEN_MINUS: {
2202                token.data += '--!';
2203                this.state = State.COMMENT_END_DASH;
2204                break;
2205            }
2206            case $.GREATER_THAN_SIGN: {
2207                this._err(ERR.incorrectlyClosedComment);
2208                this.state = State.DATA;
2209                this.emitCurrentComment(token);
2210                break;
2211            }
2212            case $.EOF: {
2213                this._err(ERR.eofInComment);
2214                this.emitCurrentComment(token);
2215                this._emitEOFToken();
2216                break;
2217            }
2218            default: {
2219                token.data += '--!';
2220                this.state = State.COMMENT;
2221                this._stateComment(cp);
2222            }
2223        }
2224    }
2225
2226    // DOCTYPE state
2227    //------------------------------------------------------------------
2228    protected _stateDoctype(cp: number): void {
2229        switch (cp) {
2230            case $.SPACE:
2231            case $.LINE_FEED:
2232            case $.TABULATION:
2233            case $.FORM_FEED: {
2234                this.state = State.BEFORE_DOCTYPE_NAME;
2235                break;
2236            }
2237            case $.GREATER_THAN_SIGN: {
2238                this.state = State.BEFORE_DOCTYPE_NAME;
2239                this._stateBeforeDoctypeName(cp);
2240                break;
2241            }
2242            case $.EOF: {
2243                this._err(ERR.eofInDoctype);
2244                this._createDoctypeToken(null);
2245                const token = this.currentToken as DoctypeToken;
2246                token.forceQuirks = true;
2247                this.emitCurrentDoctype(token);
2248                this._emitEOFToken();
2249                break;
2250            }
2251            default: {
2252                this._err(ERR.missingWhitespaceBeforeDoctypeName);
2253                this.state = State.BEFORE_DOCTYPE_NAME;
2254                this._stateBeforeDoctypeName(cp);
2255            }
2256        }
2257    }
2258
2259    // Before DOCTYPE name state
2260    //------------------------------------------------------------------
2261    protected _stateBeforeDoctypeName(cp: number): void {
2262        if (isAsciiUpper(cp)) {
2263            this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
2264            this.state = State.DOCTYPE_NAME;
2265        } else
2266            switch (cp) {
2267                case $.SPACE:
2268                case $.LINE_FEED:
2269                case $.TABULATION:
2270                case $.FORM_FEED: {
2271                    // Ignore whitespace
2272                    break;
2273                }
2274                case $.NULL: {
2275                    this._err(ERR.unexpectedNullCharacter);
2276                    this._createDoctypeToken(REPLACEMENT_CHARACTER);
2277                    this.state = State.DOCTYPE_NAME;
2278                    break;
2279                }
2280                case $.GREATER_THAN_SIGN: {
2281                    this._err(ERR.missingDoctypeName);
2282                    this._createDoctypeToken(null);
2283                    const token = this.currentToken as DoctypeToken;
2284                    token.forceQuirks = true;
2285                    this.emitCurrentDoctype(token);
2286                    this.state = State.DATA;
2287                    break;
2288                }
2289                case $.EOF: {
2290                    this._err(ERR.eofInDoctype);
2291                    this._createDoctypeToken(null);
2292                    const token = this.currentToken as DoctypeToken;
2293                    token.forceQuirks = true;
2294                    this.emitCurrentDoctype(token);
2295                    this._emitEOFToken();
2296                    break;
2297                }
2298                default: {
2299                    this._createDoctypeToken(String.fromCodePoint(cp));
2300                    this.state = State.DOCTYPE_NAME;
2301                }
2302            }
2303    }
2304
2305    // DOCTYPE name state
2306    //------------------------------------------------------------------
2307    protected _stateDoctypeName(cp: number): void {
2308        const token = this.currentToken as DoctypeToken;
2309
2310        switch (cp) {
2311            case $.SPACE:
2312            case $.LINE_FEED:
2313            case $.TABULATION:
2314            case $.FORM_FEED: {
2315                this.state = State.AFTER_DOCTYPE_NAME;
2316                break;
2317            }
2318            case $.GREATER_THAN_SIGN: {
2319                this.state = State.DATA;
2320                this.emitCurrentDoctype(token);
2321                break;
2322            }
2323            case $.NULL: {
2324                this._err(ERR.unexpectedNullCharacter);
2325                token.name += REPLACEMENT_CHARACTER;
2326                break;
2327            }
2328            case $.EOF: {
2329                this._err(ERR.eofInDoctype);
2330                token.forceQuirks = true;
2331                this.emitCurrentDoctype(token);
2332                this._emitEOFToken();
2333                break;
2334            }
2335            default: {
2336                token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
2337            }
2338        }
2339    }
2340
2341    // After DOCTYPE name state
2342    //------------------------------------------------------------------
2343    protected _stateAfterDoctypeName(cp: number): void {
2344        const token = this.currentToken as DoctypeToken;
2345
2346        switch (cp) {
2347            case $.SPACE:
2348            case $.LINE_FEED:
2349            case $.TABULATION:
2350            case $.FORM_FEED: {
2351                // Ignore whitespace
2352                break;
2353            }
2354            case $.GREATER_THAN_SIGN: {
2355                this.state = State.DATA;
2356                this.emitCurrentDoctype(token);
2357                break;
2358            }
2359            case $.EOF: {
2360                this._err(ERR.eofInDoctype);
2361                token.forceQuirks = true;
2362                this.emitCurrentDoctype(token);
2363                this._emitEOFToken();
2364                break;
2365            }
2366            default: {
2367                if (this._consumeSequenceIfMatch($$.PUBLIC, false)) {
2368                    this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
2369                } else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) {
2370                    this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
2371                }
2372                //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
2373                //results are no longer valid and we will need to start over.
2374                else if (!this._ensureHibernation()) {
2375                    this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
2376                    token.forceQuirks = true;
2377                    this.state = State.BOGUS_DOCTYPE;
2378                    this._stateBogusDoctype(cp);
2379                }
2380            }
2381        }
2382    }
2383
2384    // After DOCTYPE public keyword state
2385    //------------------------------------------------------------------
2386    protected _stateAfterDoctypePublicKeyword(cp: number): void {
2387        const token = this.currentToken as DoctypeToken;
2388
2389        switch (cp) {
2390            case $.SPACE:
2391            case $.LINE_FEED:
2392            case $.TABULATION:
2393            case $.FORM_FEED: {
2394                this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
2395                break;
2396            }
2397            case $.QUOTATION_MARK: {
2398                this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
2399                token.publicId = '';
2400                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
2401                break;
2402            }
2403            case $.APOSTROPHE: {
2404                this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
2405                token.publicId = '';
2406                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
2407                break;
2408            }
2409            case $.GREATER_THAN_SIGN: {
2410                this._err(ERR.missingDoctypePublicIdentifier);
2411                token.forceQuirks = true;
2412                this.state = State.DATA;
2413                this.emitCurrentDoctype(token);
2414                break;
2415            }
2416            case $.EOF: {
2417                this._err(ERR.eofInDoctype);
2418                token.forceQuirks = true;
2419                this.emitCurrentDoctype(token);
2420                this._emitEOFToken();
2421                break;
2422            }
2423            default: {
2424                this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
2425                token.forceQuirks = true;
2426                this.state = State.BOGUS_DOCTYPE;
2427                this._stateBogusDoctype(cp);
2428            }
2429        }
2430    }
2431
2432    // Before DOCTYPE public identifier state
2433    //------------------------------------------------------------------
2434    protected _stateBeforeDoctypePublicIdentifier(cp: number): void {
2435        const token = this.currentToken as DoctypeToken;
2436
2437        switch (cp) {
2438            case $.SPACE:
2439            case $.LINE_FEED:
2440            case $.TABULATION:
2441            case $.FORM_FEED: {
2442                // Ignore whitespace
2443                break;
2444            }
2445            case $.QUOTATION_MARK: {
2446                token.publicId = '';
2447                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
2448                break;
2449            }
2450            case $.APOSTROPHE: {
2451                token.publicId = '';
2452                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
2453                break;
2454            }
2455            case $.GREATER_THAN_SIGN: {
2456                this._err(ERR.missingDoctypePublicIdentifier);
2457                token.forceQuirks = true;
2458                this.state = State.DATA;
2459                this.emitCurrentDoctype(token);
2460                break;
2461            }
2462            case $.EOF: {
2463                this._err(ERR.eofInDoctype);
2464                token.forceQuirks = true;
2465                this.emitCurrentDoctype(token);
2466                this._emitEOFToken();
2467                break;
2468            }
2469            default: {
2470                this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
2471                token.forceQuirks = true;
2472                this.state = State.BOGUS_DOCTYPE;
2473                this._stateBogusDoctype(cp);
2474            }
2475        }
2476    }
2477
2478    // DOCTYPE public identifier (double-quoted) state
2479    //------------------------------------------------------------------
2480    protected _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void {
2481        const token = this.currentToken as DoctypeToken;
2482
2483        switch (cp) {
2484            case $.QUOTATION_MARK: {
2485                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
2486                break;
2487            }
2488            case $.NULL: {
2489                this._err(ERR.unexpectedNullCharacter);
2490                token.publicId += REPLACEMENT_CHARACTER;
2491                break;
2492            }
2493            case $.GREATER_THAN_SIGN: {
2494                this._err(ERR.abruptDoctypePublicIdentifier);
2495                token.forceQuirks = true;
2496                this.emitCurrentDoctype(token);
2497                this.state = State.DATA;
2498                break;
2499            }
2500            case $.EOF: {
2501                this._err(ERR.eofInDoctype);
2502                token.forceQuirks = true;
2503                this.emitCurrentDoctype(token);
2504                this._emitEOFToken();
2505                break;
2506            }
2507            default: {
2508                token.publicId += String.fromCodePoint(cp);
2509            }
2510        }
2511    }
2512
2513    // DOCTYPE public identifier (single-quoted) state
2514    //------------------------------------------------------------------
2515    protected _stateDoctypePublicIdentifierSingleQuoted(cp: number): void {
2516        const token = this.currentToken as DoctypeToken;
2517
2518        switch (cp) {
2519            case $.APOSTROPHE: {
2520                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
2521                break;
2522            }
2523            case $.NULL: {
2524                this._err(ERR.unexpectedNullCharacter);
2525                token.publicId += REPLACEMENT_CHARACTER;
2526                break;
2527            }
2528            case $.GREATER_THAN_SIGN: {
2529                this._err(ERR.abruptDoctypePublicIdentifier);
2530                token.forceQuirks = true;
2531                this.emitCurrentDoctype(token);
2532                this.state = State.DATA;
2533                break;
2534            }
2535            case $.EOF: {
2536                this._err(ERR.eofInDoctype);
2537                token.forceQuirks = true;
2538                this.emitCurrentDoctype(token);
2539                this._emitEOFToken();
2540                break;
2541            }
2542            default: {
2543                token.publicId += String.fromCodePoint(cp);
2544            }
2545        }
2546    }
2547
2548    // After DOCTYPE public identifier state
2549    //------------------------------------------------------------------
/**
 * Processes the code point that follows a closed DOCTYPE public identifier.
 *
 * - whitespace: move to the state between the public and system identifiers;
 * - `>`: switch to the data state, then emit the DOCTYPE token;
 * - `"` or `'`: report the missing whitespace, start an empty system
 *   identifier and enter the matching quoted system-identifier state;
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: report the missing quote, set force-quirks and hand the
 *   same code point to the bogus DOCTYPE state.
 *
 * @param cp Code point to process.
 */
protected _stateAfterDoctypePublicIdentifier(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED) {
        this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this.state = State.DATA;
        this.emitCurrentDoctype(doctype);
    } else if (cp === $.QUOTATION_MARK || cp === $.APOSTROPHE) {
        // A quote directly after the public identifier: the separating whitespace is missing.
        this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
        doctype.systemId = '';
        this.state =
            cp === $.QUOTATION_MARK
                ? State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
                : State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.state = State.BOGUS_DOCTYPE;
        // Let the bogus DOCTYPE state handle this same code point.
        this._stateBogusDoctype(cp);
    }
}
2593
2594    // Between DOCTYPE public and system identifiers state
2595    //------------------------------------------------------------------
/**
 * Processes a code point located between the DOCTYPE public and system
 * identifiers.
 *
 * - whitespace: ignored;
 * - `>`: emit the DOCTYPE token, then switch to the data state;
 * - `"` or `'`: start an empty system identifier and enter the matching
 *   quoted system-identifier state (no error is reported here);
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: report the missing quote, set force-quirks and hand the
 *   same code point to the bogus DOCTYPE state.
 *
 * @param cp Code point to process.
 */
protected _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED) {
        // Whitespace carries no information in this state.
        return;
    }

    if (cp === $.GREATER_THAN_SIGN) {
        this.emitCurrentDoctype(doctype);
        this.state = State.DATA;
    } else if (cp === $.QUOTATION_MARK) {
        doctype.systemId = '';
        this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
    } else if (cp === $.APOSTROPHE) {
        doctype.systemId = '';
        this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.state = State.BOGUS_DOCTYPE;
        // Let the bogus DOCTYPE state handle this same code point.
        this._stateBogusDoctype(cp);
    }
}
2637
2638    // After DOCTYPE system keyword state
2639    //------------------------------------------------------------------
/**
 * Processes the code point that follows the DOCTYPE system keyword.
 *
 * - whitespace: move to the "before DOCTYPE system identifier" state;
 * - `"` or `'`: report the missing whitespace after the keyword, start an
 *   empty system identifier and enter the matching quoted state;
 * - `>`: report the missing system identifier, set force-quirks, switch to
 *   the data state and emit the DOCTYPE token;
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: report the missing quote, set force-quirks and hand the
 *   same code point to the bogus DOCTYPE state.
 *
 * @param cp Code point to process.
 */
protected _stateAfterDoctypeSystemKeyword(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED) {
        this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
    } else if (cp === $.QUOTATION_MARK || cp === $.APOSTROPHE) {
        // The identifier starts immediately after the keyword, without whitespace.
        this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
        doctype.systemId = '';
        this.state =
            cp === $.QUOTATION_MARK
                ? State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
                : State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._err(ERR.missingDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.state = State.DATA;
        this.emitCurrentDoctype(doctype);
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.state = State.BOGUS_DOCTYPE;
        // Let the bogus DOCTYPE state handle this same code point.
        this._stateBogusDoctype(cp);
    }
}
2685
2686    // Before DOCTYPE system identifier state
2687    //------------------------------------------------------------------
/**
 * Processes a code point while waiting for the DOCTYPE system identifier to
 * begin.
 *
 * - whitespace: ignored;
 * - `"` or `'`: start an empty system identifier and enter the matching
 *   quoted system-identifier state;
 * - `>`: report the missing system identifier, set force-quirks, switch to
 *   the data state and emit the DOCTYPE token;
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: report the missing quote, set force-quirks and hand the
 *   same code point to the bogus DOCTYPE state.
 *
 * @param cp Code point to process.
 */
protected _stateBeforeDoctypeSystemIdentifier(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED) {
        // Leading whitespace is skipped.
        return;
    }

    if (cp === $.QUOTATION_MARK) {
        doctype.systemId = '';
        this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
    } else if (cp === $.APOSTROPHE) {
        doctype.systemId = '';
        this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._err(ERR.missingDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.state = State.DATA;
        this.emitCurrentDoctype(doctype);
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.state = State.BOGUS_DOCTYPE;
        // Let the bogus DOCTYPE state handle this same code point.
        this._stateBogusDoctype(cp);
    }
}
2731
2732    // DOCTYPE system identifier (double-quoted) state
2733    //------------------------------------------------------------------
/**
 * Accumulates a double-quoted DOCTYPE system identifier.
 *
 * - `"`: the identifier is complete; move to the state after it;
 * - NULL: report it and append the replacement character instead;
 * - `>`: report the abrupt end, set force-quirks, emit the DOCTYPE token and
 *   switch to the data state;
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: append the code point to the system identifier.
 *
 * @param cp Code point to process.
 */
protected _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.QUOTATION_MARK) {
        this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        doctype.systemId += REPLACEMENT_CHARACTER;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._err(ERR.abruptDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this.state = State.DATA;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        doctype.systemId += String.fromCodePoint(cp);
    }
}
2766
2767    // DOCTYPE system identifier (single-quoted) state
2768    //------------------------------------------------------------------
/**
 * Accumulates a single-quoted DOCTYPE system identifier.
 *
 * - `'`: the identifier is complete; move to the state after it;
 * - NULL: report it and append the replacement character instead;
 * - `>`: report the abrupt end, set force-quirks, emit the DOCTYPE token and
 *   switch to the data state;
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: append the code point to the system identifier.
 *
 * @param cp Code point to process.
 */
protected _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.APOSTROPHE) {
        this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        doctype.systemId += REPLACEMENT_CHARACTER;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._err(ERR.abruptDoctypeSystemIdentifier);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this.state = State.DATA;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        doctype.systemId += String.fromCodePoint(cp);
    }
}
2801
2802    // After DOCTYPE system identifier state
2803    //------------------------------------------------------------------
/**
 * Processes the code point that follows a closed DOCTYPE system identifier.
 *
 * - whitespace: ignored;
 * - `>`: emit the DOCTYPE token, then switch to the data state;
 * - EOF: report it, set force-quirks, emit the DOCTYPE and then the EOF token;
 * - anything else: report the unexpected character and hand the same code
 *   point to the bogus DOCTYPE state (force-quirks is left untouched here).
 *
 * @param cp Code point to process.
 */
protected _stateAfterDoctypeSystemIdentifier(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED) {
        // Trailing whitespace is skipped.
        return;
    }

    if (cp === $.GREATER_THAN_SIGN) {
        this.emitCurrentDoctype(doctype);
        this.state = State.DATA;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInDoctype);
        doctype.forceQuirks = true;
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    } else {
        this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
        this.state = State.BOGUS_DOCTYPE;
        // Let the bogus DOCTYPE state handle this same code point.
        this._stateBogusDoctype(cp);
    }
}
2834
2835    // Bogus DOCTYPE state
2836    //------------------------------------------------------------------
/**
 * Skips over the remainder of a malformed DOCTYPE.
 *
 * - `>`: emit the DOCTYPE token, then switch to the data state;
 * - NULL: report it and otherwise ignore the code point;
 * - EOF: emit the DOCTYPE token and then the EOF token;
 * - anything else: ignored.
 *
 * @param cp Code point to process.
 */
protected _stateBogusDoctype(cp: number): void {
    const doctype = this.currentToken as DoctypeToken;

    if (cp === $.GREATER_THAN_SIGN) {
        this.emitCurrentDoctype(doctype);
        this.state = State.DATA;
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
    } else if (cp === $.EOF) {
        this.emitCurrentDoctype(doctype);
        this._emitEOFToken();
    }
    // Every other code point is dropped without any action.
}
2859
2860    // CDATA section state
2861    //------------------------------------------------------------------
/**
 * Processes a code point inside a CDATA section.
 *
 * - `]`: may start the closing sequence; move to the bracket state;
 * - EOF: report it and emit the EOF token;
 * - anything else: emit the code point as character data.
 *
 * @param cp Code point to process.
 */
protected _stateCdataSection(cp: number): void {
    if (cp === $.RIGHT_SQUARE_BRACKET) {
        this.state = State.CDATA_SECTION_BRACKET;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInCdata);
        this._emitEOFToken();
    } else {
        this._emitCodePoint(cp);
    }
}
2878
2879    // CDATA section bracket state
2880    //------------------------------------------------------------------
/**
 * Processes the code point that follows a single `]` inside a CDATA section.
 *
 * A second `]` moves to the CDATA section end state. Any other code point
 * means the first bracket was plain content: it is emitted as `]`, the
 * tokenizer returns to the CDATA section state and that state handles the
 * current code point.
 *
 * @param cp Code point to process.
 */
protected _stateCdataSectionBracket(cp: number): void {
    if (cp !== $.RIGHT_SQUARE_BRACKET) {
        this._emitChars(']');
        this.state = State.CDATA_SECTION;
        this._stateCdataSection(cp);
        return;
    }

    this.state = State.CDATA_SECTION_END;
}
2890
2891    // CDATA section end state
2892    //------------------------------------------------------------------
/**
 * Processes the code point that follows `]]` inside a CDATA section.
 *
 * - `>`: the section is closed; switch to the data state;
 * - `]`: emit one `]` and stay in this state (two brackets remain pending);
 * - anything else: emit the pending `]]`, return to the CDATA section state
 *   and let that state handle the current code point.
 *
 * @param cp Code point to process.
 */
protected _stateCdataSectionEnd(cp: number): void {
    if (cp === $.GREATER_THAN_SIGN) {
        this.state = State.DATA;
    } else if (cp === $.RIGHT_SQUARE_BRACKET) {
        this._emitChars(']');
    } else {
        this._emitChars(']]');
        this.state = State.CDATA_SECTION;
        this._stateCdataSection(cp);
    }
}
2910
2911    // Character reference state
2912    //------------------------------------------------------------------
/**
 * Feeds the input at the current position to the entity decoder and decides
 * which state to continue in.
 *
 * A negative result from the decoder is treated as "undecided": when the
 * last chunk has already been written the decoder is finalized with `end()`,
 * otherwise the tokenizer is paused until more input arrives. A result of
 * zero means no entity was recognized; any other result means one was.
 */
protected _stateCharacterReference(): void {
    let consumed = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);

    if (consumed < 0) {
        if (!this.preprocessor.lastChunkWritten) {
            // The rest of the entity may still arrive: pause the tokenizer.
            this.active = false;
            // Treat the whole buffer as already read.
            this.preprocessor.pos = this.preprocessor.html.length - 1;
            this.consumedAfterSnapshot = 0;
            this.preprocessor.endOfChunkHit = true;
            return;
        }

        // No further input will come, so let the decoder settle on a result.
        consumed = this.entityDecoder.end();
    }

    if (consumed !== 0) {
        // An entity was parsed successfully; resume in the return state.
        this.state = this.returnState;
        return;
    }

    // Not a valid entity: rewind to where it started and flush the ampersand.
    this.preprocessor.pos = this.entityStartPos;
    this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);

    if (!this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))) {
        this.state = State.AMBIGUOUS_AMPERSAND;
    } else {
        this.state = this.returnState;
    }
}
2945
    // Ambiguous ampersand state
2947    //------------------------------------------------------------------
/**
 * Processes a code point after an ampersand that did not start a known
 * character reference.
 *
 * ASCII alphanumerics are flushed as ordinary consumed code points. Any other
 * code point ends the run: a `;` is reported as an unknown named character
 * reference, then the tokenizer switches to the return state and dispatches
 * the same code point to it.
 *
 * @param cp Code point to process.
 */
protected _stateAmbiguousAmpersand(cp: number): void {
    if (isAsciiAlphaNumeric(cp)) {
        this._flushCodePointConsumedAsCharacterReference(cp);
        return;
    }

    if (cp === $.SEMICOLON) {
        this._err(ERR.unknownNamedCharacterReference);
    }

    this.state = this.returnState;
    this._callState(cp);
}
2960}
2961
/**
 * Appends tag-closing diagnostics for `token` to `parse.compileResult.log`
 * and updates the pending-tag bookkeeping in `parse.nodeInfo`.
 *
 * `parse.nodeInfo` holds the lower-cased name (`tn`) and the "line,column"
 * position (`pos`) of the last start tag that
 * `parse.validator.isSupportedSelfClosing` accepted and that was not written
 * self-closed; `sc` becomes true once an end tag with the same name is seen.
 *
 * Two diagnostics can be produced:
 * - "must be closed": a pending tag has not been closed and the current tag
 *   is either not accepted by the validator or is a start tag at a different
 *   position; the pending record is reset afterwards;
 * - "can not use selfClosing": a start tag not accepted by the validator is
 *   written in self-closing form.
 *
 * @param parse Tokenizer providing `validator`, `nodeInfo` and `compileResult`.
 * @param token Tag token to inspect.
 */
function checkselfClosingNode(parse: Tokenizer, token: TagToken) {
    const name = (token.tagName || "").toLowerCase();
    const supported = parse.validator.isSupportedSelfClosing(name);
    const isStartTag = token.type === TokenType.START_TAG;
    // "line,column" key identifying where the current token starts.
    const here = String(token.location?.startLine) + ',' + String(token.location?.startCol);

    const pending = parse.nodeInfo;
    if (pending.tn && name && !pending.sc) {
        const differentStartTag = isStartTag && here !== pending.pos;
        if (!supported || differentStartTag) {
            const [lineText, columnText] = pending.pos.split(',');
            parse.compileResult.log.push({
                line: Number(lineText) || 1,
                column: Number(columnText) || 1,
                reason: 'ERROR: tag `' + pending.tn + '` must be closed, please follow norm',
            });
            // Forget the reported tag so it is not reported again.
            parse.nodeInfo = { tn: '', sc: false, pos: '' };
        }
    }

    if (name && supported) {
        if (isStartTag && !token.selfClosing) {
            // Remember this open tag until its end tag shows up.
            parse.nodeInfo.tn = name;
            parse.nodeInfo.sc = false;
            parse.nodeInfo.pos = here;
        } else if (token.type === TokenType.END_TAG && name === parse.nodeInfo.tn) {
            parse.nodeInfo.sc = true;
        }
    }

    if (supported || !token.selfClosing || !isStartTag) {
        return;
    }

    parse.compileResult.log.push({
        line: token.location?.startLine || 1,
        column: token.location?.startCol || 1,
        reason: "ERROR: tag `" + name + "` can not use selfClosing",
    });
}
3004