import { Preprocessor } from './preprocessor.js';
import {
    CODE_POINTS as $,
    SEQUENCES as $$,
    REPLACEMENT_CHARACTER,
    isSurrogate,
    isUndefinedCodePoint,
    isControlCodePoint,
} from '../common/unicode.js';
import {
    TokenType,
    getTokenAttr,
    type Token,
    type CharacterToken,
    type DoctypeToken,
    type TagToken,
    type EOFToken,
    type CommentToken,
    type Attribute,
    type Location,
} from '../common/token.js';
import { htmlDecodeTree, EntityDecoder, DecodingMode } from 'entities/lib/decode.js';
import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
import { TAG_ID, getTagID } from '../common/html.js';

//States
/**
 * Identifiers for the tokenizer's state machine.
 *
 * Members have no explicit initializers, so their numeric values are assigned
 * from 0 in declaration order; inserting or reordering members changes the
 * values exposed through `TokenizerMode`.
 *
 * The individual character-reference sub-states are not listed here; only
 * `CHARACTER_REFERENCE` and `AMBIGUOUS_AMPERSAND` are.
 */
const enum State {
    DATA,
    RCDATA,
    RAWTEXT,
    SCRIPT_DATA,
    PLAINTEXT,
    TAG_OPEN,
    END_TAG_OPEN,
    TAG_NAME,
    RCDATA_LESS_THAN_SIGN,
    RCDATA_END_TAG_OPEN,
    RCDATA_END_TAG_NAME,
    RAWTEXT_LESS_THAN_SIGN,
    RAWTEXT_END_TAG_OPEN,
    RAWTEXT_END_TAG_NAME,
    SCRIPT_DATA_LESS_THAN_SIGN,
    SCRIPT_DATA_END_TAG_OPEN,
    SCRIPT_DATA_END_TAG_NAME,
    SCRIPT_DATA_ESCAPE_START,
    SCRIPT_DATA_ESCAPE_START_DASH,
    SCRIPT_DATA_ESCAPED,
    SCRIPT_DATA_ESCAPED_DASH,
    SCRIPT_DATA_ESCAPED_DASH_DASH,
    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME,
    SCRIPT_DATA_DOUBLE_ESCAPE_START,
    SCRIPT_DATA_DOUBLE_ESCAPED,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
    SCRIPT_DATA_DOUBLE_ESCAPE_END,
    BEFORE_ATTRIBUTE_NAME,
    ATTRIBUTE_NAME,
    AFTER_ATTRIBUTE_NAME,
    BEFORE_ATTRIBUTE_VALUE,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED,
    ATTRIBUTE_VALUE_SINGLE_QUOTED,
    ATTRIBUTE_VALUE_UNQUOTED,
    AFTER_ATTRIBUTE_VALUE_QUOTED,
    SELF_CLOSING_START_TAG,
    BOGUS_COMMENT,
    MARKUP_DECLARATION_OPEN,
    COMMENT_START,
    COMMENT_START_DASH,
    COMMENT,
    COMMENT_LESS_THAN_SIGN,
    COMMENT_LESS_THAN_SIGN_BANG,
    COMMENT_LESS_THAN_SIGN_BANG_DASH,
    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH,
    COMMENT_END_DASH,
    COMMENT_END,
    COMMENT_END_BANG,
    DOCTYPE,
    BEFORE_DOCTYPE_NAME,
    DOCTYPE_NAME,
    AFTER_DOCTYPE_NAME,
    AFTER_DOCTYPE_PUBLIC_KEYWORD,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
    AFTER_DOCTYPE_SYSTEM_KEYWORD,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
    BOGUS_DOCTYPE,
    CDATA_SECTION,
    CDATA_SECTION_BRACKET,
    CDATA_SECTION_END,
    CHARACTER_REFERENCE,
    AMBIGUOUS_AMPERSAND,
}

//Tokenizer initial states for different modes
/**
 * The subset of `State` values that is exported, so code outside this module
 * can assign one of them to `Tokenizer.state` as a starting state.
 */
export const TokenizerMode = {
    DATA: State.DATA,
    RCDATA: State.RCDATA,
    RAWTEXT: State.RAWTEXT,
    SCRIPT_DATA: State.SCRIPT_DATA,
    PLAINTEXT: State.PLAINTEXT,
    CDATA_SECTION: State.CDATA_SECTION,
} as const;

//Utils

//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').

/** Reports whether `cp` lies between `$.DIGIT_0` and `$.DIGIT_9`, inclusive. */
function isAsciiDigit(cp: number): boolean {
    return $.DIGIT_0 <= cp && cp <= $.DIGIT_9;
}

/** Reports whether `cp` lies between `$.LATIN_CAPITAL_A` and `$.LATIN_CAPITAL_Z`, inclusive. */
function isAsciiUpper(cp: number): boolean {
    return $.LATIN_CAPITAL_A <= cp && cp <= $.LATIN_CAPITAL_Z;
}

/** Reports whether `cp` lies between `$.LATIN_SMALL_A` and `$.LATIN_SMALL_Z`, inclusive. */
function isAsciiLower(cp: number): boolean {
    return $.LATIN_SMALL_A <= cp && cp <= $.LATIN_SMALL_Z;
}

/** Reports whether `cp` is an ASCII letter of either case. */
function isAsciiLetter(cp: number): boolean {
    return isAsciiUpper(cp) || isAsciiLower(cp);
}

/** Reports whether `cp` is an ASCII letter or an ASCII digit. */
function isAsciiAlphaNumeric(cp: number): boolean {
    return isAsciiDigit(cp) || isAsciiLetter(cp);
}

/**
 * Shifts `cp` up by 0x20.
 *
 * No range check is performed here; callers are expected to pass only code
 * points for which `isAsciiUpper` is true.
 */
function toAsciiLower(cp: number): number {
    return 0x00_20 + cp;
}

/** Reports whether `cp` is a space, line feed, tabulation or form feed code point. */
function isWhitespace(cp: number): boolean {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            return true;
        }
        default: {
            return false;
        }
    }
}

/** Reports whether `cp` is whitespace, a solidus or a greater-than sign. */
function isScriptDataDoubleEscapeSequenceEnd(cp: number): boolean {
    return cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN || isWhitespace(cp);
}

/** Default validator: answers `false` for every tag name. */
const componentValidator = {
    isSupportedSelfClosing: (): boolean => false,
};

/** Decides whether a given tag name may be written as a self-closing tag. */
interface Validator {
    isSupportedSelfClosing(tagName: string): boolean;
}

/**
 * Container for compilation output that is shared with the tokenizer.
 *
 * NOTE(review): `jsonTemplate: {}` and `deps: []` (an empty tuple type) look
 * narrower than intended - confirm the real shapes before widening them.
 */
interface CompileResult {
    jsonTemplate: {};
    deps: [];
    log: Array<{
        line: number;
        column: number;
        reason: string;
    }>;
}

/**
 * Per-node bookkeeping stored on the tokenizer.
 * NOTE(review): presumably tag name / self-closing flag / position - confirm against the code that fills it.
 */
interface NodeInfo {
    tn: string;
    sc: boolean;
    pos: string;
}

/**
 * Maps a numeric character reference value to the parse error it triggers.
 *
 * Checks run in a fixed order (null, out of range, surrogate, undefined code
 * point, control/carriage return) and the first match wins.
 *
 * @param code Numeric value of the character reference.
 * @returns The matching error code, or `null` when none of the checks match.
 */
function getErrorForNumericCharacterReference(code: number): ERR | null {
    if (code === $.NULL) {
        return ERR.nullCharacterReference;
    }
    if (code > 0x10_ff_ff) {
        return ERR.characterReferenceOutsideUnicodeRange;
    }
    if (isSurrogate(code)) {
        return ERR.surrogateCharacterReference;
    }
    if (isUndefinedCodePoint(code)) {
        return ERR.noncharacterCharacterReference;
    }
    if (isControlCodePoint(code) || code === $.CARRIAGE_RETURN) {
        return ERR.controlCharacterReference;
    }

    return null;
}

/** Options accepted by the `Tokenizer` constructor; every member is optional. */
export interface TokenizerOptions {
    componentValidator?: Validator;
    compileResult?: CompileResult;
    sourceCodeLocationInfo?: boolean;
}

/** Callbacks through which the tokenizer hands tokens and parse errors to its consumer. */
export interface TokenHandler {
    onComment(token: CommentToken): void;
    onDoctype(token: DoctypeToken): void;
    onStartTag(token: TagToken): void;
    onEndTag(token: TagToken): void;
    onEof(token: EOFToken): void;
    onCharacter(token: CharacterToken): void;
    onNullCharacter(token: CharacterToken): void;
    onWhitespaceCharacter(token: CharacterToken): void;

    onParseError?: ParserErrorHandler | null;
}

//Tokenizer
export class Tokenizer {
    public preprocessor: Preprocessor;

    protected paused = false;
    /** Ensures that the parsing loop isn't run multiple times at once. */
    protected inLoop = false;

    /**
     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
     * and that it is not an integration point for either MathML or HTML.
     *
     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
     */
    public inForeignNode = false;
    public lastStartTagName = '';
    public active = false;

    public nodeInfo: NodeInfo = { tn: '', sc: false, pos: '' };
    public validator: Validator = componentValidator;
    public compileResult: CompileResult = { jsonTemplate: {}, deps: [], log: [] };

    public state = State.DATA;
    protected returnState = State.DATA;

    /**
     * We use `entities`' `EntityDecoder` to parse character references.
234 * 235 * All of the following states are handled by the `EntityDecoder`: 236 * 237 * - Named character reference state 238 * - Numeric character reference state 239 * - Hexademical character reference start state 240 * - Hexademical character reference state 241 * - Decimal character reference state 242 * - Numeric character reference end state 243 */ 244 protected entityDecoder: EntityDecoder; 245 protected entityStartPos = 0; 246 protected consumedAfterSnapshot = -1; 247 248 protected currentLocation: Location | null; 249 protected currentCharacterToken: CharacterToken | null = null; 250 protected currentToken: Token | null = null; 251 protected currentAttr: Attribute = { name: '', value: '' }; 252 253 constructor( 254 protected options: TokenizerOptions, 255 protected handler: TokenHandler, 256 ) { 257 this.preprocessor = new Preprocessor(handler); 258 this.currentLocation = this.getCurrentLocation(-1); 259 if(options.componentValidator) { 260 this.validator = options.componentValidator; 261 } 262 if(options.compileResult) { 263 this.compileResult = options.compileResult; 264 } 265 this.entityDecoder = new EntityDecoder( 266 htmlDecodeTree, 267 (cp: number, consumed: number) => { 268 // Note: Set `pos` _before_ flushing, as flushing might drop 269 // the current chunk and invalidate `entityStartPos`. 270 this.preprocessor.pos = this.entityStartPos + consumed - 1; 271 this._flushCodePointConsumedAsCharacterReference(cp); 272 }, 273 handler.onParseError 274 ? 
{ 275 missingSemicolonAfterCharacterReference: (): void => { 276 this._err(ERR.missingSemicolonAfterCharacterReference, 1); 277 }, 278 absenceOfDigitsInNumericCharacterReference: (consumed: number): void => { 279 this._err( 280 ERR.absenceOfDigitsInNumericCharacterReference, 281 this.entityStartPos - this.preprocessor.pos + consumed, 282 ); 283 }, 284 validateNumericCharacterReference: (code: number): void => { 285 const error = getErrorForNumericCharacterReference(code); 286 if (error) this._err(error, 1); 287 }, 288 } 289 : undefined, 290 ); 291 } 292 293 //Errors 294 protected _err(code: ERR, cpOffset = 0): void { 295 this.handler.onParseError?.(this.preprocessor.getError(code, cpOffset)); 296 } 297 298 // NOTE: `offset` may never run across line boundaries. 299 protected getCurrentLocation(offset: number): Location | null { 300 if (!this.options.sourceCodeLocationInfo) { 301 return null; 302 } 303 304 return { 305 startLine: this.preprocessor.line, 306 startCol: this.preprocessor.col - offset, 307 startOffset: this.preprocessor.offset - offset, 308 endLine: -1, 309 endCol: -1, 310 endOffset: -1, 311 }; 312 } 313 314 protected _runParsingLoop(): void { 315 if (this.inLoop) return; 316 317 this.inLoop = true; 318 319 while (this.active && !this.paused) { 320 this.consumedAfterSnapshot = 0; 321 322 const cp = this._consume(); 323 324 if (!this._ensureHibernation()) { 325 this._callState(cp); 326 } 327 } 328 329 this.inLoop = false; 330 } 331 332 //API 333 public pause(): void { 334 this.paused = true; 335 } 336 337 public resume(writeCallback?: () => void): void { 338 if (!this.paused) { 339 throw new Error('Parser was already resumed'); 340 } 341 342 this.paused = false; 343 344 // Necessary for synchronous resume. 
345 if (this.inLoop) return; 346 347 this._runParsingLoop(); 348 349 if (!this.paused) { 350 writeCallback?.(); 351 } 352 } 353 354 public write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void { 355 this.active = true; 356 this.preprocessor.write(chunk, isLastChunk); 357 this._runParsingLoop(); 358 359 if (!this.paused) { 360 writeCallback?.(); 361 } 362 } 363 364 public insertHtmlAtCurrentPos(chunk: string): void { 365 this.active = true; 366 this.preprocessor.insertHtmlAtCurrentPos(chunk); 367 this._runParsingLoop(); 368 } 369 370 //Hibernation 371 protected _ensureHibernation(): boolean { 372 if (this.preprocessor.endOfChunkHit) { 373 this.preprocessor.retreat(this.consumedAfterSnapshot); 374 this.consumedAfterSnapshot = 0; 375 this.active = false; 376 377 return true; 378 } 379 380 return false; 381 } 382 383 //Consumption 384 protected _consume(): number { 385 this.consumedAfterSnapshot++; 386 return this.preprocessor.advance(); 387 } 388 389 protected _advanceBy(count: number): void { 390 this.consumedAfterSnapshot += count; 391 for (let i = 0; i < count; i++) { 392 this.preprocessor.advance(); 393 } 394 } 395 396 protected _consumeSequenceIfMatch(pattern: string, caseSensitive: boolean): boolean { 397 if (this.preprocessor.startsWith(pattern, caseSensitive)) { 398 // We will already have consumed one character before calling this method. 
399 this._advanceBy(pattern.length - 1); 400 return true; 401 } 402 return false; 403 } 404 405 //Token creation 406 protected _createStartTagToken(): void { 407 this.currentToken = { 408 type: TokenType.START_TAG, 409 tagName: '', 410 tagID: TAG_ID.UNKNOWN, 411 selfClosing: false, 412 ackSelfClosing: false, 413 attrs: [], 414 location: this.getCurrentLocation(1), 415 }; 416 } 417 418 protected _createEndTagToken(): void { 419 this.currentToken = { 420 type: TokenType.END_TAG, 421 tagName: '', 422 tagID: TAG_ID.UNKNOWN, 423 selfClosing: false, 424 ackSelfClosing: false, 425 attrs: [], 426 location: this.getCurrentLocation(2), 427 }; 428 } 429 430 protected _createCommentToken(offset: number): void { 431 this.currentToken = { 432 type: TokenType.COMMENT, 433 data: '', 434 location: this.getCurrentLocation(offset), 435 }; 436 } 437 438 protected _createDoctypeToken(initialName: string | null): void { 439 this.currentToken = { 440 type: TokenType.DOCTYPE, 441 name: initialName, 442 forceQuirks: false, 443 publicId: null, 444 systemId: null, 445 location: this.currentLocation, 446 }; 447 } 448 449 protected _createCharacterToken(type: CharacterToken['type'], chars: string): void { 450 this.currentCharacterToken = { 451 type, 452 chars, 453 location: this.currentLocation, 454 }; 455 } 456 457 //Tag attributes 458 protected _createAttr(attrNameFirstCh: string): void { 459 this.currentAttr = { 460 name: attrNameFirstCh, 461 value: '', 462 }; 463 this.currentLocation = this.getCurrentLocation(0); 464 } 465 466 protected _leaveAttrName(): void { 467 const token = this.currentToken as TagToken; 468 469 if (getTokenAttr(token, this.currentAttr.name) === null) { 470 token.attrs.push(this.currentAttr); 471 472 if (token.location && this.currentLocation) { 473 const attrLocations = (token.location.attrs ??= Object.create(null)); 474 attrLocations[this.currentAttr.name] = this.currentLocation; 475 476 // Set end location 477 this._leaveAttrValue(); 478 } 479 } else { 480 
this._err(ERR.duplicateAttribute); 481 } 482 } 483 484 protected _leaveAttrValue(): void { 485 if (this.currentLocation) { 486 this.currentLocation.endLine = this.preprocessor.line; 487 this.currentLocation.endCol = this.preprocessor.col; 488 this.currentLocation.endOffset = this.preprocessor.offset; 489 } 490 } 491 492 //Token emission 493 protected prepareToken(ct: Token): void { 494 this._emitCurrentCharacterToken(ct.location); 495 this.currentToken = null; 496 497 if (ct.location) { 498 ct.location.endLine = this.preprocessor.line; 499 ct.location.endCol = this.preprocessor.col + 1; 500 ct.location.endOffset = this.preprocessor.offset + 1; 501 } 502 503 this.currentLocation = this.getCurrentLocation(-1); 504 } 505 506 protected emitCurrentTagToken(): void { 507 const ct = this.currentToken as TagToken; 508 checkselfClosingNode(this, ct); 509 this.prepareToken(ct); 510 511 ct.tagID = getTagID(ct.tagName); 512 513 if (ct.type === TokenType.START_TAG) { 514 this.lastStartTagName = ct.tagName; 515 this.handler.onStartTag(ct); 516 } else { 517 if (ct.attrs.length > 0) { 518 this._err(ERR.endTagWithAttributes); 519 } 520 521 if (ct.selfClosing) { 522 this._err(ERR.endTagWithTrailingSolidus); 523 } 524 525 this.handler.onEndTag(ct); 526 } 527 528 this.preprocessor.dropParsedChunk(); 529 } 530 531 protected emitCurrentComment(ct: CommentToken): void { 532 this.prepareToken(ct); 533 this.handler.onComment(ct); 534 535 this.preprocessor.dropParsedChunk(); 536 } 537 538 protected emitCurrentDoctype(ct: DoctypeToken): void { 539 this.prepareToken(ct); 540 this.handler.onDoctype(ct); 541 542 this.preprocessor.dropParsedChunk(); 543 } 544 545 protected _emitCurrentCharacterToken(nextLocation: Location | null): void { 546 if (this.currentCharacterToken) { 547 //NOTE: if we have a pending character token, make it's end location equal to the 548 //current token's start location. 
549 if (nextLocation && this.currentCharacterToken.location) { 550 this.currentCharacterToken.location.endLine = nextLocation.startLine; 551 this.currentCharacterToken.location.endCol = nextLocation.startCol; 552 this.currentCharacterToken.location.endOffset = nextLocation.startOffset; 553 } 554 555 switch (this.currentCharacterToken.type) { 556 case TokenType.CHARACTER: { 557 this.handler.onCharacter(this.currentCharacterToken); 558 break; 559 } 560 case TokenType.NULL_CHARACTER: { 561 this.handler.onNullCharacter(this.currentCharacterToken); 562 break; 563 } 564 case TokenType.WHITESPACE_CHARACTER: { 565 this.handler.onWhitespaceCharacter(this.currentCharacterToken); 566 break; 567 } 568 } 569 570 this.currentCharacterToken = null; 571 } 572 } 573 574 protected _emitEOFToken(): void { 575 const location = this.getCurrentLocation(0); 576 577 if (location) { 578 location.endLine = location.startLine; 579 location.endCol = location.startCol; 580 location.endOffset = location.startOffset; 581 } 582 583 this._emitCurrentCharacterToken(location); 584 this.handler.onEof({ type: TokenType.EOF, location }); 585 this.active = false; 586 } 587 588 //Characters emission 589 590 //OPTIMIZATION: The specification uses only one type of character token (one token per character). 591 //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters. 592 //If we have a sequence of characters that belong to the same group, the parser can process it 593 //as a single solid character token. 594 //So, there are 3 types of character tokens in parse5: 595 //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000') 596 //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f') 597 //3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 
    //'abcdef1234@@#$%^')
    /**
     * Adds `ch` to the pending character token when it has the same type.
     * Otherwise the pending token is emitted (ending at the current position),
     * the parsed chunk is dropped, and a new pending token of `type` is started.
     */
    protected _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string): void {
        if (this.currentCharacterToken) {
            if (this.currentCharacterToken.type === type) {
                this.currentCharacterToken.chars += ch;
                return;
            } else {
                this.currentLocation = this.getCurrentLocation(0);
                this._emitCurrentCharacterToken(this.currentLocation);
                this.preprocessor.dropParsedChunk();
            }
        }

        this._createCharacterToken(type, ch);
    }

    /**
     * Classifies `cp` as whitespace, null or ordinary character and appends it
     * to the pending character token of that type.
     */
    protected _emitCodePoint(cp: number): void {
        const type = isWhitespace(cp)
            ? TokenType.WHITESPACE_CHARACTER
            : cp === $.NULL
              ? TokenType.NULL_CHARACTER
              : TokenType.CHARACTER;

        this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
    }

    //NOTE: used when we emit characters explicitly.
    //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
    protected _emitChars(ch: string): void {
        this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
    }

    // Character reference helpers
    /**
     * Switches to the character reference state, remembering the state to
     * return to and the position where the reference starts.
     */
    protected _startCharacterReference(): void {
        this.returnState = this.state;
        this.state = State.CHARACTER_REFERENCE;
        this.entityStartPos = this.preprocessor.pos;
        this.entityDecoder.startEntity(
            // `returnState` was assigned above, so this check reflects the state we came from.
            this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy,
        );
    }

    /** Reports whether `returnState` is one of the three attribute value states. */
    protected _isCharacterReferenceInAttribute(): boolean {
        return (
            this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
            this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
            this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED
        );
    }

    /**
     * Routes a decoded code point either into the current attribute's value or
     * into the character token stream, depending on where the reference occurred.
     */
    protected _flushCodePointConsumedAsCharacterReference(cp: number): void {
        if (this._isCharacterReferenceInAttribute()) {
            this.currentAttr.value += String.fromCodePoint(cp);
        } else {
            this._emitCodePoint(cp);
        }
    }

    // Calling states this way turns out to be much faster than any other approach.
    /**
     * Dispatches `cp` to the handler method of the current `state`.
     *
     * @throws Error when `state` holds a value without a case below.
     */
    protected _callState(cp: number): void {
        switch (this.state) {
            case State.DATA: {
                this._stateData(cp);
                break;
            }
            case State.RCDATA: {
                this._stateRcdata(cp);
                break;
            }
            case State.RAWTEXT: {
                this._stateRawtext(cp);
                break;
            }
            case State.SCRIPT_DATA: {
                this._stateScriptData(cp);
                break;
            }
            case State.PLAINTEXT: {
                this._statePlaintext(cp);
                break;
            }
            case State.TAG_OPEN: {
                this._stateTagOpen(cp);
                break;
            }
            case State.END_TAG_OPEN: {
                this._stateEndTagOpen(cp);
                break;
            }
            case State.TAG_NAME: {
                this._stateTagName(cp);
                break;
            }
            case State.RCDATA_LESS_THAN_SIGN: {
                this._stateRcdataLessThanSign(cp);
                break;
            }
            case State.RCDATA_END_TAG_OPEN: {
                this._stateRcdataEndTagOpen(cp);
                break;
            }
            case State.RCDATA_END_TAG_NAME: {
                this._stateRcdataEndTagName(cp);
                break;
            }
            case State.RAWTEXT_LESS_THAN_SIGN: {
                this._stateRawtextLessThanSign(cp);
                break;
            }
            case State.RAWTEXT_END_TAG_OPEN: {
                this._stateRawtextEndTagOpen(cp);
                break;
            }
            case State.RAWTEXT_END_TAG_NAME: {
                this._stateRawtextEndTagName(cp);
                break;
            }
            case State.SCRIPT_DATA_LESS_THAN_SIGN: {
                this._stateScriptDataLessThanSign(cp);
                break;
            }
            case State.SCRIPT_DATA_END_TAG_OPEN: {
                this._stateScriptDataEndTagOpen(cp);
                break;
            }
            case State.SCRIPT_DATA_END_TAG_NAME: {
                this._stateScriptDataEndTagName(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPE_START: {
                this._stateScriptDataEscapeStart(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPE_START_DASH: {
                this._stateScriptDataEscapeStartDash(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPED: {
                this._stateScriptDataEscaped(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPED_DASH: {
                this._stateScriptDataEscapedDash(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
                this._stateScriptDataEscapedDashDash(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
                this._stateScriptDataEscapedLessThanSign(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
                this._stateScriptDataEscapedEndTagOpen(cp);
                break;
            }
            case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
                this._stateScriptDataEscapedEndTagName(cp);
                break;
            }
            case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
                this._stateScriptDataDoubleEscapeStart(cp);
                break;
            }
            case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
                this._stateScriptDataDoubleEscaped(cp);
                break;
            }
            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
                this._stateScriptDataDoubleEscapedDash(cp);
                break;
            }
            case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
                this._stateScriptDataDoubleEscapedDashDash(cp);
                break;
            }
            case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
                this._stateScriptDataDoubleEscapedLessThanSign(cp);
                break;
            }
            case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
                this._stateScriptDataDoubleEscapeEnd(cp);
                break;
            }
            case State.BEFORE_ATTRIBUTE_NAME: {
                this._stateBeforeAttributeName(cp);
                break;
            }
            case State.ATTRIBUTE_NAME: {
                this._stateAttributeName(cp);
                break;
            }
            case State.AFTER_ATTRIBUTE_NAME: {
                this._stateAfterAttributeName(cp);
                break;
            }
            case State.BEFORE_ATTRIBUTE_VALUE: {
                this._stateBeforeAttributeValue(cp);
                break;
            }
            case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
                this._stateAttributeValueDoubleQuoted(cp);
                break;
            }
            case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
                this._stateAttributeValueSingleQuoted(cp);
                break;
            }
            case State.ATTRIBUTE_VALUE_UNQUOTED: {
                this._stateAttributeValueUnquoted(cp);
                break;
            }
            case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
                this._stateAfterAttributeValueQuoted(cp);
                break;
            }
            case State.SELF_CLOSING_START_TAG: {
                this._stateSelfClosingStartTag(cp);
                break;
            }
            case State.BOGUS_COMMENT: {
                this._stateBogusComment(cp);
                break;
            }
            case State.MARKUP_DECLARATION_OPEN: {
                this._stateMarkupDeclarationOpen(cp);
                break;
            }
            case State.COMMENT_START: {
                this._stateCommentStart(cp);
                break;
            }
            case State.COMMENT_START_DASH: {
                this._stateCommentStartDash(cp);
                break;
            }
            case State.COMMENT: {
                this._stateComment(cp);
                break;
            }
            case State.COMMENT_LESS_THAN_SIGN: {
                this._stateCommentLessThanSign(cp);
                break;
            }
            case State.COMMENT_LESS_THAN_SIGN_BANG: {
                this._stateCommentLessThanSignBang(cp);
                break;
            }
            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
                this._stateCommentLessThanSignBangDash(cp);
                break;
            }
            case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
                this._stateCommentLessThanSignBangDashDash(cp);
                break;
            }
            case State.COMMENT_END_DASH: {
                this._stateCommentEndDash(cp);
                break;
            }
            case State.COMMENT_END: {
                this._stateCommentEnd(cp);
                break;
            }
            case State.COMMENT_END_BANG: {
                this._stateCommentEndBang(cp);
                break;
            }
            case State.DOCTYPE: {
                this._stateDoctype(cp);
                break;
            }
            case State.BEFORE_DOCTYPE_NAME: {
                this._stateBeforeDoctypeName(cp);
                break;
            }
            case State.DOCTYPE_NAME: {
                this._stateDoctypeName(cp);
                break;
            }
            case State.AFTER_DOCTYPE_NAME: {
                this._stateAfterDoctypeName(cp);
                break;
            }
            case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
                this._stateAfterDoctypePublicKeyword(cp);
                break;
            }
            case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
                this._stateBeforeDoctypePublicIdentifier(cp);
                break;
            }
            case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
                this._stateDoctypePublicIdentifierDoubleQuoted(cp);
                break;
            }
            case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
                this._stateDoctypePublicIdentifierSingleQuoted(cp);
                break;
            }
            case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
                this._stateAfterDoctypePublicIdentifier(cp);
                break;
            }
            case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
                this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
                break;
            }
            case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
                this._stateAfterDoctypeSystemKeyword(cp);
                break;
            }
            case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
                this._stateBeforeDoctypeSystemIdentifier(cp);
                break;
            }
            case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
                this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
                break;
            }
            case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
                this._stateDoctypeSystemIdentifierSingleQuoted(cp);
                break;
            }
            case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
                this._stateAfterDoctypeSystemIdentifier(cp);
                break;
            }
            case State.BOGUS_DOCTYPE: {
                this._stateBogusDoctype(cp);
                break;
            }
            case State.CDATA_SECTION: {
                this._stateCdataSection(cp);
                break;
            }
            case State.CDATA_SECTION_BRACKET: {
                this._stateCdataSectionBracket(cp);
                break;
            }
            case State.CDATA_SECTION_END: {
                this._stateCdataSectionEnd(cp);
                break;
            }
            case State.CHARACTER_REFERENCE: {
                // The only state handler invoked without the code point.
                this._stateCharacterReference();
                break;
            }
            case State.AMBIGUOUS_AMPERSAND: {
                this._stateAmbiguousAmpersand(cp);
                break;
            }
            default: {
                throw new Error('Unknown state');
            }
        }
    }

    // State machine

    // Data state
    //------------------------------------------------------------------
    /**
     * `<` opens a tag, `&` starts a character reference, EOF emits the EOF
     * token; everything else is emitted as-is. A null code point is reported
     * as an error and still emitted unchanged.
     */
    protected _stateData(cp: number): void {
        switch (cp) {
            case $.LESS_THAN_SIGN: {
                this.state = State.TAG_OPEN;
                break;
            }
            case $.AMPERSAND: {
                this._startCharacterReference();
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                this._emitCodePoint(cp);
                break;
            }
            case $.EOF: {
                this._emitEOFToken();
                break;
            }
            default: {
                this._emitCodePoint(cp);
            }
        }
    }

    //  RCDATA state
    //------------------------------------------------------------------
    /**
     * Like the data state, except that `<` leads to the RCDATA less-than sign
     * state and a null code point is replaced by `REPLACEMENT_CHARACTER`.
     */
    protected _stateRcdata(cp: number): void {
        switch (cp) {
            case $.AMPERSAND: {
                this._startCharacterReference();
                break;
            }
            case $.LESS_THAN_SIGN: {
                this.state = State.RCDATA_LESS_THAN_SIGN;
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                this._emitChars(REPLACEMENT_CHARACTER);
                break;
            }
            case $.EOF: {
                this._emitEOFToken();
                break;
            }
            default: {
                this._emitCodePoint(cp);
            }
        }
    }

    // RAWTEXT state
    //------------------------------------------------------------------
    /**
     * Emits text verbatim; `&` is not special here. `<` leads to the RAWTEXT
     * less-than sign state and null is replaced by `REPLACEMENT_CHARACTER`.
     */
    protected _stateRawtext(cp: number): void {
        switch (cp) {
            case $.LESS_THAN_SIGN: {
                this.state = State.RAWTEXT_LESS_THAN_SIGN;
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                this._emitChars(REPLACEMENT_CHARACTER);
                break;
            }
            case $.EOF: {
                this._emitEOFToken();
                break;
            }
            default: {
                this._emitCodePoint(cp);
            }
        }
    }

    // Script data state
    //------------------------------------------------------------------
    /**
     * Same shape as the RAWTEXT state, but `<` leads to the script data
     * less-than sign state.
     */
    protected _stateScriptData(cp: number): void {
        switch (cp) {
            case $.LESS_THAN_SIGN: {
                this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                this._emitChars(REPLACEMENT_CHARACTER);
                break;
            }
            case $.EOF: {
                this._emitEOFToken();
                break;
            }
            default: {
                this._emitCodePoint(cp);
            }
        }
    }

    // PLAINTEXT state
    //------------------------------------------------------------------
    /**
     * Emits everything as text until EOF; there is no transition to any other
     * state from here. Null is replaced by `REPLACEMENT_CHARACTER`.
     */
    protected _statePlaintext(cp: number): void {
        switch (cp) {
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                this._emitChars(REPLACEMENT_CHARACTER);
                break;
            }
            case $.EOF: {
                this._emitEOFToken();
                break;
            }
            default: {
                this._emitCodePoint(cp);
            }
        }
    }

    // Tag open state
    //------------------------------------------------------------------
    /**
     * Decides what follows a `<`: a start tag (ASCII letter), a markup
     * declaration (`!`), an end tag (`/`) or a bogus comment (`?`). Anything
     * else makes the `<` literal text and is reprocessed in the data state.
     */
    protected _stateTagOpen(cp: number): void {
        if (isAsciiLetter(cp)) {
            this._createStartTagToken();
            this.state = State.TAG_NAME;
            // Reprocess the same code point as the first character of the tag name.
            this._stateTagName(cp);
        } else
            switch (cp) {
                case $.EXCLAMATION_MARK: {
                    this.state = State.MARKUP_DECLARATION_OPEN;
                    break;
                }
                case $.SOLIDUS: {
                    this.state = State.END_TAG_OPEN;
                    break;
                }
                case $.QUESTION_MARK: {
                    this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
                    this._createCommentToken(1);
                    this.state = State.BOGUS_COMMENT;
                    this._stateBogusComment(cp);
                    break;
                }
                case $.EOF: {
                    this._err(ERR.eofBeforeTagName);
                    this._emitChars('<');
                    this._emitEOFToken();
                    break;
                }
                default: {
                    this._err(ERR.invalidFirstCharacterOfTagName);
                    this._emitChars('<');
                    this.state = State.DATA;
                    this._stateData(cp);
                }
            }
    }

    // End tag open state
    //------------------------------------------------------------------
    /**
     * Decides what follows `</`: an end tag (ASCII letter), nothing at all
     * (`>` is reported and dropped), literal `</` at EOF, or a bogus comment.
     */
    protected _stateEndTagOpen(cp: number): void {
        if (isAsciiLetter(cp)) {
            this._createEndTagToken();
            this.state = State.TAG_NAME;
            this._stateTagName(cp);
        } else
            switch (cp) {
                case $.GREATER_THAN_SIGN: {
                    this._err(ERR.missingEndTagName);
                    this.state = State.DATA;
                    break;
                }
                case $.EOF: {
                    this._err(ERR.eofBeforeTagName);
                    this._emitChars('</');
                    this._emitEOFToken();
                    break;
                }
                default: {
                    this._err(ERR.invalidFirstCharacterOfTagName);
                    this._createCommentToken(2);
                    this.state = State.BOGUS_COMMENT;
                    this._stateBogusComment(cp);
                }
            }
    }

    // Tag name state
    //------------------------------------------------------------------
    /**
     * Accumulates the tag name on the current tag token, lower-casing ASCII
     * upper-case letters, until whitespace, `/`, `>` or EOF ends it. At EOF the
     * unfinished tag token is not emitted.
     */
    protected _stateTagName(cp: number): void {
        const token = this.currentToken as TagToken;

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                this.state = State.BEFORE_ATTRIBUTE_NAME;
                break;
            }
            case $.SOLIDUS: {
                this.state = State.SELF_CLOSING_START_TAG;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentTagToken();
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                token.tagName += REPLACEMENT_CHARACTER;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInTag);
                this._emitEOFToken();
                break;
            }
            default: {
                token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
            }
        }
    }

    // RCDATA less-than sign state
    //------------------------------------------------------------------
    /** `/` may open an end tag; otherwise `<` is literal text and `cp` is reprocessed as RCDATA. */
    protected _stateRcdataLessThanSign(cp: number): void {
        if (cp === $.SOLIDUS) {
            this.state = State.RCDATA_END_TAG_OPEN;
        } else {
            this._emitChars('<');
            this.state = State.RCDATA;
            this._stateRcdata(cp);
        }
    }

    // RCDATA end tag open state
    //------------------------------------------------------------------
    /** An ASCII letter may start an end tag name; otherwise `</` is literal text and `cp` is reprocessed. */
    protected _stateRcdataEndTagOpen(cp: number): void {
        if (isAsciiLetter(cp)) {
            this.state = State.RCDATA_END_TAG_NAME;
            this._stateRcdataEndTagName(cp);
        } else {
            this._emitChars('</');
            this.state = State.RCDATA;
            this._stateRcdata(cp);
        }
    }

    /**
     * Shared end-tag matching for the RCDATA and RAWTEXT end tag name states
     * visible here.
     *
     * The upcoming input must start with `lastStartTagName` (matched with
     * `caseSensitive = false`) and be followed by whitespace, `/` or `>`. On a
     * match an end tag token named `lastStartTagName` is created, the name is
     * consumed, and the state is switched accordingly (`>` emits the token
     * immediately).
     *
     * @param _cp Unused; the name is matched through the preprocessor instead.
     * @returns `true` when there was no match and the tokenizer did not
     *   hibernate, i.e. the caller should emit `</` as text and reprocess;
     *   `false` when the end tag was recognised or the tokenizer hibernated.
     */
    protected handleSpecialEndTag(_cp: number): boolean {
        if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
            return !this._ensureHibernation();
        }

        this._createEndTagToken();
        const token = this.currentToken as TagToken;
        token.tagName = this.lastStartTagName;

        const cp = this.preprocessor.peek(this.lastStartTagName.length);

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                this._advanceBy(this.lastStartTagName.length);
                this.state = State.BEFORE_ATTRIBUTE_NAME;
                return false;
            }
            case $.SOLIDUS: {
                this._advanceBy(this.lastStartTagName.length);
                this.state = State.SELF_CLOSING_START_TAG;
                return false;
            }
            case $.GREATER_THAN_SIGN: {
                this._advanceBy(this.lastStartTagName.length);
                this.emitCurrentTagToken();
                this.state = State.DATA;
                return false;
            }
            default: {
                return !this._ensureHibernation();
            }
        }
    }

    // RCDATA end tag name state
    //------------------------------------------------------------------
    /** Tries to recognise the matching end tag; on failure `</` is literal text and `cp` is reprocessed as RCDATA. */
    protected _stateRcdataEndTagName(cp: number): void {
        if (this.handleSpecialEndTag(cp)) {
            this._emitChars('</');
            this.state = State.RCDATA;
            this._stateRcdata(cp);
        }
    }

    // RAWTEXT less-than sign state
    //------------------------------------------------------------------
    /** `/` may open an end tag; otherwise `<` is literal text and `cp` is reprocessed as RAWTEXT. */
    protected _stateRawtextLessThanSign(cp: number): void {
        if (cp === $.SOLIDUS) {
            this.state = State.RAWTEXT_END_TAG_OPEN;
        } else {
            this._emitChars('<');
            this.state = State.RAWTEXT;
            this._stateRawtext(cp);
        }
    }

    // RAWTEXT end tag open state
    //------------------------------------------------------------------
    /** An ASCII letter may start an end tag name; otherwise `</` is literal text and `cp` is reprocessed. */
    protected _stateRawtextEndTagOpen(cp: number): void {
        if (isAsciiLetter(cp)) {
            this.state = State.RAWTEXT_END_TAG_NAME;
            this._stateRawtextEndTagName(cp);
        } else {
            this._emitChars('</');
            this.state = State.RAWTEXT;
            this._stateRawtext(cp);
        }
    }

    // RAWTEXT end tag name state
    //------------------------------------------------------------------
    /** Tries to recognise the matching end tag; on failure `</` is literal text and `cp` is reprocessed as RAWTEXT. */
    protected _stateRawtextEndTagName(cp: number): void {
        if (this.handleSpecialEndTag(cp)) {
            this._emitChars('</');
            this.state = State.RAWTEXT;
            this._stateRawtext(cp);
        }
    }

    // Script data less-than sign state
    //------------------------------------------------------------------
    /**
     * `/` may open an end tag and `!` starts an escape sequence (emitting `<!`);
     * otherwise `<` is literal text and `cp` is reprocessed as script data.
     */
    protected _stateScriptDataLessThanSign(cp: number): void {
        switch (cp) {
            case $.SOLIDUS: {
                this.state = State.SCRIPT_DATA_END_TAG_OPEN;
                break;
            }
            case $.EXCLAMATION_MARK: {
                this.state = State.SCRIPT_DATA_ESCAPE_START;
                this._emitChars('<!');
                break;
            }
            default: {
                this._emitChars('<');
                this.state = State.SCRIPT_DATA;
                this._stateScriptData(cp);
            }
        }
    }

    // Script data end tag open state
    //------------------------------------------------------------------
    protected _stateScriptDataEndTagOpen(cp: number): void {
        if
(isAsciiLetter(cp)) { 1315 this.state = State.SCRIPT_DATA_END_TAG_NAME; 1316 this._stateScriptDataEndTagName(cp); 1317 } else { 1318 this._emitChars('</'); 1319 this.state = State.SCRIPT_DATA; 1320 this._stateScriptData(cp); 1321 } 1322 } 1323 1324 // Script data end tag name state 1325 //------------------------------------------------------------------ 1326 protected _stateScriptDataEndTagName(cp: number): void { 1327 if (this.handleSpecialEndTag(cp)) { 1328 this._emitChars('</'); 1329 this.state = State.SCRIPT_DATA; 1330 this._stateScriptData(cp); 1331 } 1332 } 1333 1334 // Script data escape start state 1335 //------------------------------------------------------------------ 1336 protected _stateScriptDataEscapeStart(cp: number): void { 1337 if (cp === $.HYPHEN_MINUS) { 1338 this.state = State.SCRIPT_DATA_ESCAPE_START_DASH; 1339 this._emitChars('-'); 1340 } else { 1341 this.state = State.SCRIPT_DATA; 1342 this._stateScriptData(cp); 1343 } 1344 } 1345 1346 // Script data escape start dash state 1347 //------------------------------------------------------------------ 1348 protected _stateScriptDataEscapeStartDash(cp: number): void { 1349 if (cp === $.HYPHEN_MINUS) { 1350 this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; 1351 this._emitChars('-'); 1352 } else { 1353 this.state = State.SCRIPT_DATA; 1354 this._stateScriptData(cp); 1355 } 1356 } 1357 1358 // Script data escaped state 1359 //------------------------------------------------------------------ 1360 protected _stateScriptDataEscaped(cp: number): void { 1361 switch (cp) { 1362 case $.HYPHEN_MINUS: { 1363 this.state = State.SCRIPT_DATA_ESCAPED_DASH; 1364 this._emitChars('-'); 1365 break; 1366 } 1367 case $.LESS_THAN_SIGN: { 1368 this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; 1369 break; 1370 } 1371 case $.NULL: { 1372 this._err(ERR.unexpectedNullCharacter); 1373 this._emitChars(REPLACEMENT_CHARACTER); 1374 break; 1375 } 1376 case $.EOF: { 1377 this._err(ERR.eofInScriptHtmlCommentLikeText); 1378 
this._emitEOFToken(); 1379 break; 1380 } 1381 default: { 1382 this._emitCodePoint(cp); 1383 } 1384 } 1385 } 1386 1387 // Script data escaped dash state 1388 //------------------------------------------------------------------ 1389 protected _stateScriptDataEscapedDash(cp: number): void { 1390 switch (cp) { 1391 case $.HYPHEN_MINUS: { 1392 this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; 1393 this._emitChars('-'); 1394 break; 1395 } 1396 case $.LESS_THAN_SIGN: { 1397 this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; 1398 break; 1399 } 1400 case $.NULL: { 1401 this._err(ERR.unexpectedNullCharacter); 1402 this.state = State.SCRIPT_DATA_ESCAPED; 1403 this._emitChars(REPLACEMENT_CHARACTER); 1404 break; 1405 } 1406 case $.EOF: { 1407 this._err(ERR.eofInScriptHtmlCommentLikeText); 1408 this._emitEOFToken(); 1409 break; 1410 } 1411 default: { 1412 this.state = State.SCRIPT_DATA_ESCAPED; 1413 this._emitCodePoint(cp); 1414 } 1415 } 1416 } 1417 1418 // Script data escaped dash dash state 1419 //------------------------------------------------------------------ 1420 protected _stateScriptDataEscapedDashDash(cp: number): void { 1421 switch (cp) { 1422 case $.HYPHEN_MINUS: { 1423 this._emitChars('-'); 1424 break; 1425 } 1426 case $.LESS_THAN_SIGN: { 1427 this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; 1428 break; 1429 } 1430 case $.GREATER_THAN_SIGN: { 1431 this.state = State.SCRIPT_DATA; 1432 this._emitChars('>'); 1433 break; 1434 } 1435 case $.NULL: { 1436 this._err(ERR.unexpectedNullCharacter); 1437 this.state = State.SCRIPT_DATA_ESCAPED; 1438 this._emitChars(REPLACEMENT_CHARACTER); 1439 break; 1440 } 1441 case $.EOF: { 1442 this._err(ERR.eofInScriptHtmlCommentLikeText); 1443 this._emitEOFToken(); 1444 break; 1445 } 1446 default: { 1447 this.state = State.SCRIPT_DATA_ESCAPED; 1448 this._emitCodePoint(cp); 1449 } 1450 } 1451 } 1452 1453 // Script data escaped less-than sign state 1454 //------------------------------------------------------------------ 1455 
// '/' selects the escaped end tag open state. For anything else the pending
// '<' is emitted first; an ASCII letter is then handed to the double escape
// start handler and any other code point back to the escaped handler.
protected _stateScriptDataEscapedLessThanSign(cp: number): void {
    if (cp === $.SOLIDUS) {
        this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
        return;
    }

    const mayStartDoubleEscape = isAsciiLetter(cp);

    this._emitChars('<');

    if (mayStartDoubleEscape) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
        this._stateScriptDataDoubleEscapeStart(cp);
    } else {
        this.state = State.SCRIPT_DATA_ESCAPED;
        this._stateScriptDataEscaped(cp);
    }
}

// Script data escaped end tag open state
//------------------------------------------------------------------
// An ASCII letter is handed to the escaped end tag name handler. Anything
// else means the '</' was plain text: it is emitted and `cp` goes back to the
// escaped handler.
protected _stateScriptDataEscapedEndTagOpen(cp: number): void {
    if (!isAsciiLetter(cp)) {
        this._emitChars('</');
        this.state = State.SCRIPT_DATA_ESCAPED;
        this._stateScriptDataEscaped(cp);
        return;
    }

    this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
    this._stateScriptDataEscapedEndTagName(cp);
}

// Script data escaped end tag name state
//------------------------------------------------------------------
// Delegates to handleSpecialEndTag. A true result means no end tag was
// recognised, so '</' is emitted and `cp` goes back to the escaped handler.
protected _stateScriptDataEscapedEndTagName(cp: number): void {
    const fallBackToText = this.handleSpecialEndTag(cp);

    if (!fallBackToText) {
        return;
    }

    this._emitChars('</');
    this.state = State.SCRIPT_DATA_ESCAPED;
    this._stateScriptDataEscaped(cp);
}

// Script data double escape start state
//------------------------------------------------------------------
// When the input starts with the SCRIPT sequence and the code point right
// after it satisfies isScriptDataDoubleEscapeSequenceEnd, `cp` is emitted,
// then `$$.SCRIPT.length` further code points are consumed and emitted, and
// the double escaped state is entered. Otherwise, unless
// _ensureHibernation() returns true, `cp` is handed back to the escaped
// handler.
// NOTE(review): the `false` passed to startsWith presumably requests a
// case-insensitive comparison - confirm in Preprocessor.
protected _stateScriptDataDoubleEscapeStart(cp: number): void {
    const sequenceLength = $$.SCRIPT.length;
    const opensDoubleEscape =
        this.preprocessor.startsWith($$.SCRIPT, false) &&
        isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(sequenceLength));

    if (opensDoubleEscape) {
        this._emitCodePoint(cp);

        for (let remaining = sequenceLength; remaining > 0; remaining--) {
            this._emitCodePoint(this._consume());
        }

        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        return;
    }

    if (!this._ensureHibernation()) {
        this.state = State.SCRIPT_DATA_ESCAPED;
        this._stateScriptDataEscaped(cp);
    }
}

// Script
// data double escaped state
//------------------------------------------------------------------
// '-' and '<' are emitted and select the matching follow-up state. NULL is
// reported and emitted as REPLACEMENT_CHARACTER. EOF is an error. Anything
// else is emitted unchanged.
protected _stateScriptDataDoubleEscaped(cp: number): void {
    if (cp === $.HYPHEN_MINUS) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
        this._emitChars('-');
    } else if (cp === $.LESS_THAN_SIGN) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
        this._emitChars('<');
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this._emitChars(REPLACEMENT_CHARACTER);
    } else if (cp === $.EOF) {
        this._err(ERR.eofInScriptHtmlCommentLikeText);
        this._emitEOFToken();
    } else {
        this._emitCodePoint(cp);
    }
}

// Script data double escaped dash state
//------------------------------------------------------------------
// Like the double escaped state, except that '-' advances to the dash dash
// state and NULL / any other code point return to the double escaped state.
protected _stateScriptDataDoubleEscapedDash(cp: number): void {
    if (cp === $.HYPHEN_MINUS) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
        this._emitChars('-');
    } else if (cp === $.LESS_THAN_SIGN) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
        this._emitChars('<');
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        this._emitChars(REPLACEMENT_CHARACTER);
    } else if (cp === $.EOF) {
        this._err(ERR.eofInScriptHtmlCommentLikeText);
        this._emitEOFToken();
    } else {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        this._emitCodePoint(cp);
    }
}

// Script data double escaped dash dash state
//------------------------------------------------------------------
// Further '-' code points are emitted without a state change. '>' is emitted
// and returns to the script data state. '<' is emitted and selects the double
// escaped less-than sign state. NULL and any other code point return to the
// double escaped state.
protected _stateScriptDataDoubleEscapedDashDash(cp: number): void {
    if (cp === $.HYPHEN_MINUS) {
        this._emitChars('-');
    } else if (cp === $.LESS_THAN_SIGN) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
        this._emitChars('<');
    } else if (cp === $.GREATER_THAN_SIGN) {
        this.state = State.SCRIPT_DATA;
        this._emitChars('>');
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        this._emitChars(REPLACEMENT_CHARACTER);
    } else if (cp === $.EOF) {
        this._err(ERR.eofInScriptHtmlCommentLikeText);
        this._emitEOFToken();
    } else {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        this._emitCodePoint(cp);
    }
}

// Script data double escaped less-than sign state
//------------------------------------------------------------------
// '/' is emitted and selects the double escape end state; anything else is
// handed back to the double escaped handler.
protected _stateScriptDataDoubleEscapedLessThanSign(cp: number): void {
    if (cp !== $.SOLIDUS) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        this._stateScriptDataDoubleEscaped(cp);
        return;
    }

    this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
    this._emitChars('/');
}

// Script data double escape end state
//------------------------------------------------------------------
// Mirror image of the double escape start handler: when the SCRIPT sequence
// plus a terminating code point is found, `cp` and `$$.SCRIPT.length`
// consumed code points are emitted and the (single) escaped state is entered.
// Otherwise, unless _ensureHibernation() returns true, `cp` is handed back to
// the double escaped handler.
protected _stateScriptDataDoubleEscapeEnd(cp: number): void {
    const sequenceLength = $$.SCRIPT.length;
    const closesDoubleEscape =
        this.preprocessor.startsWith($$.SCRIPT, false) &&
        isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(sequenceLength));

    if (closesDoubleEscape) {
        this._emitCodePoint(cp);

        for (let remaining = sequenceLength; remaining > 0; remaining--) {
            this._emitCodePoint(this._consume());
        }

        this.state = State.SCRIPT_DATA_ESCAPED;
        return;
    }

    if (!this._ensureHibernation()) {
        this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
        this._stateScriptDataDoubleEscaped(cp);
    }
}

// Before attribute name state
//------------------------------------------------------------------
// Whitespace is skipped. '/', '>' and EOF are handed to the after attribute
// name handler. '=' is an error and becomes the first character of a new
// attribute name. Anything else starts an empty attribute and is handed to
// the attribute name handler.
protected _stateBeforeAttributeName(cp: number): void {
    const onWhitespace =
        cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED;

    if (onWhitespace) {
        // Ignore whitespace
        return;
    }

    if (cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN || cp === $.EOF) {
        this.state = State.AFTER_ATTRIBUTE_NAME;
        this._stateAfterAttributeName(cp);
        return;
    }

    if (cp === $.EQUALS_SIGN) {
        this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
        this._createAttr('=');
        this.state = State.ATTRIBUTE_NAME;
        return;
    }

    this._createAttr('');
    this.state = State.ATTRIBUTE_NAME;
    this._stateAttributeName(cp);
}

// Attribute name state
//------------------------------------------------------------------
// Appends to the current attribute's name. Whitespace, '/', '>' and EOF close
// the name and are handed to the after attribute name handler; '=' closes the
// name and moves to the before attribute value state. Quotes and '<' are
// reported but still appended. ASCII upper-case letters are lower-cased.
protected _stateAttributeName(cp: number): void {
    const endsName =
        cp === $.SPACE ||
        cp === $.LINE_FEED ||
        cp === $.TABULATION ||
        cp === $.FORM_FEED ||
        cp === $.SOLIDUS ||
        cp === $.GREATER_THAN_SIGN ||
        cp === $.EOF;

    if (endsName) {
        this._leaveAttrName();
        this.state = State.AFTER_ATTRIBUTE_NAME;
        this._stateAfterAttributeName(cp);
    } else if (cp === $.EQUALS_SIGN) {
        this._leaveAttrName();
        this.state = State.BEFORE_ATTRIBUTE_VALUE;
    } else if (cp === $.QUOTATION_MARK || cp === $.APOSTROPHE || cp === $.LESS_THAN_SIGN) {
        this._err(ERR.unexpectedCharacterInAttributeName);
        this.currentAttr.name += String.fromCodePoint(cp);
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this.currentAttr.name += REPLACEMENT_CHARACTER;
    } else {
        const normalized = isAsciiUpper(cp) ? toAsciiLower(cp) : cp;
        this.currentAttr.name += String.fromCodePoint(normalized);
    }
}

// After attribute name state
//------------------------------------------------------------------
// Whitespace is skipped. '/', '=' and '>' select the self-closing, before
// attribute value and data states ('>' also emits the tag). EOF is an error.
// Anything else starts a new empty attribute and is handed to the attribute
// name handler.
protected _stateAfterAttributeName(cp: number): void {
    const onWhitespace =
        cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED;

    if (onWhitespace) {
        // Ignore whitespace
        return;
    }

    if (cp === $.SOLIDUS) {
        this.state = State.SELF_CLOSING_START_TAG;
    } else if (cp === $.EQUALS_SIGN) {
        this.state = State.BEFORE_ATTRIBUTE_VALUE;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this.state = State.DATA;
        this.emitCurrentTagToken();
    } else if (cp === $.EOF) {
        this._err(ERR.eofInTag);
        this._emitEOFToken();
    } else {
        this._createAttr('');
        this.state = State.ATTRIBUTE_NAME;
        this._stateAttributeName(cp);
    }
}

// Before attribute value state
//------------------------------------------------------------------
// Whitespace is skipped. '"' and "'" select the quoted value states. '>' is
// an error that emits the tag. Anything else is handed to the unquoted value
// handler.
protected _stateBeforeAttributeValue(cp: number): void {
    const onWhitespace =
        cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED;

    if (onWhitespace) {
        // Ignore whitespace
        return;
    }

    if (cp === $.QUOTATION_MARK) {
        this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
    } else if (cp === $.APOSTROPHE) {
        this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._err(ERR.missingAttributeValue);
        this.state = State.DATA;
        this.emitCurrentTagToken();
    } else {
        this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
        this._stateAttributeValueUnquoted(cp);
    }
}

// Attribute value (double-quoted) state
//------------------------------------------------------------------
// Appends to the current attribute's value until the closing '"'. '&' starts
// a character reference. NULL is reported and stored as
// REPLACEMENT_CHARACTER. EOF is an error.
protected _stateAttributeValueDoubleQuoted(cp: number): void {
    if (cp === $.QUOTATION_MARK) {
        this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
    } else if (cp === $.AMPERSAND) {
        this._startCharacterReference();
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this.currentAttr.value += REPLACEMENT_CHARACTER;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInTag);
        this._emitEOFToken();
    } else {
        this.currentAttr.value += String.fromCodePoint(cp);
    }
}

// Attribute value (single-quoted) state
//------------------------------------------------------------------
// Same as the double-quoted handler, with "'" as the terminator.
protected _stateAttributeValueSingleQuoted(cp: number): void {
    if (cp === $.APOSTROPHE) {
        this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
    } else if (cp === $.AMPERSAND) {
        this._startCharacterReference();
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this.currentAttr.value += REPLACEMENT_CHARACTER;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInTag);
        this._emitEOFToken();
    } else {
        this.currentAttr.value += String.fromCodePoint(cp);
    }
}

// Attribute value (unquoted) state
//------------------------------------------------------------------
// Whitespace and '>' close the value ('>' also emits the tag). '&' starts a
// character reference. '"', "'", '<', '=' and '`' are reported but still
// appended. NULL is reported and stored as REPLACEMENT_CHARACTER. EOF is an
// error.
protected _stateAttributeValueUnquoted(cp: number): void {
    const onWhitespace =
        cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED;

    if (onWhitespace) {
        this._leaveAttrValue();
        this.state = State.BEFORE_ATTRIBUTE_NAME;
    } else if (cp === $.AMPERSAND) {
        this._startCharacterReference();
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._leaveAttrValue();
        this.state = State.DATA;
        this.emitCurrentTagToken();
    } else if (cp === $.NULL) {
        this._err(ERR.unexpectedNullCharacter);
        this.currentAttr.value += REPLACEMENT_CHARACTER;
    } else if (cp === $.EOF) {
        this._err(ERR.eofInTag);
        this._emitEOFToken();
    } else {
        const isSuspicious =
            cp === $.QUOTATION_MARK ||
            cp === $.APOSTROPHE ||
            cp === $.LESS_THAN_SIGN ||
            cp === $.EQUALS_SIGN ||
            cp === $.GRAVE_ACCENT;

        if (isSuspicious) {
            this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
        }

        this.currentAttr.value += String.fromCodePoint(cp);
    }
}

// After attribute value (quoted) state
//------------------------------------------------------------------
// Whitespace, '/' and '>' close the value and select the next state ('>' also
// emits the tag). EOF is an error. Anything else is reported as missing
// whitespace and handed to the before attribute name handler.
protected _stateAfterAttributeValueQuoted(cp: number): void {
    const onWhitespace =
        cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED;

    if (onWhitespace) {
        this._leaveAttrValue();
        this.state = State.BEFORE_ATTRIBUTE_NAME;
    } else if (cp === $.SOLIDUS) {
        this._leaveAttrValue();
        this.state = State.SELF_CLOSING_START_TAG;
    } else if (cp === $.GREATER_THAN_SIGN) {
        this._leaveAttrValue();
        this.state = State.DATA;
        this.emitCurrentTagToken();
    } else if (cp === $.EOF) {
        this._err(ERR.eofInTag);
        this._emitEOFToken();
    } else {
        // NOTE(review): unlike the branches above, this path does not call
        // _leaveAttrValue() - confirm that is intended.
        this._err(ERR.missingWhitespaceBetweenAttributes);
        this.state = State.BEFORE_ATTRIBUTE_NAME;
        this._stateBeforeAttributeName(cp);
    }
}

// Self-closing start tag state
//------------------------------------------------------------------
// '>' marks the current tag token as self-closing and emits it. EOF is an
// error. Anything else is reported as an unexpected solidus and handed to the
// before attribute name handler.
protected _stateSelfClosingStartTag(cp: number): void {
    if (cp === $.GREATER_THAN_SIGN) {
        (this.currentToken as TagToken).selfClosing = true;
        this.state = State.DATA;
        this.emitCurrentTagToken();
    } else if (cp === $.EOF) {
        this._err(ERR.eofInTag);
        this._emitEOFToken();
    } else {
        this._err(ERR.unexpectedSolidusInTag);
        this.state = State.BEFORE_ATTRIBUTE_NAME;
        this._stateBeforeAttributeName(cp);
    }
}
// Bogus comment state
//------------------------------------------------------------------
// Collects code points into the current comment token's data until '>' or
// EOF, both of which emit the comment. NULL is reported and stored as
// REPLACEMENT_CHARACTER.
protected _stateBogusComment(cp: number): void {
    const token = this.currentToken as CommentToken;

    switch (cp) {
        case $.GREATER_THAN_SIGN: {
            this.state = State.DATA;
            this.emitCurrentComment(token);
            break;
        }
        case $.EOF: {
            this.emitCurrentComment(token);
            this._emitEOFToken();
            break;
        }
        case $.NULL: {
            this._err(ERR.unexpectedNullCharacter);
            token.data += REPLACEMENT_CHARACTER;
            break;
        }
        default: {
            token.data += String.fromCodePoint(cp);
        }
    }
}

// Markup declaration open state
//------------------------------------------------------------------
// Tries, in order, to consume the DASH_DASH, DOCTYPE and CDATA_START
// sequences and selects the comment start, DOCTYPE or CDATA handling
// accordingly. If none matches and _ensureHibernation() returns false, the
// input is reported as an incorrectly opened comment and treated as a bogus
// comment.
// NOTE(review): the boolean passed to _consumeSequenceIfMatch is presumably a
// case-sensitivity flag - confirm in its definition.
protected _stateMarkupDeclarationOpen(cp: number): void {
    if (this._consumeSequenceIfMatch($$.DASH_DASH, true)) {
        this._createCommentToken($$.DASH_DASH.length + 1);
        this.state = State.COMMENT_START;
    } else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) {
        // NOTE: Doctype tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
        this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1);
        this.state = State.DOCTYPE;
    } else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) {
        if (this.inForeignNode) {
            this.state = State.CDATA_SECTION;
        } else {
            // Outside foreign content a CDATA start is an error; it becomes a bogus
            // comment whose data starts with '[CDATA['.
            this._err(ERR.cdataInHtmlContent);
            this._createCommentToken($$.CDATA_START.length + 1);
            (this.currentToken as CommentToken).data = '[CDATA[';
            this.state = State.BOGUS_COMMENT;
        }
    }

    //NOTE: Sequence lookups can be interrupted by hibernation. In that case, lookup
    //results are no longer valid and we will need to start over.
    else if (!this._ensureHibernation()) {
        this._err(ERR.incorrectlyOpenedComment);
        this._createCommentToken(2);
        this.state = State.BOGUS_COMMENT;
        this._stateBogusComment(cp);
    }
}

// Comment start state
//------------------------------------------------------------------
// '-' moves to the comment start dash state. '>' is an error that emits the
// (empty) comment. Anything else is handed to the comment handler.
protected _stateCommentStart(cp: number): void {
    switch (cp) {
        case $.HYPHEN_MINUS: {
            this.state = State.COMMENT_START_DASH;
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this._err(ERR.abruptClosingOfEmptyComment);
            this.state = State.DATA;
            const token = this.currentToken as CommentToken;
            this.emitCurrentComment(token);
            break;
        }
        default: {
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
}

// Comment start dash state
//------------------------------------------------------------------
// A second '-' moves to the comment end state. '>' and EOF are errors that
// emit the comment. Anything else appends the pending '-' to the comment data
// and is handed to the comment handler.
protected _stateCommentStartDash(cp: number): void {
    const token = this.currentToken as CommentToken;
    switch (cp) {
        case $.HYPHEN_MINUS: {
            this.state = State.COMMENT_END;
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this._err(ERR.abruptClosingOfEmptyComment);
            this.state = State.DATA;
            this.emitCurrentComment(token);
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInComment);
            this.emitCurrentComment(token);
            this._emitEOFToken();
            break;
        }
        default: {
            token.data += '-';
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
}

// Comment state
//------------------------------------------------------------------
// Appends code points to the comment data. '-' moves to the comment end dash
// state without appending; '<' is appended and moves to the comment less-than
// sign state. NULL is reported and stored as REPLACEMENT_CHARACTER. EOF is an
// error that emits the comment.
protected _stateComment(cp: number): void {
    const token = this.currentToken as CommentToken;

    switch (cp) {
        case $.HYPHEN_MINUS: {
            this.state = State.COMMENT_END_DASH;
            break;
        }
        case $.LESS_THAN_SIGN: {
            token.data += '<';
            this.state = State.COMMENT_LESS_THAN_SIGN;
            break;
        }
        case $.NULL: {
            this._err(ERR.unexpectedNullCharacter);
            token.data += REPLACEMENT_CHARACTER;
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInComment);
            this.emitCurrentComment(token);
            this._emitEOFToken();
            break;
        }
        default: {
            token.data += String.fromCodePoint(cp);
        }
    }
}

// Comment less-than sign state
//------------------------------------------------------------------
// '!' is appended and moves to the bang state. Another '<' is appended
// without a state change. Anything else is handed back to the comment
// handler.
protected _stateCommentLessThanSign(cp: number): void {
    const token = this.currentToken as CommentToken;

    switch (cp) {
        case $.EXCLAMATION_MARK: {
            token.data += '!';
            this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
            break;
        }
        case $.LESS_THAN_SIGN: {
            token.data += '<';
            break;
        }
        default: {
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
}

// Comment less-than sign bang state
//------------------------------------------------------------------
// '-' moves to the bang dash state; anything else is handed back to the
// comment handler.
protected _stateCommentLessThanSignBang(cp: number): void {
    if (cp === $.HYPHEN_MINUS) {
        this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
    } else {
        this.state = State.COMMENT;
        this._stateComment(cp);
    }
}

// Comment less-than sign bang dash state
//------------------------------------------------------------------
// A second '-' moves to the bang dash dash state; anything else is handed to
// the comment end dash handler.
protected _stateCommentLessThanSignBangDash(cp: number): void {
    if (cp === $.HYPHEN_MINUS) {
        this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
    } else {
        this.state = State.COMMENT_END_DASH;
        this._stateCommentEndDash(cp);
    }
}

// Comment less-than sign bang dash dash state
//------------------------------------------------------------------
// Reports a nested comment unless `cp` is '>' or EOF, then always hands `cp`
// to the comment end handler.
protected _stateCommentLessThanSignBangDashDash(cp: number): void {
    if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) {
        this._err(ERR.nestedComment);
    }

    this.state = State.COMMENT_END;
    this._stateCommentEnd(cp);
}

// Comment end dash state
//------------------------------------------------------------------
// A second '-' moves to the comment end state. EOF is an error that emits the
// comment. Anything else appends the pending '-' to the comment data and is
// handed back to the comment handler.
protected _stateCommentEndDash(cp: number): void {
    const token = this.currentToken as CommentToken;
    switch (cp) {
        case $.HYPHEN_MINUS: {
            this.state = State.COMMENT_END;
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInComment);
            this.emitCurrentComment(token);
            this._emitEOFToken();
            break;
        }
        default: {
            token.data += '-';
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
}

// Comment end state
//------------------------------------------------------------------
// '>' emits the comment. '!' moves to the comment end bang state. Each extra
// '-' is appended to the data. EOF is an error that emits the comment.
// Anything else appends the pending '--' and is handed back to the comment
// handler.
protected _stateCommentEnd(cp: number): void {
    const token = this.currentToken as CommentToken;

    switch (cp) {
        case $.GREATER_THAN_SIGN: {
            this.state = State.DATA;
            this.emitCurrentComment(token);
            break;
        }
        case $.EXCLAMATION_MARK: {
            this.state = State.COMMENT_END_BANG;
            break;
        }
        case $.HYPHEN_MINUS: {
            token.data += '-';
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInComment);
            this.emitCurrentComment(token);
            this._emitEOFToken();
            break;
        }
        default: {
            token.data += '--';
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
}

// Comment end bang state
//------------------------------------------------------------------
// '-' appends the pending '--!' and moves to the comment end dash state. '>'
// is an error (incorrectly closed comment) that emits the comment. EOF is an
// error that emits the comment. Anything else appends '--!' and is handed
// back to the comment handler.
protected _stateCommentEndBang(cp: number): void {
    const token = this.currentToken as CommentToken;

    switch (cp) {
        case $.HYPHEN_MINUS: {
            token.data += '--!';
            this.state = State.COMMENT_END_DASH;
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this._err(ERR.incorrectlyClosedComment);
            this.state = State.DATA;
            this.emitCurrentComment(token);
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInComment);
            this.emitCurrentComment(token);
            this._emitEOFToken();
            break;
        }
        default: {
            token.data += '--!';
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
}

// DOCTYPE state
//------------------------------------------------------------------
// Whitespace moves to the before DOCTYPE name state. '>' is handed to that
// state's handler. EOF is an error that emits a force-quirks doctype created
// with a null name. Anything else is reported as missing whitespace and
// handed to the before DOCTYPE name handler.
protected _stateDoctype(cp: number): void {
    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            this.state = State.BEFORE_DOCTYPE_NAME;
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this.state = State.BEFORE_DOCTYPE_NAME;
            this._stateBeforeDoctypeName(cp);
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInDoctype);
            this._createDoctypeToken(null);
            const token = this.currentToken as DoctypeToken;
            token.forceQuirks = true;
            this.emitCurrentDoctype(token);
            this._emitEOFToken();
            break;
        }
        default: {
            this._err(ERR.missingWhitespaceBeforeDoctypeName);
            this.state = State.BEFORE_DOCTYPE_NAME;
            this._stateBeforeDoctypeName(cp);
        }
    }
}

// Before DOCTYPE name state
//------------------------------------------------------------------
// Creates the doctype token from the first name character: ASCII upper-case
// letters are lower-cased, NULL is reported and replaced with
// REPLACEMENT_CHARACTER, and any other code point is used as-is. Whitespace
// is skipped. '>' and EOF are errors that emit a force-quirks doctype created
// with a null name.
protected _stateBeforeDoctypeName(cp: number): void {
    if (isAsciiUpper(cp)) {
        this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
        this.state = State.DOCTYPE_NAME;
    } else
        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                this._createDoctypeToken(REPLACEMENT_CHARACTER);
                this.state = State.DOCTYPE_NAME;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this._err(ERR.missingDoctypeName);
                this._createDoctypeToken(null);
                const token = this.currentToken as DoctypeToken;
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                this._createDoctypeToken(null);
                const token = this.currentToken as DoctypeToken;
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._createDoctypeToken(String.fromCodePoint(cp));
                this.state = State.DOCTYPE_NAME;
            }
        }
}

// DOCTYPE name state
//------------------------------------------------------------------
// Appends to the doctype token's name (ASCII upper-case letters lower-cased)
// until whitespace or '>'. NULL is reported and stored as
// REPLACEMENT_CHARACTER. EOF is an error that emits the doctype with
// forceQuirks set.
protected _stateDoctypeName(cp: number): void {
    const token = this.currentToken as DoctypeToken;

    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            this.state = State.AFTER_DOCTYPE_NAME;
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this.state = State.DATA;
            this.emitCurrentDoctype(token);
            break;
        }
        case $.NULL: {
            this._err(ERR.unexpectedNullCharacter);
            token.name += REPLACEMENT_CHARACTER;
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInDoctype);
            token.forceQuirks = true;
            this.emitCurrentDoctype(token);
            this._emitEOFToken();
            break;
        }
        default: {
            token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
        }
    }
}

// After DOCTYPE name state
//------------------------------------------------------------------
// Whitespace is skipped. '>' emits the doctype. EOF is an error that emits it
// with forceQuirks set. Otherwise the PUBLIC and SYSTEM sequences are tried
// in that order; if neither matches and _ensureHibernation() returns false,
// the doctype is flagged force-quirks and `cp` is handed to the bogus DOCTYPE
// handler.
protected _stateAfterDoctypeName(cp: number): void {
    const token = this.currentToken as DoctypeToken;

    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            // Ignore whitespace
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this.state = State.DATA;
            this.emitCurrentDoctype(token);
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInDoctype);
            token.forceQuirks = true;
            this.emitCurrentDoctype(token);
            this._emitEOFToken();
            break;
        }
        default: {
            if (this._consumeSequenceIfMatch($$.PUBLIC, false)) {
                this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
            } else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) {
                this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
            }
            //NOTE: sequence lookup can be interrupted by hibernation. In that case lookup
            //results are no longer valid and we will need to start over.
            else if (!this._ensureHibernation()) {
                this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
                token.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
}

// After DOCTYPE public keyword state
//------------------------------------------------------------------
// Whitespace moves to the before public identifier state. A quote directly
// after the keyword is an error (missing whitespace) but still starts an
// empty public identifier in the matching quoted state. '>' and EOF are
// errors that emit the doctype with forceQuirks set. Anything else is an
// error that sets forceQuirks and hands `cp` to the bogus DOCTYPE handler.
protected _stateAfterDoctypePublicKeyword(cp: number): void {
    const token = this.currentToken as DoctypeToken;

    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
        case $.FORM_FEED: {
            this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
            break;
        }
        case $.QUOTATION_MARK: {
            this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
            token.publicId = '';
            this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
            break;
        }
        case $.APOSTROPHE: {
            this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
            token.publicId = '';
            this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
            break;
        }
        case $.GREATER_THAN_SIGN: {
            this._err(ERR.missingDoctypePublicIdentifier);
            token.forceQuirks = true;
            this.state = State.DATA;
            this.emitCurrentDoctype(token);
            break;
        }
        case $.EOF: {
            this._err(ERR.eofInDoctype);
            token.forceQuirks = true;
            this.emitCurrentDoctype(token);
            this._emitEOFToken();
            break;
        }
        default: {
            this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
            token.forceQuirks = true;
            this.state = State.BOGUS_DOCTYPE;
            this._stateBogusDoctype(cp);
        }
    }
}

// Before DOCTYPE public identifier state
//------------------------------------------------------------------
protected _stateBeforeDoctypePublicIdentifier(cp: number): void {
    const token = this.currentToken as DoctypeToken;

    switch (cp) {
        case $.SPACE:
        case $.LINE_FEED:
        case $.TABULATION:
case $.FORM_FEED: { 2442 // Ignore whitespace 2443 break; 2444 } 2445 case $.QUOTATION_MARK: { 2446 token.publicId = ''; 2447 this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; 2448 break; 2449 } 2450 case $.APOSTROPHE: { 2451 token.publicId = ''; 2452 this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; 2453 break; 2454 } 2455 case $.GREATER_THAN_SIGN: { 2456 this._err(ERR.missingDoctypePublicIdentifier); 2457 token.forceQuirks = true; 2458 this.state = State.DATA; 2459 this.emitCurrentDoctype(token); 2460 break; 2461 } 2462 case $.EOF: { 2463 this._err(ERR.eofInDoctype); 2464 token.forceQuirks = true; 2465 this.emitCurrentDoctype(token); 2466 this._emitEOFToken(); 2467 break; 2468 } 2469 default: { 2470 this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); 2471 token.forceQuirks = true; 2472 this.state = State.BOGUS_DOCTYPE; 2473 this._stateBogusDoctype(cp); 2474 } 2475 } 2476 } 2477 2478 // DOCTYPE public identifier (double-quoted) state 2479 //------------------------------------------------------------------ 2480 protected _stateDoctypePublicIdentifierDoubleQuoted(cp: number): void { 2481 const token = this.currentToken as DoctypeToken; 2482 2483 switch (cp) { 2484 case $.QUOTATION_MARK: { 2485 this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; 2486 break; 2487 } 2488 case $.NULL: { 2489 this._err(ERR.unexpectedNullCharacter); 2490 token.publicId += REPLACEMENT_CHARACTER; 2491 break; 2492 } 2493 case $.GREATER_THAN_SIGN: { 2494 this._err(ERR.abruptDoctypePublicIdentifier); 2495 token.forceQuirks = true; 2496 this.emitCurrentDoctype(token); 2497 this.state = State.DATA; 2498 break; 2499 } 2500 case $.EOF: { 2501 this._err(ERR.eofInDoctype); 2502 token.forceQuirks = true; 2503 this.emitCurrentDoctype(token); 2504 this._emitEOFToken(); 2505 break; 2506 } 2507 default: { 2508 token.publicId += String.fromCodePoint(cp); 2509 } 2510 } 2511 } 2512 2513 // DOCTYPE public identifier (single-quoted) state 2514 
//------------------------------------------------------------------
    // Accumulates the public identifier until the closing `'`. A `>` or EOF before
    // that emits the token with force-quirks set.
    protected _stateDoctypePublicIdentifierSingleQuoted(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.APOSTROPHE: {
                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this._err(ERR.abruptDoctypePublicIdentifier);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this.state = State.DATA;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                doctype.publicId += REPLACEMENT_CHARACTER;
                break;
            }
            default: {
                doctype.publicId += String.fromCodePoint(cp);
            }
        }
    }

    // After DOCTYPE public identifier state
    //------------------------------------------------------------------
    // Handles the code point following the closing quote of the public identifier.
    // A quote at this point starts the system identifier but is reported as missing
    // whitespace.
    protected _stateAfterDoctypePublicIdentifier(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
                break;
            }
            case $.QUOTATION_MARK:
            case $.APOSTROPHE: {
                this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
                doctype.systemId = '';
                this.state =
                    cp === $.QUOTATION_MARK
                        ? State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
                        : State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentDoctype(doctype);
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }

    // Between DOCTYPE public and system identifiers state
    //------------------------------------------------------------------
    // Skips whitespace, then expects either `>` or the quote that opens the system
    // identifier.
    protected _stateBetweenDoctypePublicAndSystemIdentifiers(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                // Whitespace is skipped; the state stays the same.
                break;
            }
            case $.QUOTATION_MARK:
            case $.APOSTROPHE: {
                doctype.systemId = '';
                this.state =
                    cp === $.QUOTATION_MARK
                        ? State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
                        : State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this.emitCurrentDoctype(doctype);
                this.state = State.DATA;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }

    // After DOCTYPE system keyword state
    //------------------------------------------------------------------
    // Handles the code point following the SYSTEM sequence. An opening quote at this
    // point is accepted but reported as missing whitespace.
    protected _stateAfterDoctypeSystemKeyword(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
                break;
            }
            case $.QUOTATION_MARK:
            case $.APOSTROPHE: {
                this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
                doctype.systemId = '';
                this.state =
                    cp === $.QUOTATION_MARK
                        ? State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
                        : State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this._err(ERR.missingDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.state = State.DATA;
                this.emitCurrentDoctype(doctype);
                break;
            }
            default: {
                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }

    // Before DOCTYPE system identifier state
    //------------------------------------------------------------------
    // Skips whitespace and waits for the quote that opens the system identifier.
    protected _stateBeforeDoctypeSystemIdentifier(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                // Whitespace is skipped; the state stays the same.
                break;
            }
            case $.QUOTATION_MARK:
            case $.APOSTROPHE: {
                doctype.systemId = '';
                this.state =
                    cp === $.QUOTATION_MARK
                        ? State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
                        : State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this._err(ERR.missingDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.state = State.DATA;
                this.emitCurrentDoctype(doctype);
                break;
            }
            default: {
                this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }

    // DOCTYPE system identifier (double-quoted) state
    //------------------------------------------------------------------
    // Accumulates the system identifier until the closing `"`. A `>` or EOF before
    // that emits the token with force-quirks set.
    protected _stateDoctypeSystemIdentifierDoubleQuoted(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.QUOTATION_MARK: {
                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this._err(ERR.abruptDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this.state = State.DATA;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                doctype.systemId += REPLACEMENT_CHARACTER;
                break;
            }
            default: {
                doctype.systemId += String.fromCodePoint(cp);
            }
        }
    }

    // DOCTYPE system identifier (single-quoted) state
    //------------------------------------------------------------------
    // Accumulates the system identifier until the closing `'`. A `>` or EOF before
    // that emits the token with force-quirks set.
    protected _stateDoctypeSystemIdentifierSingleQuoted(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.APOSTROPHE: {
                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this._err(ERR.abruptDoctypeSystemIdentifier);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this.state = State.DATA;
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            case $.NULL: {
                this._err(ERR.unexpectedNullCharacter);
                doctype.systemId += REPLACEMENT_CHARACTER;
                break;
            }
            default: {
                doctype.systemId += String.fromCodePoint(cp);
            }
        }
    }

    // After DOCTYPE system identifier state
    //------------------------------------------------------------------
    // Skips whitespace and expects `>`. An unexpected code point is reported and
    // handed to the bogus DOCTYPE handler without setting force-quirks.
    protected _stateAfterDoctypeSystemIdentifier(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        switch (cp) {
            case $.SPACE:
            case $.LINE_FEED:
            case $.TABULATION:
            case $.FORM_FEED: {
                // Whitespace is skipped; the state stays the same.
                break;
            }
            case $.EOF: {
                this._err(ERR.eofInDoctype);
                doctype.forceQuirks = true;
                this.emitCurrentDoctype(doctype);
                this._emitEOFToken();
                break;
            }
            case $.GREATER_THAN_SIGN: {
                this.emitCurrentDoctype(doctype);
                this.state = State.DATA;
                break;
            }
            default: {
                this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }

    // Bogus DOCTYPE state
    //------------------------------------------------------------------
    // Discards input until `>` or EOF, at which point the current DOCTYPE token is
    // emitted. NULL is reported as an error; all other code points are dropped.
    protected _stateBogusDoctype(cp: number): void {
        const doctype = this.currentToken as DoctypeToken;

        if (cp === $.GREATER_THAN_SIGN) {
            this.emitCurrentDoctype(doctype);
            this.state = State.DATA;
        } else if (cp === $.NULL) {
            this._err(ERR.unexpectedNullCharacter);
        } else if (cp === $.EOF) {
            this.emitCurrentDoctype(doctype);
            this._emitEOFToken();
        }
        // Any other code point: nothing to do.
    }

    // CDATA section state
    //------------------------------------------------------------------
    // Emits code points as they come; `]` may begin the closing sequence.
    protected _stateCdataSection(cp: number): void {
        if (cp === $.RIGHT_SQUARE_BRACKET) {
            this.state = State.CDATA_SECTION_BRACKET;
        } else if (cp === $.EOF) {
            this._err(ERR.eofInCdata);
            this._emitEOFToken();
        } else {
            this._emitCodePoint(cp);
        }
    }

    // CDATA section bracket state
    //------------------------------------------------------------------
    // One `]` has been seen. A second one moves to the end state; otherwise the
    // held-back `]` is emitted and the code point is handled as CDATA content.
    protected _stateCdataSectionBracket(cp: number): void {
        if (cp === $.RIGHT_SQUARE_BRACKET) {
            this.state = State.CDATA_SECTION_END;
            return;
        }

        this._emitChars(']');
        this.state = State.CDATA_SECTION;
        this._stateCdataSection(cp);
    }

    // CDATA section end state
    //------------------------------------------------------------------
    // Two `]` have been seen. `>` switches to the DATA state; a further `]` emits one
    // bracket and stays here; anything else emits `]]` and is handled as CDATA content.
    protected _stateCdataSectionEnd(cp: number): void {
        if (cp === $.GREATER_THAN_SIGN) {
            this.state = State.DATA;
        } else if (cp === $.RIGHT_SQUARE_BRACKET) {
            this._emitChars(']');
        } else {
            this._emitChars(']]');
            this.state = State.CDATA_SECTION;
            this._stateCdataSection(cp);
        }
    }

    // Character reference state
    //------------------------------------------------------------------
    // Feeds the buffer to the entity decoder from the current position. A negative
    // result is treated as "more input needed", zero as "not an entity", and anything
    // else as a decoded entity.
    protected _stateCharacterReference(): void {
        let consumed = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);

        if (consumed < 0) {
            if (!this.preprocessor.lastChunkWritten) {
                // Suspend until the rest of the entity arrives.
                this.active = false;
                // Flag the whole buffer as read.
                this.preprocessor.pos = this.preprocessor.html.length - 1;
                this.consumedAfterSnapshot = 0;
                this.preprocessor.endOfChunkHit = true;
                return;
            }

            // No further chunks will come: ask the decoder to finish with what it has.
            consumed = this.entityDecoder.end();
        }

        if (consumed !== 0) {
            // An entity was decoded; continue in the return state.
            this.state = this.returnState;
            return;
        }

        // Not a valid entity: rewind to where it started and flush the `&` itself.
        this.preprocessor.pos = this.entityStartPos;
        this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);

        const looksAmbiguous =
            !this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1));

        this.state = looksAmbiguous ? State.AMBIGUOUS_AMPERSAND : this.returnState;
    }

    // Ambiguous ampersand state
    //------------------------------------------------------------------
    // Flushes ASCII alphanumerics one by one. The first other code point ends the run
    // (a `;` is reported as an unknown named reference) and is dispatched again in the
    // return state.
    protected _stateAmbiguousAmpersand(cp: number): void {
        if (isAsciiAlphaNumeric(cp)) {
            this._flushCodePointConsumedAsCharacterReference(cp);
            return;
        }

        if (cp === $.SEMICOLON) {
            this._err(ERR.unknownNamedCharacterReference);
        }

        this.state = this.returnState;
        this._callState(cp);
    }
}

/**
 * Tracks start/end tags of elements the validator reports as supporting
 * self-closing, and records messages in `parse.compileResult.log`.
 *
 * Two messages can be pushed:
 * - "must be closed" for a remembered start tag (`parse.nodeInfo`) that is still
 *   unclosed when an unsupported tag, or a start tag at a different position, arrives;
 * - "can not use selfClosing" for a self-closed start tag whose name the validator
 *   does not support.
 *
 * @param parse Tokenizer whose `validator`, `nodeInfo` and `compileResult` are used.
 * @param token Tag token being inspected.
 */
function checkselfClosingNode(parse: Tokenizer, token: TagToken) {
  const name = (token.tagName || "").toLowerCase();
  const isSelfClosed = token.selfClosing;
  const supported = parse.validator.isSupportedSelfClosing(name);
  const isStartTag = token.type === TokenType.START_TAG;
  // "line,col" key, the same format that is stored in `nodeInfo.pos`.
  const here = String(token.location?.startLine) + ',' + String(token.location?.startCol);

  const hasPendingTag = Boolean(parse.nodeInfo.tn) && Boolean(name) && !parse.nodeInfo.sc;

  if (hasPendingTag && (!supported || (here !== parse.nodeInfo.pos && isStartTag))) {
    const [lineText, columnText] = parse.nodeInfo.pos.split(',');

    parse.compileResult.log.push({
      line: Number(lineText) || 1,
      column: Number(columnText) || 1,
      reason: 'ERROR: tag `' + parse.nodeInfo.tn + '` must be closed, please follow norm',
    });
    // Forget the reported tag.
    parse.nodeInfo = { tn: '', sc: false, pos: '' };
  }

  if (name && supported) {
    if (isStartTag && !isSelfClosed) {
      // Remember this open tag until its end tag shows up.
      parse.nodeInfo.tn = name;
      parse.nodeInfo.sc = false;
      parse.nodeInfo.pos = here;
    }

    if (token.type === TokenType.END_TAG && name === parse.nodeInfo.tn) {
      parse.nodeInfo.sc = true;
    }
  }

  if (!supported && isSelfClosed && isStartTag) {
    parse.compileResult.log.push({
      line: token.location?.startLine || 1,
      column: token.location?.startCol || 1,
      reason: "ERROR: tag `" + name + "` can not use selfClosing",
    });
  }
}