import {
    Tokenizer,
    type TokenizerOptions,
    TokenizerMode,
    type TokenHandler,
    Token,
    foreignContent,
    html,
} from 'parse5';

const $ = html.TAG_ID;

const REPLACEMENT_CHARACTER = '\uFFFD';
const LINE_FEED_CODE_POINT = 0x0a;

/**
 * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
 *
 * The simulator sits between a `Tokenizer` and a downstream `TokenHandler`: it receives every
 * token first, updates the tokenizer (state and `inForeignNode`) and its own namespace
 * bookkeeping, optionally rewrites the token, and then forwards it to the wrapped handler.
 */
export class ParserFeedbackSimulator implements TokenHandler {
    /** Stack of entered namespaces; index 0 is the current namespace. */
    private namespaceStack: html.NS[] = [];

    /** `true` while the current namespace is anything other than HTML. */
    public inForeignContent = false;

    /** `true` right after a `<pre>`, `<textarea>` or `<listing>` start tag was seen in HTML content. */
    public skipNextNewLine = false;

    /** The tokenizer that feeds this simulator. */
    public readonly tokenizer: Tokenizer;

    /**
     * @param options Options passed through to the underlying `Tokenizer`.
     * @param handler Downstream handler that receives the (possibly adjusted) tokens.
     */
    constructor(
        options: TokenizerOptions,
        private readonly handler: TokenHandler,
    ) {
        this.tokenizer = new Tokenizer(options, this);
        this._enterNamespace(html.NS.HTML);
    }

    /**
     * Forwards a NULL character token. In foreign content it is replaced by a
     * U+FFFD character token that keeps the original location.
     *
     * @internal
     */
    onNullCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;

        if (this.inForeignContent) {
            this.handler.onCharacter({
                type: Token.TokenType.CHARACTER,
                chars: REPLACEMENT_CHARACTER,
                location: token.location,
            });
        } else {
            this.handler.onNullCharacter(token);
        }
    }

    /**
     * Forwards a whitespace token, dropping a single leading line feed when it
     * directly follows a `<pre>`, `<textarea>` or `<listing>` start tag.
     *
     * @internal
     */
    onWhitespaceCharacter(token: Token.CharacterToken): void {
        if (this.skipNextNewLine) {
            // Only the token immediately following the start tag may lose its leading LF,
            // so the flag is consumed by this token whether or not it starts with one.
            this.skipNextNewLine = false;

            if (token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) {
                if (token.chars.length === 1) {
                    // The token consisted solely of the ignored line feed.
                    return;
                }

                token.chars = token.chars.slice(1);
            }
        }

        this.handler.onWhitespaceCharacter(token);
    }

    /** @internal */
    onCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;
        this.handler.onCharacter(token);
    }

    /** @internal */
    onComment(token: Token.CommentToken): void {
        this.skipNextNewLine = false;
        this.handler.onComment(token);
    }

    /** @internal */
    onDoctype(token: Token.DoctypeToken): void {
        this.skipNextNewLine = false;
        this.handler.onDoctype(token);
    }

    /** @internal */
    onEof(token: Token.EOFToken): void {
        this.skipNextNewLine = false;
        this.handler.onEof(token);
    }

    //Namespace stack mutations

    /** Pushes `namespace` as the current namespace and syncs the foreign-content flags. */
    private _enterNamespace(namespace: html.NS): void {
        this.namespaceStack.unshift(namespace);
        this.inForeignContent = namespace !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    /** Pops the current namespace and syncs the foreign-content flags with the new top. */
    private _leaveCurrentNamespace(): void {
        this.namespaceStack.shift();
        this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    //Token handlers

    /** Switches the tokenizer state for elements whose content is not tokenized as regular data. */
    private _ensureTokenizerMode(tn: html.TAG_ID): void {
        switch (tn) {
            case $.TEXTAREA:
            case $.TITLE: {
                this.tokenizer.state = TokenizerMode.RCDATA;
                break;
            }
            case $.PLAINTEXT: {
                this.tokenizer.state = TokenizerMode.PLAINTEXT;
                break;
            }
            case $.SCRIPT: {
                this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
                break;
            }
            case $.STYLE:
            case $.IFRAME:
            case $.XMP:
            case $.NOEMBED:
            case $.NOFRAMES:
            case $.NOSCRIPT: {
                this.tokenizer.state = TokenizerMode.RAWTEXT;
                break;
            }
            default:
            // Do nothing
        }
    }

    /**
     * Handles a start tag: tracks namespace entry/exit, applies foreign-content token
     * adjustments, sets up newline skipping and the tokenizer mode for HTML content,
     * then forwards the token.
     *
     * @internal
     */
    onStartTag(token: Token.TagToken): void {
        let tn = token.tagID;

        switch (tn) {
            case $.SVG: {
                this._enterNamespace(html.NS.SVG);
                break;
            }
            case $.MATH: {
                this._enterNamespace(html.NS.MATHML);
                break;
            }
            default:
            // Do nothing
        }

        if (this.inForeignContent) {
            if (foreignContent.causesExit(token)) {
                this._leaveCurrentNamespace();
            } else {
                const currentNs = this.namespaceStack[0];

                // Apply the same token adjustments the tree builder performs for foreign elements.
                if (currentNs === html.NS.MATHML) {
                    foreignContent.adjustTokenMathMLAttrs(token);
                } else if (currentNs === html.NS.SVG) {
                    foreignContent.adjustTokenSVGTagName(token);
                    foreignContent.adjustTokenSVGAttrs(token);
                }

                foreignContent.adjustTokenXMLAttrs(token);

                // The SVG tag name adjustment may have changed the tag ID.
                tn = token.tagID;

                if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
                    this._enterNamespace(html.NS.HTML);
                }
            }
        } else {
            switch (tn) {
                case $.PRE:
                case $.TEXTAREA:
                case $.LISTING: {
                    this.skipNextNewLine = true;
                    break;
                }
                case $.IMAGE: {
                    // `<image>` is forwarded as `<img>`.
                    token.tagName = html.TAG_NAMES.IMG;
                    token.tagID = $.IMG;
                    break;
                }
                default:
                // Do nothing
            }

            this._ensureTokenizerMode(tn);
        }

        this.handler.onStartTag(token);
    }

    /**
     * Handles an end tag: leaves the current namespace when the tag closes an integration
     * point or the foreign root (`svg` / `math`), then forwards the token.
     *
     * @internal
     */
    onEndTag(token: Token.TagToken): void {
        let tn = token.tagID;

        if (!this.inForeignContent) {
            const previousNs = this.namespaceStack[1];

            if (previousNs === html.NS.SVG) {
                // The tokenizer lower-cases tag names; map back to the SVG spelling before the check.
                const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);

                if (adjustedTagName) {
                    tn = html.getTagID(adjustedTagName);
                }
            }

            //NOTE: check for exit from integration point
            if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
                this._leaveCurrentNamespace();
            }
        } else if (
            (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
            (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)
        ) {
            this._leaveCurrentNamespace();
        }

        //NOTE: adjust the end tag name as well so it is consistent with adjusted SVG start tags
        if (this.namespaceStack[0] === html.NS.SVG) {
            foreignContent.adjustTokenSVGTagName(token);
        }

        this.handler.onEndTag(token);
    }
}