'use strict';

const Tokenizer = require('parse5/lib/tokenizer');
const foreignContent = require('parse5/lib/common/foreign-content');
const unicode = require('parse5/lib/common/unicode');
const HTML = require('parse5/lib/common/html');

//Aliases
const $ = HTML.TAG_NAMES;
const NS = HTML.NAMESPACES;

//ParserFeedbackSimulator
//Simulates the adjustments of the Tokenizer that a standard parser performs during tree construction.
class ParserFeedbackSimulator {
    /**
     * @param {object} tokenizer - Token source. This class calls its `getNextToken()`
     *     method and writes its `state` and `allowCDATA` properties.
     */
    constructor(tokenizer) {
        this.tokenizer = tokenizer;

        this.namespaceStack = [];
        this.namespaceStackTop = -1;
        // Explicitly initialised so the flag is always a boolean.
        this.skipNextNewLine = false;
        this._enterNamespace(NS.HTML);
    }

    /**
     * Pulls the next token from the wrapped tokenizer and applies the feedback
     * adjustments (namespace tracking, tokenizer mode switching, leading
     * newline skipping, NULL character replacement in foreign content).
     *
     * @returns {object} The (possibly mutated) token produced by the tokenizer.
     */
    getNextToken() {
        const token = this.tokenizer.getNextToken();

        // The "skip leading newline" request applies only to the token that
        // immediately follows <pre>/<listing>/<textarea>. Consume the flag here
        // for every token except a hibernation token, so a start/end tag in
        // between cannot leave it armed for a later, unrelated newline.
        // This must happen before dispatch: the start tag handler may re-arm it.
        const skipNewLine = this.skipNextNewLine;

        if (skipNewLine && token.type !== Tokenizer.HIBERNATION_TOKEN) {
            this.skipNextNewLine = false;
        }

        if (token.type === Tokenizer.START_TAG_TOKEN) {
            this._handleStartTagToken(token);
        } else if (token.type === Tokenizer.END_TAG_TOKEN) {
            this._handleEndTagToken(token);
        } else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
            // In foreign content a NULL character is emitted as a regular
            // character token carrying the replacement character.
            token.type = Tokenizer.CHARACTER_TOKEN;
            token.chars = unicode.REPLACEMENT_CHARACTER;
        } else if (
            skipNewLine &&
            token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN &&
            token.chars[0] === '\n'
        ) {
            // The token consisted only of the skipped newline: drop it entirely.
            if (token.chars.length === 1) {
                return this.getNextToken();
            }

            token.chars = token.chars.slice(1);
        }

        return token;
    }

    //Namespace stack mutations

    /**
     * Pushes `namespace` onto the namespace stack and updates the derived
     * state (`inForeignContent`, `currentNamespace`, `tokenizer.allowCDATA`).
     *
     * @param {string} namespace - One of the `HTML.NAMESPACES` values.
     */
    _enterNamespace(namespace) {
        this.namespaceStackTop++;
        this.namespaceStack.push(namespace);

        this.inForeignContent = namespace !== NS.HTML;
        this.currentNamespace = namespace;
        // CDATA sections are allowed only while in foreign content.
        this.tokenizer.allowCDATA = this.inForeignContent;
    }

    /**
     * Pops the current namespace and restores the derived state from the
     * namespace that is now on top of the stack.
     */
    _leaveCurrentNamespace() {
        this.namespaceStackTop--;
        this.namespaceStack.pop();

        this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
        this.inForeignContent = this.currentNamespace !== NS.HTML;
        this.tokenizer.allowCDATA = this.inForeignContent;
    }

    //Token handlers

    /**
     * Switches the tokenizer state according to the HTML start tag name `tn`.
     * Tag names not listed here leave the tokenizer state untouched.
     *
     * @param {string} tn - Start tag name.
     */
    _ensureTokenizerMode(tn) {
        if (tn === $.TEXTAREA || tn === $.TITLE) {
            this.tokenizer.state = Tokenizer.MODE.RCDATA;
        } else if (tn === $.PLAINTEXT) {
            this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
        } else if (tn === $.SCRIPT) {
            this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
        } else if (
            tn === $.STYLE ||
            tn === $.IFRAME ||
            tn === $.XMP ||
            tn === $.NOEMBED ||
            tn === $.NOFRAMES ||
            tn === $.NOSCRIPT
        ) {
            this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
        }
    }

    /**
     * Handles a start tag: tracks entering/leaving foreign content, adjusts
     * foreign tag names and attributes, and, for HTML content, arms the
     * leading-newline skip and switches the tokenizer mode.
     *
     * @param {object} token - Start tag token; may be mutated in place.
     */
    _handleStartTagToken(token) {
        let tn = token.tagName;

        if (tn === $.SVG) {
            this._enterNamespace(NS.SVG);
        } else if (tn === $.MATH) {
            this._enterNamespace(NS.MATHML);
        }

        if (this.inForeignContent) {
            // Tags for which causesExit() is true leave the current foreign
            // namespace; nothing else is done for them here.
            if (foreignContent.causesExit(token)) {
                this._leaveCurrentNamespace();
                return;
            }

            const currentNs = this.currentNamespace;

            if (currentNs === NS.MATHML) {
                foreignContent.adjustTokenMathMLAttrs(token);
            } else if (currentNs === NS.SVG) {
                foreignContent.adjustTokenSVGTagName(token);
                foreignContent.adjustTokenSVGAttrs(token);
            }

            foreignContent.adjustTokenXMLAttrs(token);

            // Re-read: the SVG adjustment above may have changed the tag name.
            tn = token.tagName;

            // Content of a non-self-closing integration point is treated as HTML.
            if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
                this._enterNamespace(NS.HTML);
            }
        } else {
            if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING) {
                this.skipNextNewLine = true;
            } else if (tn === $.IMAGE) {
                token.tagName = $.IMG;
            }

            // NOTE: uses the tag name as read on entry (so `image`, not `img`).
            this._ensureTokenizerMode(tn);
        }
    }

    /**
     * Handles an end tag: leaves an integration point or a foreign namespace
     * when the tag closes it, and adjusts the SVG tag name casing.
     *
     * @param {object} token - End tag token; may be mutated in place.
     */
    _handleEndTagToken(token) {
        let tn = token.tagName;

        if (!this.inForeignContent) {
            const previousNs = this.namespaceStack[this.namespaceStackTop - 1];

            if (previousNs === NS.SVG && foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn]) {
                tn = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn];
            }

            //NOTE: check for exit from integration point
            if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
                this._leaveCurrentNamespace();
            }
        } else if (
            (tn === $.SVG && this.currentNamespace === NS.SVG) ||
            (tn === $.MATH && this.currentNamespace === NS.MATHML)
        ) {
            this._leaveCurrentNamespace();
        }

        // NOTE: adjust end tag name as well for consistency
        if (this.currentNamespace === NS.SVG) {
            foreignContent.adjustTokenSVGTagName(token);
        }
    }
}

module.exports = ParserFeedbackSimulator;