'use strict';

var Tokenizer = require('../tokenization/tokenizer'),
    ForeignContent = require('../common/foreign_content'),
    UNICODE = require('../common/unicode'),
    HTML = require('../common/html');

//Aliases
var $ = HTML.TAG_NAMES,
    NS = HTML.NAMESPACES;


//Tokenizer proxy
//NOTE: this proxy simulates the adjustments of the Tokenizer which are performed by the standard parser
//during tree construction: it switches the tokenizer state after certain start tags, tracks the current
//namespace to toggle CDATA support and replaces null characters while in foreign content.
var TokenizerProxy = module.exports = function (html, options) {
    this.tokenizer = new Tokenizer(html, options);

    //NOTE: an empty stack means that no foreign element is open, i.e. we are in plain HTML content.
    this.namespaceStack = [];
    this.namespaceStackTop = -1;
    this.currentNamespace = null;
    this.inForeignContent = false;
};

//API
//Fetches the next token from the underlying tokenizer, applies the simulated parser feedback
//for it and returns the (possibly modified) token.
TokenizerProxy.prototype.getNextToken = function () {
    var token = this.tokenizer.getNextToken();

    if (token.type === Tokenizer.START_TAG_TOKEN)
        this._handleStartTagToken(token);

    else if (token.type === Tokenizer.END_TAG_TOKEN)
        this._handleEndTagToken(token);

    //NOTE: in foreign content a null character is emitted as a replacement character token.
    else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
        token.type = Tokenizer.CHARACTER_TOKEN;
        token.chars = UNICODE.REPLACEMENT_CHARACTER;
    }

    return token;
};

//Namespace stack mutations
//Pushes the given namespace and makes it the current one. CDATA sections are allowed
//by the tokenizer only while the current namespace is not the HTML namespace.
TokenizerProxy.prototype._enterNamespace = function (namespace) {
    this.namespaceStackTop++;
    this.namespaceStack.push(namespace);

    this.inForeignContent = namespace !== NS.HTML;
    this.currentNamespace = namespace;
    this.tokenizer.allowCDATA = this.inForeignContent;
};

//Pops the current namespace and restores the state of the enclosing one.
TokenizerProxy.prototype._leaveCurrentNamespace = function () {
    this.namespaceStackTop--;
    this.namespaceStack.pop();

    if (this.namespaceStackTop < 0) {
        //NOTE: the outermost foreign element has been closed, so we are back in plain HTML content.
        //Reading namespaceStack[-1] yields undefined, and `undefined !== NS.HTML` would leave the proxy
        //stuck in foreign content forever. Restore the same state the constructor sets up instead.
        this.namespaceStackTop = -1;
        this.currentNamespace = null;
        this.inForeignContent = false;
    }

    else {
        this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
        this.inForeignContent = this.currentNamespace !== NS.HTML;
    }

    this.tokenizer.allowCDATA = this.inForeignContent;
};

//Token handlers
//Switches the tokenizer state for elements whose content is not tokenized as regular markup.
//Tag names not listed here leave the tokenizer state untouched.
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) {
    if (tn === $.TEXTAREA || tn === $.TITLE)
        this.tokenizer.state = Tokenizer.MODE.RCDATA;

    else if (tn === $.PLAINTEXT)
        this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;

    else if (tn === $.SCRIPT)
        this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;

    else if (tn === $.STYLE || tn === $.IFRAME || tn === $.XMP ||
             tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT) {
        this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
    }
};

//Updates the namespace stack (or the tokenizer state, when in HTML content) for a start tag token.
TokenizerProxy.prototype._handleStartTagToken = function (token) {
    var tn = token.tagName;

    if (tn === $.SVG)
        this._enterNamespace(NS.SVG);

    else if (tn === $.MATH)
        this._enterNamespace(NS.MATHML);

    else {
        if (this.inForeignContent) {
            //NOTE: some start tags break out of foreign content...
            if (ForeignContent.causesExit(token))
                this._leaveCurrentNamespace();

            //...and integration points switch back to HTML content inside of a foreign element.
            else if (ForeignContent.isMathMLTextIntegrationPoint(tn, this.currentNamespace) ||
                     ForeignContent.isHtmlIntegrationPoint(tn, this.currentNamespace, token.attrs)) {
                this._enterNamespace(NS.HTML);
            }
        }

        else
            this._ensureTokenizerMode(tn);
    }
};

//Updates the namespace stack (or the tokenizer state, when in HTML content) for an end tag token.
TokenizerProxy.prototype._handleEndTagToken = function (token) {
    var tn = token.tagName;

    if (!this.inForeignContent) {
        //NOTE: this is undefined when fewer than two namespaces are on the stack.
        var previousNs = this.namespaceStack[this.namespaceStackTop - 1];

        //NOTE: check for exit from integration point
        //NOTE(review): the attrs of the end tag token are passed through as-is; confirm that
        //ForeignContent copes with end tags for attribute-dependent integration points.
        if (ForeignContent.isMathMLTextIntegrationPoint(tn, previousNs) ||
            ForeignContent.isHtmlIntegrationPoint(tn, previousNs, token.attrs)) {
            this._leaveCurrentNamespace();
        }

        else if (tn === $.SCRIPT)
            this.tokenizer.state = Tokenizer.MODE.DATA;
    }

    else if ((tn === $.SVG && this.currentNamespace === NS.SVG) ||
             (tn === $.MATH && this.currentNamespace === NS.MATHML))
        this._leaveCurrentNamespace();
};