• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'use strict';
2
3var Tokenizer = require('../tokenization/tokenizer'),
4    ForeignContent = require('../common/foreign_content'),
5    UNICODE = require('../common/unicode'),
6    HTML = require('../common/html');
7
8//Aliases
9var $ = HTML.TAG_NAMES,
10    NS = HTML.NAMESPACES;
11
12
13//Tokenizer proxy
//NOTE: this proxy simulates the adjustments of the Tokenizer that the standard parser performs during tree construction.
//Creates a proxy around a fresh Tokenizer built from the given `html` and `options`.
//The proxy keeps a stack of the namespaces entered so far, so that it can tell
//whether the tokens currently being produced belong to foreign (non-HTML) content.
var TokenizerProxy = module.exports = function (html, options) {
    this.tokenizer = new Tokenizer(html, options);

    this.namespaceStack = [];
    this.namespaceStackTop = -1;

    //NOTE: seed the stack with the HTML namespace. With an empty stack, leaving the outermost
    //<svg> or <math> restores `undefined` as the current namespace; since `undefined !== NS.HTML`
    //the proxy would then stay in foreign content mode for the rest of the input.
    //_enterNamespace also sets currentNamespace, inForeignContent and tokenizer.allowCDATA.
    this._enterNamespace(NS.HTML);
};
23
24//API
//Pulls the next token from the wrapped tokenizer, lets the proxy update its own
//state for start and end tags, and returns that same token object.
TokenizerProxy.prototype.getNextToken = function () {
    var token = this.tokenizer.getNextToken();

    switch (token.type) {
        case Tokenizer.START_TAG_TOKEN:
            this._handleStartTagToken(token);
            break;

        case Tokenizer.END_TAG_TOKEN:
            this._handleEndTagToken(token);
            break;

        case Tokenizer.NULL_CHARACTER_TOKEN:
            //NOTE: in foreign content a null character token is rewritten in place
            //into a character token carrying the replacement character.
            if (this.inForeignContent) {
                token.type = Tokenizer.CHARACTER_TOKEN;
                token.chars = UNICODE.REPLACEMENT_CHARACTER;
            }
            break;
    }

    return token;
};
41
42//Namespace stack mutations
//Pushes `namespace` onto the namespace stack and makes it the current one.
//Any namespace other than NS.HTML counts as foreign content, and the
//tokenizer's allowCDATA flag is set to match.
TokenizerProxy.prototype._enterNamespace = function (namespace) {
    var isForeign = namespace !== NS.HTML;

    this.namespaceStackTop += 1;
    this.namespaceStack.push(namespace);

    this.currentNamespace = namespace;
    this.inForeignContent = isForeign;
    this.tokenizer.allowCDATA = isForeign;
};
51
//Pops the current namespace off the stack and restores the one beneath it,
//recomputing the foreign content flag and the tokenizer's allowCDATA flag.
TokenizerProxy.prototype._leaveCurrentNamespace = function () {
    this.namespaceStack.pop();
    this.namespaceStackTop -= 1;

    //NOTE(review): if the pop empties the stack, the restored namespace is `undefined`,
    //which differs from NS.HTML and is therefore treated as foreign content.
    var restoredNs = this.namespaceStack[this.namespaceStackTop],
        isForeign = restoredNs !== NS.HTML;

    this.currentNamespace = restoredNs;
    this.inForeignContent = isForeign;
    this.tokenizer.allowCDATA = isForeign;
};
60
61//Token handlers
//Switches the tokenizer state according to the start tag name `tn`.
//Tag names not listed below leave the tokenizer state untouched.
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) {
    switch (tn) {
        case $.TEXTAREA:
        case $.TITLE:
            this.tokenizer.state = Tokenizer.MODE.RCDATA;
            break;

        case $.PLAINTEXT:
            this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
            break;

        case $.SCRIPT:
            this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
            break;

        case $.STYLE:
        case $.IFRAME:
        case $.XMP:
        case $.NOEMBED:
        case $.NOFRAMES:
        case $.NOSCRIPT:
            this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
            break;
    }
};
77
//Updates the proxy state for a start tag token:
// * <svg> and <math> always enter their respective namespaces;
// * outside foreign content, the tokenizer mode is adjusted for the tag;
// * inside foreign content, the tag either leaves the current namespace
//   or, at an integration point, enters the HTML namespace.
TokenizerProxy.prototype._handleStartTagToken = function (token) {
    var tagName = token.tagName;

    if (tagName === $.SVG) {
        this._enterNamespace(NS.SVG);
        return;
    }

    if (tagName === $.MATH) {
        this._enterNamespace(NS.MATHML);
        return;
    }

    if (!this.inForeignContent) {
        this._ensureTokenizerMode(tagName);
        return;
    }

    if (ForeignContent.causesExit(token)) {
        this._leaveCurrentNamespace();
        return;
    }

    var ns = this.currentNamespace;

    if (ForeignContent.isMathMLTextIntegrationPoint(tagName, ns) ||
        ForeignContent.isHtmlIntegrationPoint(tagName, ns, token.attrs)) {
        this._enterNamespace(NS.HTML);
    }
};
102
//Updates the proxy state for an end tag token:
// * inside foreign content, </svg> or </math> matching the current namespace leaves it;
// * outside foreign content, an end tag that closes an integration point of the
//   enclosing namespace leaves the current namespace, and </script> puts the
//   tokenizer back into the DATA mode.
TokenizerProxy.prototype._handleEndTagToken = function (token) {
    var tagName = token.tagName;

    if (this.inForeignContent) {
        var closesSvg = tagName === $.SVG && this.currentNamespace === NS.SVG,
            closesMath = tagName === $.MATH && this.currentNamespace === NS.MATHML;

        if (closesSvg || closesMath)
            this._leaveCurrentNamespace();

        return;
    }

    //NOTE: the namespace right below the current one decides whether
    //this end tag closes an integration point.
    var enclosingNs = this.namespaceStack[this.namespaceStackTop - 1];

    if (ForeignContent.isMathMLTextIntegrationPoint(tagName, enclosingNs) ||
        ForeignContent.isHtmlIntegrationPoint(tagName, enclosingNs, token.attrs)) {
        this._leaveCurrentNamespace();
        return;
    }

    if (tagName === $.SCRIPT)
        this.tokenizer.state = Tokenizer.MODE.DATA;
};
123