• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'use strict';
2
3const Tokenizer = require('parse5/lib/tokenizer');
4const foreignContent = require('parse5/lib/common/foreign-content');
5const unicode = require('parse5/lib/common/unicode');
6const HTML = require('parse5/lib/common/html');
7
//Short aliases for the tag-name and namespace tables of parse5's HTML module
const { TAG_NAMES: $, NAMESPACES: NS } = HTML;
11
12//ParserFeedbackSimulator
//Simulates the adjustments to the Tokenizer that the standard parser performs during tree construction.
//Wraps a tokenizer and post-processes every token pulled through getNextToken():
//it switches the tokenizer state after certain start tags, tracks HTML/SVG/MathML namespace
//nesting (toggling `tokenizer.allowCDATA` accordingly), applies the foreign-content tag and
//attribute adjustments, and drops the newline that directly follows <pre>, <textarea> or <listing>.
class ParserFeedbackSimulator {
    //tokenizer - object exposing getNextToken(); its `state` and `allowCDATA` properties
    //are written by this class.
    constructor(tokenizer) {
        this.tokenizer = tokenizer;

        //NOTE: armed by <pre>, <textarea> and <listing> start tags in HTML content and
        //consumed by the very next (non-hibernation) token.
        this.skipNextNewLine = false;

        //NOTE: the bottom of the stack is always the HTML namespace
        this.namespaceStack = [];
        this.namespaceStackTop = -1;
        this._enterNamespace(NS.HTML);
    }

    //Pulls the next token from the wrapped tokenizer, applies the simulated parser feedback
    //and returns the (possibly mutated) token. A whitespace token that consists solely of the
    //skipped newline is swallowed and the token after it is returned instead.
    getNextToken() {
        const token = this.tokenizer.getNextToken();

        //NOTE: only the token that immediately follows the start tag may lose its leading
        //newline, so the flag is consumed here regardless of the token type. Previously tag
        //tokens left it armed, so in `<pre><b>\n` the newline after <b> was wrongly dropped.
        //Hibernation tokens deliberately leave the flag untouched.
        const skipLeadingNewLine = this.skipNextNewLine;

        if (token.type !== Tokenizer.HIBERNATION_TOKEN) {
            this.skipNextNewLine = false;
        }

        if (token.type === Tokenizer.START_TAG_TOKEN) {
            //NOTE: may re-arm skipNextNewLine for the following token
            this._handleStartTagToken(token);
        } else if (token.type === Tokenizer.END_TAG_TOKEN) {
            this._handleEndTagToken(token);
        } else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
            //NOTE: in foreign content a NULL character is reported as a replacement character
            token.type = Tokenizer.CHARACTER_TOKEN;
            token.chars = unicode.REPLACEMENT_CHARACTER;
        } else if (
            skipLeadingNewLine &&
            token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN &&
            token.chars[0] === '\n'
        ) {
            if (token.chars.length === 1) {
                //NOTE: nothing is left of this token - hand out the next one instead
                return this.getNextToken();
            }

            token.chars = token.chars.slice(1);
        }

        return token;
    }

    //Namespace stack mutations

    //Pushes `namespace` and refreshes the derived state (currentNamespace, inForeignContent,
    //tokenizer.allowCDATA). CDATA sections are allowed only outside the HTML namespace.
    _enterNamespace(namespace) {
        this.namespaceStackTop++;
        this.namespaceStack.push(namespace);

        this.inForeignContent = namespace !== NS.HTML;
        this.currentNamespace = namespace;
        this.tokenizer.allowCDATA = this.inForeignContent;
    }

    //Pops the current namespace and restores the derived state from the new stack top.
    _leaveCurrentNamespace() {
        this.namespaceStackTop--;
        this.namespaceStack.pop();

        this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
        this.inForeignContent = this.currentNamespace !== NS.HTML;
        this.tokenizer.allowCDATA = this.inForeignContent;
    }

    //Token handlers

    //Switches the tokenizer state after the start tag `tn` of an element whose content
    //is tokenized as RCDATA, PLAINTEXT, script data or RAWTEXT. Other tag names are ignored.
    _ensureTokenizerMode(tn) {
        switch (tn) {
            case $.TEXTAREA:
            case $.TITLE:
                this.tokenizer.state = Tokenizer.MODE.RCDATA;
                break;

            case $.PLAINTEXT:
                this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
                break;

            case $.SCRIPT:
                this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
                break;

            case $.STYLE:
            case $.IFRAME:
            case $.XMP:
            case $.NOEMBED:
            case $.NOFRAMES:
            case $.NOSCRIPT:
                this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
                break;

            default:
            //NOTE: regular element - the tokenizer state stays as it is
        }
    }

    //Applies the feedback for a start tag token: namespace bookkeeping and token adjustment
    //in foreign content; newline skipping, <image> -> <img> renaming and tokenizer state
    //switching in HTML content. Mutates `token` in place.
    _handleStartTagToken(token) {
        let tn = token.tagName;
        let enteredForeignRoot = false;

        if (tn === $.SVG) {
            this._enterNamespace(NS.SVG);
            enteredForeignRoot = true;
        } else if (tn === $.MATH) {
            this._enterNamespace(NS.MATHML);
            enteredForeignRoot = true;
        }

        if (this.inForeignContent) {
            //NOTE: tags reported by causesExit() pop the current namespace;
            //the token itself gets no further adjustment.
            if (foreignContent.causesExit(token)) {
                this._leaveCurrentNamespace();
                return;
            }

            const currentNs = this.currentNamespace;

            if (currentNs === NS.MATHML) {
                foreignContent.adjustTokenMathMLAttrs(token);
            } else if (currentNs === NS.SVG) {
                foreignContent.adjustTokenSVGTagName(token);
                foreignContent.adjustTokenSVGAttrs(token);
            }

            foreignContent.adjustTokenXMLAttrs(token);

            //NOTE: the tag name may have been rewritten by the SVG adjustment above
            tn = token.tagName;

            if (token.selfClosing) {
                //NOTE: a self-closing <svg/> or <math/> has no content and will never be
                //matched by an end tag, so the namespace entered above must be left right
                //away - otherwise everything after it would be treated as foreign content.
                if (enteredForeignRoot) {
                    this._leaveCurrentNamespace();
                }
            } else if (foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
                //NOTE: content of an integration point is handled as HTML again
                this._enterNamespace(NS.HTML);
            }
        } else {
            if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING) {
                this.skipNextNewLine = true;
            } else if (tn === $.IMAGE) {
                token.tagName = $.IMG;
            }

            this._ensureTokenizerMode(tn);
        }
    }

    //Applies the feedback for an end tag token: leaves an integration point or a foreign
    //root (<svg>/<math>) when the tag closes it, and adjusts the tag name while in SVG content.
    _handleEndTagToken(token) {
        let tn = token.tagName;

        if (!this.inForeignContent) {
            //NOTE: HTML is pushed above another namespace only at integration points, so the
            //entry right below the top (undefined at the stack bottom) is the namespace
            //the potential integration point element belongs to.
            const previousNs = this.namespaceStack[this.namespaceStackTop - 1];

            //NOTE: the integration point check below is done against the adjusted SVG name -
            //presumably the map translates the tokenizer's tag names to SVG's spelling.
            if (previousNs === NS.SVG && foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn]) {
                tn = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn];
            }

            //NOTE: check for exit from integration point
            if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
                this._leaveCurrentNamespace();
            }
        } else if (
            (tn === $.SVG && this.currentNamespace === NS.SVG) ||
            (tn === $.MATH && this.currentNamespace === NS.MATHML)
        ) {
            this._leaveCurrentNamespace();
        }

        //NOTE: adjust end tag name as well for consistency
        if (this.currentNamespace === NS.SVG) {
            foreignContent.adjustTokenSVGTagName(token);
        }
    }
}
158
159module.exports = ParserFeedbackSimulator;
160