• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import {
2    Tokenizer,
3    type TokenizerOptions,
4    TokenizerMode,
5    type TokenHandler,
6    Token,
7    foreignContent,
8    html,
9} from 'parse5';
10
/** Shorthand alias for parse5's tag-ID table (`html.TAG_ID`), used in `case $.X` labels. */
const $ = html.TAG_ID;

/** U+FFFD REPLACEMENT CHARACTER; emitted in place of a NULL character inside foreign content. */
const REPLACEMENT_CHARACTER = '\uFFFD';
/** Code point of U+000A LINE FEED, compared against the first char code of whitespace tokens. */
const LINE_FEED_CODE_POINT = 0x0a;
15
/**
 * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
 *
 * The simulator sits between a `Tokenizer` and a downstream `TokenHandler`:
 * it receives every token first, updates the tokenizer (its `state` and its
 * `inForeignNode` flag) and its own bookkeeping, and then forwards the token
 * to the wrapped handler.
 */
export class ParserFeedbackSimulator implements TokenHandler {
    /** Namespaces currently entered; index 0 is the innermost (current) one. */
    private namespaceStack: html.NS[] = [];
    /** `true` while the current namespace is not HTML; mirrored onto `tokenizer.inForeignNode`. */
    public inForeignContent = false;
    /**
     * Armed by a `<pre>`, `<textarea>` or `<listing>` start tag in HTML content.
     * While armed, a line feed at the start of the very next token is dropped.
     */
    public skipNextNewLine = false;
    /** Tokenizer created by this simulator; the simulator is registered as its token handler. */
    public tokenizer: Tokenizer;

    /**
     * @param options Options passed through to the `Tokenizer` constructor.
     * @param handler Downstream handler that receives the (possibly adjusted) tokens.
     */
    constructor(
        options: TokenizerOptions,
        private handler: TokenHandler,
    ) {
        this.tokenizer = new Tokenizer(options, this);
        this._enterNamespace(html.NS.HTML);
    }

    /**
     * Forwards a NULL character token. In foreign content it is replaced by a
     * character token carrying U+FFFD at the same location.
     *
     * @internal
     */
    onNullCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;

        if (this.inForeignContent) {
            this.handler.onCharacter({
                type: Token.TokenType.CHARACTER,
                chars: REPLACEMENT_CHARACTER,
                location: token.location,
            });
        } else {
            this.handler.onNullCharacter(token);
        }
    }

    /**
     * Forwards a whitespace token, first removing a leading line feed when the
     * previous token armed `skipNextNewLine`. A token consisting of only that
     * line feed is swallowed entirely.
     *
     * @internal
     */
    onWhitespaceCharacter(token: Token.CharacterToken): void {
        // Only the token directly after the start tag is eligible, so the flag
        // is consumed here whether or not this token begins with a line feed.
        const skipLeadingNewLine = this.skipNextNewLine;

        this.skipNextNewLine = false;

        if (skipLeadingNewLine && token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) {
            if (token.chars.length === 1) {
                return;
            }

            token.chars = token.chars.slice(1);
        }

        this.handler.onWhitespaceCharacter(token);
    }

    /** @internal */
    onCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;
        this.handler.onCharacter(token);
    }

    /** @internal */
    onComment(token: Token.CommentToken): void {
        this.skipNextNewLine = false;
        this.handler.onComment(token);
    }

    /** @internal */
    onDoctype(token: Token.DoctypeToken): void {
        this.skipNextNewLine = false;
        this.handler.onDoctype(token);
    }

    /** @internal */
    onEof(token: Token.EOFToken): void {
        this.skipNextNewLine = false;
        this.handler.onEof(token);
    }

    //Namespace stack mutations

    /** Pushes `namespace` as the current namespace and updates the foreign-content flags. */
    private _enterNamespace(namespace: html.NS): void {
        this.namespaceStack.unshift(namespace);
        this.inForeignContent = namespace !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    /** Pops the current namespace and recomputes the foreign-content flags from the new top. */
    private _leaveCurrentNamespace(): void {
        this.namespaceStack.shift();
        this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    //Token handlers

    /**
     * Switches the tokenizer state for HTML elements whose content is not
     * tokenized as regular data (RCDATA, PLAINTEXT, script data, RAWTEXT).
     * Any other tag ID leaves the tokenizer state untouched.
     */
    private _ensureTokenizerMode(tn: html.TAG_ID): void {
        switch (tn) {
            case $.TEXTAREA:
            case $.TITLE: {
                this.tokenizer.state = TokenizerMode.RCDATA;
                break;
            }
            case $.PLAINTEXT: {
                this.tokenizer.state = TokenizerMode.PLAINTEXT;
                break;
            }
            case $.SCRIPT: {
                this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
                break;
            }
            case $.STYLE:
            case $.IFRAME:
            case $.XMP:
            case $.NOEMBED:
            case $.NOFRAMES:
            case $.NOSCRIPT: {
                this.tokenizer.state = TokenizerMode.RAWTEXT;
                break;
            }
            default:
            // Do nothing
        }
    }

    /**
     * Tracks namespace changes caused by a start tag, applies the HTML-content
     * adjustments (newline skipping, `image` -> `img`, tokenizer state) and
     * forwards the token.
     *
     * @internal
     */
    onStartTag(token: Token.TagToken): void {
        // A start tag is a token too: a flag armed by an earlier tag must not
        // survive it. It is re-armed below for <pre>/<textarea>/<listing>.
        this.skipNextNewLine = false;

        let tn = token.tagID;

        switch (tn) {
            case $.SVG: {
                this._enterNamespace(html.NS.SVG);
                break;
            }
            case $.MATH: {
                this._enterNamespace(html.NS.MATHML);
                break;
            }
            default:
            // Do nothing
        }

        if (this.inForeignContent) {
            if (foreignContent.causesExit(token)) {
                this._leaveCurrentNamespace();
            } else {
                const currentNs = this.namespaceStack[0];

                // Resolve the case-adjusted SVG tag ID before the integration-point
                // check, the same way onEndTag does. Only the local ID is adjusted;
                // the token itself is forwarded unchanged.
                if (currentNs === html.NS.SVG) {
                    const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);

                    if (adjustedTagName) {
                        tn = html.getTagID(adjustedTagName);
                    }
                }

                if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
                    this._enterNamespace(html.NS.HTML);
                }
            }
        } else {
            switch (tn) {
                case $.PRE:
                case $.TEXTAREA:
                case $.LISTING: {
                    this.skipNextNewLine = true;
                    break;
                }
                case $.IMAGE: {
                    token.tagName = html.TAG_NAMES.IMG;
                    token.tagID = $.IMG;
                    break;
                }
                default:
                // Do nothing
            }

            // NOTE: `tn` still holds the original tag ID here (IMAGE is not remapped locally).
            this._ensureTokenizerMode(tn);
        }

        this.handler.onStartTag(token);
    }

    /**
     * Tracks namespace changes caused by an end tag (leaving an integration
     * point, or closing `svg` / `math`) and forwards the token.
     *
     * @internal
     */
    onEndTag(token: Token.TagToken): void {
        // An end tag also consumes the "next token" slot after <pre>/<textarea>/<listing>.
        this.skipNextNewLine = false;

        let tn = token.tagID;

        if (!this.inForeignContent) {
            const previousNs = this.namespaceStack[1];

            if (previousNs === html.NS.SVG) {
                const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);

                if (adjustedTagName) {
                    tn = html.getTagID(adjustedTagName);
                }
            }

            //NOTE: check for exit from integration point
            if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
                this._leaveCurrentNamespace();
            }
        } else if (
            (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
            (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)
        ) {
            this._leaveCurrentNamespace();
        }

        this.handler.onEndTag(token);
    }
}
211