• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31// Generate js file as follows:
32//
33// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
34// | sed 's|^yy\([^:]*\)*\:|case \1:|' \
35// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
36// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
37// | sed 's|[*]cursor|this._charAt(cursor)|' \
38// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
39// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
40// | sed 's|unsigned\ int|var|' \
41// | sed 's|var\ yych|case 1: var yych|'
42
/**
 * Tokenizer for HTML source text. It combines a re2c-generated lexer (driven by a
 * "lex condition") with a small set of parser flags (the "parse condition") that
 * track what the next token inside a tag is expected to be.
 *
 * @constructor
 * @extends {WebInspector.SourceTokenizer}
 */
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer start conditions. The order is determined by the generated code.
    this._lexConditions = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };

    // Switch labels for the entry point of each lex condition. The re2c template
    // refers to them through the "case this.case_" condition prefix.
    this.case_INITIAL = 1000;
    this.case_COMMENT = 1001;
    this.case_DOCTYPE = 1002;
    this.case_TAG = 1003;
    this.case_DSTRING = 1004;
    this.case_SSTRING = 1005;

    // Parser flags. Apart from INITIAL (no flags) each value is a distinct bit, so
    // several flags can be combined and tested with bitwise operators.
    this._parseConditions = {
        INITIAL: 0,
        ATTRIBUTE: 1,
        ATTRIBUTE_VALUE: 2,
        LINKIFY: 4,
        A_NODE: 8,
        SCRIPT: 16,
        STYLE: 32
    };

    this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL };

    // Start from a copy rather than from initialCondition itself: the prototype methods
    // update this._condition.parseCondition in place, so sharing a single object could let
    // tokenizing silently alter initialCondition.
    // NOTE(review): this matters only if the inherited "condition" accessor keeps the object
    // it is given; that accessor is not visible from this file - confirm.
    this.condition = {
        lexCondition: this.initialCondition.lexCondition,
        parseCondition: this.initialCondition.parseCondition
    };
};
76
77WebInspector.SourceHTMLTokenizer.prototype = {
78    set line(line) {
79        if (this._internalJavaScriptTokenizer) {
80            var match = /<\/script/i.exec(line);
81            if (match) {
82                this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
83            } else
84                this._internalJavaScriptTokenizer.line = line;
85        } else if (this._internalCSSTokenizer) {
86            var match = /<\/style/i.exec(line);
87            if (match) {
88                this._internalCSSTokenizer.line = line.substring(0, match.index);
89            } else
90                this._internalCSSTokenizer.line = line;
91        }
92        this._line = line;
93    },
94
95    _isExpectingAttribute: function()
96    {
97        return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
98    },
99
100    _isExpectingAttributeValue: function()
101    {
102        return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
103    },
104
105    _setExpectingAttribute: function()
106    {
107        if (this._isExpectingAttributeValue())
108            this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
109        this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
110    },
111
112    _setExpectingAttributeValue: function()
113    {
114        if (this._isExpectingAttribute())
115            this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
116        this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
117    },
118
119    _stringToken: function(cursor, stringEnds)
120    {
121        if (!this._isExpectingAttributeValue()) {
122            this.tokenType = null;
123            return cursor;
124        }
125        this.tokenType = this._attrValueTokenType();
126        if (stringEnds)
127            this._setExpectingAttribute();
128        return cursor;
129    },
130
131    _attrValueTokenType: function()
132    {
133        if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
134            if (this._condition.parseCondition & this._parseConditions.A_NODE)
135                return "html-external-link";
136            return "html-resource-link";
137        }
138        return "html-attribute-value";
139    },
140
141    nextToken: function(cursor)
142    {
143        if (this._internalJavaScriptTokenizer) {
144            // Re-set line to force </script> detection first.
145            this.line = this._line;
146            if (cursor !== this._internalJavaScriptTokenizer._line.length) {
147                // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
148                this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
149                var result = this._internalJavaScriptTokenizer.nextToken(cursor);
150                this.tokenType = this._internalJavaScriptTokenizer.tokenType;
151                this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
152                return result;
153            } else if (cursor !== this._line.length)
154                delete this._internalJavaScriptTokenizer;
155        } else if (this._internalCSSTokenizer) {
156            // Re-set line to force </style> detection first.
157            this.line = this._line;
158            if (cursor !== this._internalCSSTokenizer._line.length) {
159                // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
160                this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition;
161                var result = this._internalCSSTokenizer.nextToken(cursor);
162                this.tokenType = this._internalCSSTokenizer.tokenType;
163                this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition;
164                return result;
165            } else if (cursor !== this._line.length)
166                delete this._internalCSSTokenizer;
167        }
168
169        var cursorOnEnter = cursor;
170        var gotoCase = 1;
171        while (1) {
172            switch (gotoCase)
173            // Following comment is replaced with generated state machine.
174            /*!re2c
175                re2c:define:YYCTYPE  = "var";
176                re2c:define:YYCURSOR = cursor;
177                re2c:define:YYGETCONDITION = "this.getLexCondition";
178                re2c:define:YYSETCONDITION = "this.setLexCondition";
179                re2c:condprefix = "case this.case_";
180                re2c:condenumprefix = "this._lexConditions.";
181                re2c:yyfill:enable = 0;
182                re2c:labelprefix = "case ";
183                re2c:indent:top = 2;
184                re2c:indent:string = "    ";
185
186                CommentContent = ([^-\r\n] | ("--" [^>]))*;
187                Comment = "<!--" CommentContent "-->";
188                CommentStart = "<!--" CommentContent [\r\n];
189                CommentEnd = CommentContent "-->";
190
191                DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
192                DocTypeContent = [^\r\n>]*;
193
194                ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
195                ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
196
197                StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee];
198                StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee];
199
200                LT = "<" | "</";
201                GT = ">";
202                EqualSign = "=";
203
204                DoubleStringContent = [^\r\n\"]*;
205                SingleStringContent = [^\r\n\']*;
206                StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
207                DoubleStringStart = "\"" DoubleStringContent [\r\n];
208                DoubleStringEnd = DoubleStringContent "\"";
209                SingleStringStart = "'" SingleStringContent [\r\n];
210                SingleStringEnd = SingleStringContent "'";
211
212                Identifier = [^ \r\n"'<>\[\]=]+;
213
214                <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
215                <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
216                <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
217                <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
218
219                <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
220                <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
221                <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
222
223                <INITIAL> ScriptStart => TAG
224                {
225                    if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
226                        // Do not tokenize script tag contents, keep lexer state, even though processing "<".
227                        this.setLexCondition(this._lexConditions.INITIAL);
228                        this.tokenType = null;
229                        return cursor;
230                    }
231                    this.tokenType = "html-tag";
232                    this._condition.parseCondition = this._parseConditions.SCRIPT;
233                    this._setExpectingAttribute();
234                    return cursor;
235                }
236
237                <INITIAL> ScriptEnd => TAG
238                {
239                    this.tokenType = "html-tag";
240                    this._condition.parseCondition = this._parseConditions.INITIAL;
241                    return cursor;
242                }
243
244                <INITIAL> StyleStart => TAG
245                {
246                    if (this._condition.parseCondition & this._parseConditions.STYLE) {
247                        // Do not tokenize style tag contents, keep lexer state, even though processing "<".
248                        this.setLexCondition(this._lexConditions.INITIAL);
249                        this.tokenType = null;
250                        return cursor;
251                    }
252                    this.tokenType = "html-tag";
253                    this._condition.parseCondition = this._parseConditions.STYLE;
254                    this._setExpectingAttribute();
255                    return cursor;
256                }
257
258                <INITIAL> StyleEnd => TAG
259                {
260                    this.tokenType = "html-tag";
261                    this._condition.parseCondition = this._parseConditions.INITIAL;
262                    return cursor;
263                }
264
265                <INITIAL> LT => TAG
266                {
267                    if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) {
268                        // Do not tokenize script and style tag contents, keep lexer state, even though processing "<".
269                        this.setLexCondition(this._lexConditions.INITIAL);
270                        this.tokenType = null;
271                        return cursor;
272                    }
273
274                    this._condition.parseCondition = this._parseConditions.INITIAL;
275                    this.tokenType = "html-tag";
276                    return cursor;
277                }
278
279                <TAG> GT => INITIAL
280                {
281                    this.tokenType = "html-tag";
282                    if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
283                        if (!this._internalJavaScriptTokenizer) {
284                            this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
285                            this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition;
286                        }
287                        // Do not tokenize script tag contents.
288                        return cursor;
289                    }
290
291                    if (this._condition.parseCondition & this._parseConditions.STYLE) {
292                        if (!this._internalCSSTokenizer) {
293                            this._internalCSSTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css");
294                            this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.initialCondition;
295                        }
296                        // Do not tokenize style tag contents.
297                        return cursor;
298                    }
299
300                    this._condition.parseCondition = this._parseConditions.INITIAL;
301                    return cursor;
302                }
303
304                <TAG> StringLiteral { return this._stringToken(cursor, true); }
305                <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
306                <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
307                <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
308                <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
309                <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
310                <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
311
312                <TAG> EqualSign => TAG
313                {
314                    if (this._isExpectingAttribute())
315                        this._setExpectingAttributeValue();
316                    this.tokenType = null;
317                    return cursor;
318                }
319
320                <TAG> Identifier
321                {
322                    if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) {
323                        // Fall through if expecting attributes.
324                        this.tokenType = null;
325                        return cursor;
326                    }
327
328                    if (this._condition.parseCondition === this._parseConditions.INITIAL) {
329                        this.tokenType = "html-tag";
330                        this._setExpectingAttribute();
331                        var token = this._line.substring(cursorOnEnter, cursor);
332                        if (token === "a")
333                            this._condition.parseCondition |= this._parseConditions.A_NODE;
334                        else if (this._condition.parseCondition & this._parseConditions.A_NODE)
335                            this._condition.parseCondition ^= this._parseConditions.A_NODE;
336                    } else if (this._isExpectingAttribute()) {
337                        var token = this._line.substring(cursorOnEnter, cursor);
338                        if (token === "href" || token === "src")
339                            this._condition.parseCondition |= this._parseConditions.LINKIFY;
340                        else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
341                            this._condition.parseCondition ^= this._parseConditions.LINKIFY;
342                        this.tokenType = "html-attribute-name";
343                    } else if (this._isExpectingAttributeValue())
344                        this.tokenType = this._attrValueTokenType();
345                    else
346                        this.tokenType = null;
347                    return cursor;
348                }
349                <*> [^] { this.tokenType = null; return cursor; }
350            */
351        }
352    }
353}
354
355WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;
356