1/* 2 * Copyright (C) 2009 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31// Generate js file as follows: 32// 33// re2c -isc WebCore/inspector/front-end/SourceJavaScriptTokenizer.re2js \ 34// | sed 's|^yy\([^:]*\)*\:|case \1:|' \ 35// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \ 36// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \ 37// | sed 's|[*]cursor|this._charAt(cursor)|' \ 38// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \ 39// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \ 40// | sed 's|unsigned\ int|var|' \ 41// | sed 's|var\ yych|case 1: var yych|' 42 43WebInspector.SourceJavaScriptTokenizer = function() 44{ 45 WebInspector.SourceTokenizer.call(this); 46 47 this._keywords = [ 48 "null", "true", "false", "break", "case", "catch", "const", "default", "finally", "for", 49 "instanceof", "new", "var", "continue", "function", "return", "void", "delete", "if", 50 "this", "do", "while", "else", "in", "switch", "throw", "try", "typeof", "debugger", 51 "class", "enum", "export", "extends", "import", "super", "get", "set" 52 ].keySet(); 53 54 this._lexConditions = { 55 DIV: 0, 56 NODIV: 1, 57 COMMENT: 2, 58 DSTRING: 3, 59 SSTRING: 4, 60 REGEX: 5 61 }; 62 63 this.case_DIV = 1000; 64 this.case_NODIV = 1001; 65 this.case_COMMENT = 1002; 66 this.case_DSTRING = 1003; 67 this.case_SSTRING = 1004; 68 this.case_REGEX = 1005; 69 70 this.initialCondition = { lexCondition: this._lexConditions.NODIV } 71} 72 73WebInspector.SourceJavaScriptTokenizer.prototype = { 74 nextToken: function(cursor) 75 { 76 var cursorOnEnter = cursor; 77 var gotoCase = 1; 78 while (1) { 79 switch (gotoCase) 80 // Following comment is replaced with generated state machine. 81 /*!re2c 82 re2c:define:YYCTYPE = "var"; 83 re2c:define:YYCURSOR = cursor; 84 re2c:define:YYGETCONDITION = "this.getLexCondition"; 85 re2c:define:YYSETCONDITION = "this.setLexCondition"; 86 re2c:condprefix = "case this.case_"; 87 re2c:condenumprefix = "this._lexConditions."; 88 re2c:yyfill:enable = 0; 89 re2c:labelprefix = "case "; 90 re2c:indent:top = 2; 91 re2c:indent:string = " "; 92 93 LineComment = "//" [^\r\n]*; 94 CommentContent = ([^*\r\n] | ("*"+[^/*]))*; 95 Comment = "/*" CommentContent "*"+ "/"; 96 CommentStart = "/*" CommentContent [\r\n]; 97 CommentEnd = CommentContent "*"+ "/"; 98 99 DecimalDigit = [0-9]; 100 NonZeroDigit = [1-9]; 101 OctalDigit = [0-7]; 102 HexDigit = [0-9a-fA-F]; 103 SignedInteger = ("+"|"-")? DecimalDigit+; 104 ExponentPart = ("e" | "E") SignedInteger; 105 DecimalIntegerLiteral = "0" | NonZeroDigit DecimalDigit*; 106 DecimalLiteral = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? | "." DecimalDigit+ ExponentPart? | DecimalIntegerLiteral ExponentPart?; 107 HexIntegerLiteral = "0" ("x"|"X") HexDigit+; 108 OctalIntegerLiteral = "0" OctalDigit+; 109 NumericLiteral = DecimalLiteral | HexIntegerLiteral | OctalIntegerLiteral; 110 111 Punctuation = [\!\%\&\(\*\+\,\-\.\:\;\<\=\>\?\[\]\^\{\|\}\~] | "!=" | "!==" | "%=" | "&&" | "&=" | "*=" | "++" | "+=" | "--" | "-=" | "<<" | "<<=" | "<=" | "==" | "===" | ">=" | ">>" | ">>=" | ">>>" | ">>>=" | "^=" | "|=" | "||"; 112 Division = "/" | "/="; 113 RightParen = ")"; 114 115 Letter = [a-zA-Z\x80-\xFF]; 116 UnicodeEscapeSequence = "\\u" HexDigit HexDigit HexDigit HexDigit; 117 118 IdentifierStart = Letter | "_" | "$" | UnicodeEscapeSequence; 119 IdentifierPart = IdentifierStart | DecimalDigit; 120 Identifier = IdentifierStart IdentifierPart *; 121 122 DoubleStringContent = ([^\r\n\"\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*; 123 SingleStringContent = ([^\r\n\'\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*; 124 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; 125 DoubleStringStart = "\"" DoubleStringContent "\\" [\r\n]; 126 DoubleStringEnd = DoubleStringContent "\""; 127 SingleStringStart = "'" SingleStringContent "\\" [\r\n]; 128 SingleStringEnd = SingleStringContent "'"; 129 130 BackslashSequence = "\\" [^\r\n]; 131 RegexSet = "[" ([^\r\n*\\/] | BackslashSequence)* "]"; 132 RegexFirstChar = [^\r\n*\\/\[\]] | BackslashSequence | RegexSet; 133 RegexChar = [^\r\n\\/\[\]] | BackslashSequence | RegexSet; 134 RegexContent = RegexChar*; 135 Regex = "/" RegexFirstChar RegexContent "/" [igm]*; 136 RegexStart = "/" RegexFirstChar RegexContent "\\"; 137 RegexEnd = RegexContent "/" [igm]*; 138 139 <DIV,NODIV> LineComment { this.tokenType = "javascript-comment"; return cursor; } 140 <DIV,NODIV> Comment { this.tokenType = "javascript-comment"; return cursor; } 141 <DIV,NODIV> CommentStart => COMMENT { this.tokenType = "javascript-comment"; return cursor; } 142 <COMMENT> CommentContent => COMMENT { this.tokenType = "javascript-comment"; return cursor; } 143 <COMMENT> CommentEnd => NODIV { this.tokenType = "javascript-comment"; return cursor; } 144 145 <DIV,NODIV> StringLiteral { this.tokenType = "javascript-string"; return cursor; } 146 <DIV,NODIV> DoubleStringStart => DSTRING { this.tokenType = "javascript-string"; return cursor; } 147 <DSTRING> DoubleStringContent => DSTRING { this.tokenType = "javascript-string"; return cursor; } 148 <DSTRING> DoubleStringEnd => NODIV { this.tokenType = "javascript-string"; return cursor; } 149 <DIV,NODIV> SingleStringStart => SSTRING { this.tokenType = "javascript-string"; return cursor; } 150 <SSTRING> SingleStringContent => SSTRING { this.tokenType = "javascript-string"; return cursor; } 151 <SSTRING> SingleStringEnd => NODIV { this.tokenType = "javascript-string"; return cursor; } 152 153 <NODIV> Regex { this.tokenType = "javascript-regexp"; return cursor; } 154 <NODIV> RegexStart => REGEX { this.tokenType = "javascript-regexp"; return cursor; } 155 <REGEX> RegexContent => REGEX { this.tokenType = "javascript-regexp"; return cursor; } 156 <REGEX> RegexEnd => NODIV { this.tokenType = "javascript-regexp"; return cursor; } 157 158 <DIV,NODIV> NumericLiteral => DIV { this.tokenType = "javascript-number"; return cursor; } 159 <DIV,NODIV> Identifier => DIV 160 { 161 var token = this._line.substring(cursorOnEnter, cursor); 162 if (token in this._keywords) 163 this.tokenType = "javascript-keyword"; 164 else 165 this.tokenType = "javascript-ident"; 166 return cursor; 167 } 168 <DIV,NODIV> RightParen => DIV { this.tokenType = null; return cursor; } 169 <DIV,NODIV> Punctuation => NODIV { this.tokenType = null; return cursor; } 170 <DIV> Division => NODIV { this.tokenType = null; return cursor; } 171 <*> [^] { this.tokenType = null; return cursor; } 172 */ 173 } 174 } 175} 176 177WebInspector.SourceJavaScriptTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype; 178