/** A lexer is a recognizer that draws input symbols from a character stream.
 *  Lexer grammars result in a subclass of this object.  A Lexer object
 *  uses simplified match() and error recovery mechanisms in the interest
 *  of speed.
 *
 *  @param input optional character stream to tokenize.
 *  @param state optional shared recognizer state; it is forwarded to the
 *      superclass constructor only when supplied.
 *
 *  NOTE(review): when no state is passed, the superclass constructor is not
 *  invoked from here, yet most methods below dereference this.state --
 *  confirm that every caller (e.g. generated lexers) supplies a state.
 */
org.antlr.runtime.Lexer = function(input, state) {
    if (state) {
        org.antlr.runtime.Lexer.superclass.constructor.call(this, state);
    }
    if (input) {
        this.input = input;
    }
};

org.antlr.lang.extend(org.antlr.runtime.Lexer, org.antlr.runtime.BaseRecognizer, {
    /** Reset the recognizer: run the superclass reset, rewind the input
     *  stream (if one is attached) and clear the per-token lexer state
     *  (if a state object is attached).
     */
    reset: function() {
        // reset all recognizer state variables
        org.antlr.runtime.Lexer.superclass.reset.call(this);
        if ( org.antlr.lang.isValue(this.input) ) {
            this.input.seek(0); // rewind the input
        }
        if ( !org.antlr.lang.isValue(this.state) ) {
            return; // no shared state work to do
        }
        this.state.token = null;
        this.state.type = org.antlr.runtime.Token.INVALID_TOKEN_TYPE;
        this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
        this.state.tokenStartCharIndex = -1;
        this.state.tokenStartCharPositionInLine = -1;
        this.state.tokenStartLine = -1;
        this.state.text = null;
    },

    /** Return a token from this source; i.e., match a token on the char
     *  stream.
     *
     *  Loops until a token is produced: rules that end with the token set
     *  to SKIP_TOKEN, and recognition errors, both cause another attempt.
     *  Returns Token.EOF_TOKEN once the lookahead is CharStream.EOF.
     *  Exceptions that are not RecognitionExceptions are rethrown.
     */
    nextToken: function() {
        while (true) {
            // Start every attempt with fresh per-token bookkeeping.
            this.state.token = null;
            this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
            this.state.tokenStartCharIndex = this.input.index();
            this.state.tokenStartCharPositionInLine = this.input.getCharPositionInLine();
            this.state.tokenStartLine = this.input.getLine();
            this.state.text = null;
            if ( this.input.LA(1)===org.antlr.runtime.CharStream.EOF ) {
                return org.antlr.runtime.Token.EOF_TOKEN;
            }
            try {
                // mTokens() is not defined in this file; presumably it is
                // supplied by the generated lexer subclass.
                this.mTokens();
                if ( !org.antlr.lang.isValue(this.state.token) ) {
                    // The rule did not emit anything itself: emit the default token.
                    this.emit();
                }
                else if ( this.state.token===org.antlr.runtime.Token.SKIP_TOKEN ) {
                    continue;
                }
                return this.state.token;
            }
            catch (re) {
                if (re instanceof org.antlr.runtime.NoViableAltException) {
                    this.reportError(re);
                    this.recover(re); // throw out current char and try again
                } else if ( re instanceof org.antlr.runtime.RecognitionException ) {
                    // match()/matchRange() call recover() before throwing,
                    // so only report here.
                    this.reportError(re);
                } else {
                    throw re;
                }
            }
        }
    },

    /** Instruct the lexer to skip creating a token for current lexer rule
     *  and look for another token.  nextToken() knows to keep looking when
     *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
     *  if token==null at end of any token rule, it creates one for you
     *  and emits it.
     */
    skip: function() {
        this.state.token = org.antlr.runtime.Token.SKIP_TOKEN;
    },

    /** Set the char stream and reset the lexer.
     *  The input is detached before reset() so that reset() does not try
     *  to rewind the previous stream.
     */
    setCharStream: function(input) {
        this.input = null;
        this.reset();
        this.input = input;
    },

    /** Return the character stream currently attached to this lexer. */
    getCharStream: function() {
        return this.input;
    },

    /** Return the source name reported by the attached character stream. */
    getSourceName: function() {
        return this.input.getSourceName();
    },

    /** Currently does not support multiple emits per nextToken invocation
     *  for efficiency reasons.  Subclass and override this method and
     *  nextToken (to push tokens into a list and pull from that list rather
     *  than a single variable as this implementation does).
     *
     *  The standard method called to automatically emit a token at the
     *  outermost lexical rule.  The token object should point into the
     *  char buffer start..stop.  If there is a text override in 'text',
     *  use that to set the token's text.  Override this method to emit
     *  custom Token objects.
     *
     *  If you are building trees, then you should also override
     *  Parser or TreeParser.getMissingSymbol().
     *
     *  Called with no arguments, a CommonToken is built from the current
     *  lexer state; called with one argument, that token is emitted as is.
     *
     *  @return the token that was stored in this.state.token (in both
     *      call forms).
     */
    emit: function() {
        if (arguments.length===0) {
            var t = new org.antlr.runtime.CommonToken(this.input, this.state.type, this.state.channel, this.state.tokenStartCharIndex, this.getCharIndex()-1);
            t.setLine(this.state.tokenStartLine);
            t.setText(this.state.text);
            t.setCharPositionInLine(this.state.tokenStartCharPositionInLine);
            this.state.token = t;
            return t;
        }
        this.state.token = arguments[0];
        return this.state.token;
    },

    /** Match either a literal string (one symbol at a time) or a single
     *  symbol given as a number against the input, consuming what matches.
     *
     *  On a mismatch while backtracking (state.backtracking>0) the failed
     *  flag is set and the method returns quietly; otherwise recover() is
     *  called and a MismatchedTokenException is thrown.
     *  Arguments that are neither strings nor numbers are ignored.
     *
     *  @param s string or number to match.
     */
    match: function(s) {
        var i = 0,
            mte;

        if (org.antlr.lang.isString(s)) {
            while ( i<s.length ) {
                if ( this.input.LA(1)!=s.charAt(i) ) {
                    if ( this.state.backtracking>0 ) {
                        this.state.failed = true;
                        return;
                    }
                    mte = new org.antlr.runtime.MismatchedTokenException(s.charAt(i), this.input);
                    this.recover(mte);
                    throw mte;
                }
                i++;
                this.input.consume();
                this.state.failed = false;
            }
        } else if (org.antlr.lang.isNumber(s)) {
            if ( this.input.LA(1)!=s ) {
                if ( this.state.backtracking>0 ) {
                    this.state.failed = true;
                    return;
                }
                mte = new org.antlr.runtime.MismatchedTokenException(s, this.input);
                this.recover(mte);
                throw mte;
            }
            this.input.consume();
            this.state.failed = false;
        }
    },

    /** Consume the next input symbol unconditionally. */
    matchAny: function() {
        this.input.consume();
    },

    /** Match one input symbol that lies within the inclusive range a..b.
     *
     *  On a mismatch while backtracking the failed flag is set and the
     *  method returns; otherwise recover() is called and a
     *  MismatchedRangeException is thrown.
     */
    matchRange: function(a, b) {
        if ( this.input.LA(1)<a || this.input.LA(1)>b ) {
            if ( this.state.backtracking>0 ) {
                this.state.failed = true;
                return;
            }
            var mre = new org.antlr.runtime.MismatchedRangeException(a,b,this.input);
            this.recover(mre);
            throw mre;
        }
        this.input.consume();
        this.state.failed = false;
    },

    /** Return the line reported by the input stream. */
    getLine: function() {
        return this.input.getLine();
    },

    /** Return the position within the line reported by the input stream. */
    getCharPositionInLine: function() {
        return this.input.getCharPositionInLine();
    },

    /** What is the index of the current character of lookahead? */
    getCharIndex: function() {
        return this.input.index();
    },

    /** Return the text matched so far for the current token or any
     *  text override.
     */
    getText: function() {
        if ( org.antlr.lang.isString(this.state.text) ) {
            return this.state.text;
        }
        // presumably the stream's substring() takes an inclusive stop index,
        // hence the -1 -- verify against the CharStream implementation.
        return this.input.substring(this.state.tokenStartCharIndex,this.getCharIndex()-1);
    },

    /** Set the complete text of this token; it wipes any previous
     *  changes to the text.
     */
    setText: function(text) {
        this.state.text = text;
    },

    /** Report a recognition error by displaying it together with the
     *  token names.
     */
    reportError: function(e) {
        // TODO: not thought about recovery in lexer yet.
        //
        // // if we've already reported an error and have not matched a token
        // // yet successfully, don't report any errors.
        // if ( errorRecovery ) {
        //     //System.err.print("[SPURIOUS] ");
        //     return;
        // }
        // errorRecovery = true;

        this.displayRecognitionError(this.getTokenNames(), e);
    },

    /** Build a character-oriented message for the lexer-specific exception
     *  types; any other exception is delegated to the superclass.
     */
    getErrorMessage: function(e, tokenNames) {
        var msg = null;
        if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting "+this.getCharErrorDisplay(e.expecting);
        }
        else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
            msg = "no viable alternative at character "+this.getCharErrorDisplay(e.c);
        }
        else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
            msg = "required (...)+ loop did not match anything at character "+this.getCharErrorDisplay(e.c);
        }
        else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
        }
        else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
        }
        else if ( e instanceof org.antlr.runtime.MismatchedRangeException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+
                this.getCharErrorDisplay(e.a)+".."+this.getCharErrorDisplay(e.b);
        }
        else {
            msg = org.antlr.runtime.Lexer.superclass.getErrorMessage.call(this, e, tokenNames);
        }
        return msg;
    },

    /** Return a quoted, printable form of an input symbol: EOF becomes
     *  <EOF> and newline, tab and carriage return are shown as escapes.
     */
    getCharErrorDisplay: function(c) {
        var s = c; //String.fromCharCode(c);
        switch ( s ) {
            case org.antlr.runtime.Token.EOF :
                s = "<EOF>";
                break;
            case "\n" :
                s = "\\n";
                break;
            case "\t" :
                s = "\\t";
                break;
            case "\r" :
                s = "\\r";
                break;
        }
        return "'"+s+"'";
    },

    /** Lexers can normally match any char in its vocabulary after matching
     *  a token, so do the easy thing and just kill a character and hope
     *  it all works out.  You can instead use the rule invocation stack
     *  to do sophisticated error recovery if you are in a fragment rule.
     */
    recover: function(re) {
        this.input.consume();
    },

    /** Trace entry into a rule, reporting the current lookahead symbol and
     *  position to the superclass traceIn().
     */
    traceIn: function(ruleName, ruleIndex) {
        // Only numeric symbols are char codes; string symbols are used as is
        // (String.fromCharCode on a non-numeric string would yield "\u0000").
        var symbol = this.input.LT(1),
            inputSymbol = (typeof symbol === "number" ? String.fromCharCode(symbol) : String(symbol))+" line="+this.getLine()+":"+this.getCharPositionInLine();
        org.antlr.runtime.Lexer.superclass.traceIn.call(this, ruleName, ruleIndex, inputSymbol);
    },

    /** Trace exit from a rule, reporting the current lookahead symbol and
     *  position to the superclass traceOut().
     */
    traceOut: function(ruleName, ruleIndex) {
        // Only numeric symbols are char codes; string symbols are used as is
        // (String.fromCharCode on a non-numeric string would yield "\u0000").
        var symbol = this.input.LT(1),
            inputSymbol = (typeof symbol === "number" ? String.fromCharCode(symbol) : String(symbol))+" line="+this.getLine()+":"+this.getCharPositionInLine();
        org.antlr.runtime.Lexer.superclass.traceOut.call(this, ruleName, ruleIndex, inputSymbol);
    }
});