/*
 * [The "BSD licence"]
 * Copyright (c) 2005-2008 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

namespace Antlr.Runtime
{
    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

    /** <summary>
     *  A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object. A Lexer object
     *  uses simplified match() and error recovery mechanisms in the interest
     *  of speed.
     *  </summary>
     */
    public abstract class Lexer : BaseRecognizer, ITokenSource
    {
        /** <summary>Where is the lexer drawing characters from?</summary> */
        protected ICharStream input;

        /** <summary>Create a lexer with no char stream attached; assign one later through CharStream.</summary> */
        public Lexer()
        {
        }

        /** <summary>Create a lexer that reads from the given char stream.</summary> */
        public Lexer( ICharStream input )
        {
            this.input = input;
        }

        /** <summary>Create a lexer that reads from the given char stream and hands the supplied state to the base recognizer.</summary> */
        public Lexer( ICharStream input, RecognizerSharedState state )
            : base( state )
        {
            this.input = input;
        }

        #region Properties

        /** <summary>
         *  Get the text matched so far for the current token, or the text
         *  override if one has been set. Setting this property replaces the
         *  complete text of the token; it wipes any previous changes to the text.
         *  </summary>
         */
        public string Text
        {
            get
            {
                if ( state.text != null )
                {
                    return state.text;
                }

                // No override: slice the matched characters out of the stream.
                return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
            }
            set
            {
                state.text = value;
            }
        }

        /** <summary>Line reported by the underlying char stream; the setter forwards to the stream.</summary> */
        public int Line
        {
            get
            {
                return input.Line;
            }
            set
            {
                input.Line = value;
            }
        }

        /** <summary>Position within the line reported by the underlying char stream; the setter forwards to the stream.</summary> */
        public int CharPositionInLine
        {
            get
            {
                return input.CharPositionInLine;
            }
            set
            {
                input.CharPositionInLine = value;
            }
        }

        #endregion

        /** <summary>
         *  Reset the base recognizer, rewind the char stream (when one is
         *  attached) and clear the per-token fields of the shared state
         *  (when there is one).
         *  </summary>
         */
        public override void Reset()
        {
            base.Reset(); // reset all recognizer state variables

            // whack Lexer state variables
            if ( input != null )
            {
                input.Seek( 0 ); // rewind the input
            }

            if ( state == null )
            {
                return; // no shared state work to do
            }

            state.token = null;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
            state.text = null;
        }

        /** <summary>Return a token from this source; i.e., match a token on the char stream.</summary> */
        public virtual IToken NextToken()
        {
            for ( ; ; )
            {
                // Start a fresh token: clear the previous result and remember where it begins.
                state.token = null;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
                state.text = null;

                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
                {
                    IToken eof = new CommonToken( input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index );
                    eof.Line = Line;
                    eof.CharPositionInLine = CharPositionInLine;
                    return eof;
                }

                try
                {
                    ParseNextToken();
                    if ( state.token == null )
                    {
                        // The rule did not emit anything itself; build the default token.
                        Emit();
                    }
                    else if ( state.token == Tokens.Skip )
                    {
                        // The rule asked to be skipped; look for the next token.
                        continue;
                    }

                    return state.token;
                }
                catch ( MismatchedRangeException mre )
                {
                    ReportError( mre );
                    // MatchRange() has already called Recover()
                }
                catch ( MismatchedTokenException mte )
                {
                    ReportError( mte );
                    // Match() has already called Recover()
                }
                catch ( RecognitionException re )
                {
                    ReportError( re );
                    Recover( re ); // throw out current char and try again
                }
            }
        }

        /** <summary>
         *  Instruct the lexer to skip creating a token for current lexer rule
         *  and look for another token. NextToken() knows to keep looking when
         *  a lexer rule finishes with token set to Tokens.Skip. Recall that
         *  if token==null at end of any token rule, it creates one for you
         *  and emits it.
         *  </summary>
         */
        public virtual void Skip()
        {
            state.token = Tokens.Skip;
        }

        /** <summary>This is the lexer entry point that sets instance var 'token'</summary> */
        public abstract void mTokens();

        /** <summary>
         *  Get the char stream the lexer reads from. Setting it resets the
         *  lexer and then installs the new stream.
         *  </summary>
         */
        public virtual ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                // Detach the old stream first so Reset() does not rewind it.
                input = null;
                Reset();
                input = value;
            }
        }

        /** <summary>
         *  Source name reported by the current char stream, or null when no
         *  stream is attached (e.g. after the parameterless constructor).
         *  </summary>
         */
        public override string SourceName
        {
            get
            {
                return input != null ? input.SourceName : null;
            }
        }

        /** <summary>
         *  Currently does not support multiple emits per NextToken invocation
         *  for efficiency reasons. Subclass and override this method and
         *  NextToken (to push tokens into a list and pull from that list rather
         *  than a single variable as this implementation does).
         *  </summary>
         */
        public virtual void Emit( IToken token )
        {
            state.token = token;
        }

        /** <summary>
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule. The token object should point into the
         *  char buffer start..stop. If there is a text override in 'text',
         *  use that to set the token's text. Override this method to emit
         *  custom Token objects.
         *  </summary>
         *
         *  <remarks>
         *  If you are building trees, then you should also override
         *  Parser or TreeParser.getMissingSymbol().
         *  </remarks>
         */
        public virtual IToken Emit()
        {
            // The stop index is the last consumed character, i.e. one before the current index.
            IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
            t.Line = state.tokenStartLine;
            t.Text = state.text;
            t.CharPositionInLine = state.tokenStartCharPositionInLine;
            Emit( t );
            return t;
        }

        /** <summary>
         *  Match the characters of <paramref name="s"/> one after another.
         *  On the first mismatch: while backtracking, set state.failed and
         *  return; otherwise call Recover() and throw a MismatchedTokenException.
         *  </summary>
         */
        public virtual void Match( string s )
        {
            for ( int i = 0; i < s.Length; i++ )
            {
                if ( input.LA( 1 ) != s[i] )
                {
                    if ( state.backtracking > 0 )
                    {
                        state.failed = true;
                        return;
                    }

                    MismatchedTokenException mte = new MismatchedTokenException( s[i], input, TokenNames );
                    Recover( mte );
                    throw mte;
                }

                input.Consume();
                state.failed = false;
            }
        }

        /** <summary>Consume one character from the input without checking it.</summary> */
        public virtual void MatchAny()
        {
            input.Consume();
        }

        /** <summary>
         *  Match the single character <paramref name="c"/>. On mismatch: while
         *  backtracking, set state.failed and return; otherwise call Recover()
         *  and throw a MismatchedTokenException.
         *  </summary>
         */
        public virtual void Match( int c )
        {
            if ( input.LA( 1 ) != c )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedTokenException mte = new MismatchedTokenException( c, input, TokenNames );
                Recover( mte ); // don't really recover; just consume in lexer
                throw mte;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>
         *  Match one character in the inclusive range a..b. On mismatch: while
         *  backtracking, set state.failed and return; otherwise call Recover()
         *  and throw a MismatchedRangeException.
         *  </summary>
         */
        public virtual void MatchRange( int a, int b )
        {
            // Read the lookahead once instead of once per bound.
            int next = input.LA( 1 );
            if ( next < a || next > b )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedRangeException mre = new MismatchedRangeException( a, b, input );
                Recover( mre );
                throw mre;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>What is the index of the current character of lookahead?</summary> */
        public virtual int CharIndex
        {
            get
            {
                return input.Index;
            }
        }

        /** <summary>Report a recognition error by displaying it; no error suppression is done here.</summary> */
        public override void ReportError( RecognitionException e )
        {
            /* TODO: not thought about recovery in lexer yet.
             *
            // if we've already reported an error and have not matched a token
            // yet successfully, don't report any errors.
            if ( errorRecovery ) {
                //System.err.print("[SPURIOUS] ");
                return;
            }
            errorRecovery = true;
            */

            DisplayRecognitionError( this.TokenNames, e );
        }

        /** <summary>
         *  Build the character-oriented message for the exception types
         *  handled here; any other exception is delegated to the base
         *  implementation.
         *  </summary>
         */
        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
        {
            // The order of the tests is kept exactly as before so that an
            // exception matching several tests still takes the same branch.
            MismatchedTokenException mte = e as MismatchedTokenException;
            if ( mte != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
            }

            if ( e is NoViableAltException )
            {
                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                return "no viable alternative at character " + GetCharErrorDisplay( e.Character );
            }

            if ( e is EarlyExitException )
            {
                // for development, can add "(decision="+eee.decisionNumber+")"
                return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
            }

            MismatchedNotSetException notSet = e as MismatchedNotSetException;
            if ( notSet != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + notSet.Expecting;
            }

            MismatchedSetException set = e as MismatchedSetException;
            if ( set != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + set.Expecting;
            }

            MismatchedRangeException mre = e as MismatchedRangeException;
            if ( mre != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
                    GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
            }

            return base.GetErrorMessage( e, tokenNames );
        }

        /** <summary>
         *  Render a character code for an error message, wrapped in single
         *  quotes. End of file, newline, tab and carriage return get a
         *  readable escape; every other value is shown as the character itself.
         *  </summary>
         */
        public virtual string GetCharErrorDisplay( int c )
        {
            string s;
            switch ( c )
            {
            case TokenTypes.EndOfFile:
                s = "<EOF>";
                break;
            case '\n':
                s = "\\n";
                break;
            case '\t':
                s = "\\t";
                break;
            case '\r':
                s = "\\r";
                break;
            default:
                // Convert only when needed, and never trap on out-of-range
                // codes even when the assembly is compiled with /checked.
                s = unchecked( (char)c ).ToString();
                break;
            }

            return "'" + s + "'";
        }

        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out. You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         */
        public virtual void Recover( RecognitionException re )
        {
            //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
            //re.printStackTrace();
            input.Consume();
        }

        /** <summary>Trace rule entry, describing the input by its next character and the current line:position.</summary> */
        [Conditional("ANTLR_TRACE")]
        public virtual void TraceIn( string ruleName, int ruleIndex )
        {
            // unchecked: the lookahead may be a value outside the char range.
            string inputSymbol = unchecked( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceIn( ruleName, ruleIndex, inputSymbol );
        }

        /** <summary>Trace rule exit, describing the input by its next character and the current line:position.</summary> */
        [Conditional("ANTLR_TRACE")]
        public virtual void TraceOut( string ruleName, int ruleIndex )
        {
            // unchecked: the lookahead may be a value outside the char range.
            string inputSymbol = unchecked( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceOut( ruleName, ruleIndex, inputSymbol );
        }

        /** <summary>Hook used by NextToken() to match one token; the default simply runs mTokens().</summary> */
        protected virtual void ParseNextToken()
        {
            mTokens();
        }
    }
}