1 /* 2 * [The "BSD licence"] 3 * Copyright (c) 2005-2008 Terence Parr 4 * All rights reserved. 5 * 6 * Conversion to C#: 7 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 namespace Antlr.Runtime 34 { 35 using System.Collections.Generic; 36 37 using ArgumentNullException = System.ArgumentNullException; 38 using Array = System.Array; 39 using Conditional = System.Diagnostics.ConditionalAttribute; 40 using Exception = System.Exception; 41 using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener; 42 using MethodBase = System.Reflection.MethodBase; 43 using NotSupportedException = System.NotSupportedException; 44 using Regex = System.Text.RegularExpressions.Regex; 45 using StackFrame = System.Diagnostics.StackFrame; 46 using StackTrace = System.Diagnostics.StackTrace; 47 using TextWriter = System.IO.TextWriter; 48 using Type = System.Type; 49 50 /** <summary> 51 * A generic recognizer that can handle recognizers generated from 52 * lexer, parser, and tree grammars. This is all the parsing 53 * support code essentially; most of it is error recovery stuff and 54 * backtracking. 55 * </summary> 56 */ 57 public abstract class BaseRecognizer 58 { 59 public const int MemoRuleFailed = -2; 60 public const int MemoRuleUnknown = -1; 61 public const int InitialFollowStackSize = 100; 62 63 // copies from Token object for convenience in actions 64 public const int DefaultTokenChannel = TokenChannels.Default; 65 public const int Hidden = TokenChannels.Hidden; 66 67 public const string NextTokenRuleName = "nextToken"; 68 69 /** <summary> 70 * State of a lexer, parser, or tree parser are collected into a state 71 * object so the state can be shared. This sharing is needed to 72 * have one grammar import others and share same error variables 73 * and other state variables. It's a kind of explicit multiple 74 * inheritance via delegation of methods and shared state. 75 * </summary> 76 */ 77 protected internal RecognizerSharedState state; 78 BaseRecognizer()79 public BaseRecognizer() 80 : this(new RecognizerSharedState()) 81 { 82 } 83 BaseRecognizer( RecognizerSharedState state )84 public BaseRecognizer( RecognizerSharedState state ) 85 { 86 if ( state == null ) 87 { 88 state = new RecognizerSharedState(); 89 } 90 this.state = state; 91 InitDFAs(); 92 } 93 94 public TextWriter TraceDestination 95 { 96 get; 97 set; 98 } 99 InitDFAs()100 protected virtual void InitDFAs() 101 { 102 } 103 104 /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */ Reset()105 public virtual void Reset() 106 { 107 // wack everything related to error recovery 108 if ( state == null ) 109 { 110 return; // no shared state work to do 111 } 112 state._fsp = -1; 113 state.errorRecovery = false; 114 state.lastErrorIndex = -1; 115 state.failed = false; 116 state.syntaxErrors = 0; 117 // wack everything related to backtracking and memoization 118 state.backtracking = 0; 119 for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ ) 120 { // wipe cache 121 state.ruleMemo[i] = null; 122 } 123 } 124 125 126 /** <summary> 127 * Match current input symbol against ttype. Attempt 128 * single token insertion or deletion error recovery. If 129 * that fails, throw MismatchedTokenException. 130 * </summary> 131 * 132 * <remarks> 133 * To turn off single token insertion or deletion error 134 * recovery, override recoverFromMismatchedToken() and have it 135 * throw an exception. See TreeParser.recoverFromMismatchedToken(). 136 * This way any error in a rule will cause an exception and 137 * immediate exit from rule. Rule would recover by resynchronizing 138 * to the set of symbols that can follow rule ref. 139 * </remarks> 140 */ Match( IIntStream input, int ttype, BitSet follow )141 public virtual object Match( IIntStream input, int ttype, BitSet follow ) 142 { 143 //System.out.println("match "+((TokenStream)input).LT(1)); 144 object matchedSymbol = GetCurrentInputSymbol( input ); 145 if ( input.LA( 1 ) == ttype ) 146 { 147 input.Consume(); 148 state.errorRecovery = false; 149 state.failed = false; 150 return matchedSymbol; 151 } 152 if ( state.backtracking > 0 ) 153 { 154 state.failed = true; 155 return matchedSymbol; 156 } 157 matchedSymbol = RecoverFromMismatchedToken( input, ttype, follow ); 158 return matchedSymbol; 159 } 160 161 /** <summary>Match the wildcard: in a symbol</summary> */ MatchAny( IIntStream input )162 public virtual void MatchAny( IIntStream input ) 163 { 164 state.errorRecovery = false; 165 state.failed = false; 166 input.Consume(); 167 } 168 MismatchIsUnwantedToken( IIntStream input, int ttype )169 public virtual bool MismatchIsUnwantedToken( IIntStream input, int ttype ) 170 { 171 return input.LA( 2 ) == ttype; 172 } 173 MismatchIsMissingToken( IIntStream input, BitSet follow )174 public virtual bool MismatchIsMissingToken( IIntStream input, BitSet follow ) 175 { 176 if ( follow == null ) 177 { 178 // we have no information about the follow; we can only consume 179 // a single token and hope for the best 180 return false; 181 } 182 // compute what can follow this grammar element reference 183 if ( follow.Member( TokenTypes.EndOfRule ) ) 184 { 185 BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW(); 186 follow = follow.Or( viableTokensFollowingThisRule ); 187 if ( state._fsp >= 0 ) 188 { // remove EOR if we're not the start symbol 189 follow.Remove( TokenTypes.EndOfRule ); 190 } 191 } 192 // if current token is consistent with what could come after set 193 // then we know we're missing a token; error recovery is free to 194 // "insert" the missing token 195 196 //System.out.println("viable tokens="+follow.toString(getTokenNames())); 197 //System.out.println("LT(1)="+((TokenStream)input).LT(1)); 198 199 // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 200 // in follow set to indicate that the fall of the start symbol is 201 // in the set (EOF can follow). 202 if ( follow.Member( input.LA( 1 ) ) || follow.Member( TokenTypes.EndOfRule ) ) 203 { 204 //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); 205 return true; 206 } 207 return false; 208 } 209 210 /** <summary>Report a recognition problem.</summary> 211 * 212 * <remarks> 213 * This method sets errorRecovery to indicate the parser is recovering 214 * not parsing. Once in recovery mode, no errors are generated. 215 * To get out of recovery mode, the parser must successfully match 216 * a token (after a resync). So it will go: 217 * 218 * 1. error occurs 219 * 2. enter recovery mode, report error 220 * 3. consume until token found in resynch set 221 * 4. try to resume parsing 222 * 5. next match() will reset errorRecovery mode 223 * 224 * If you override, make sure to update syntaxErrors if you care about that. 225 * </remarks> 226 */ ReportError( RecognitionException e )227 public virtual void ReportError( RecognitionException e ) 228 { 229 // if we've already reported an error and have not matched a token 230 // yet successfully, don't report any errors. 231 if ( state.errorRecovery ) 232 { 233 //System.err.print("[SPURIOUS] "); 234 return; 235 } 236 state.syntaxErrors++; // don't count spurious 237 state.errorRecovery = true; 238 239 DisplayRecognitionError( this.TokenNames, e ); 240 } 241 DisplayRecognitionError( string[] tokenNames, RecognitionException e )242 public virtual void DisplayRecognitionError( string[] tokenNames, 243 RecognitionException e ) 244 { 245 string hdr = GetErrorHeader( e ); 246 string msg = GetErrorMessage( e, tokenNames ); 247 EmitErrorMessage( hdr + " " + msg ); 248 } 249 250 /** <summary>What error message should be generated for the various exception types?</summary> 251 * 252 * <remarks> 253 * Not very object-oriented code, but I like having all error message 254 * generation within one method rather than spread among all of the 255 * exception classes. This also makes it much easier for the exception 256 * handling because the exception classes do not have to have pointers back 257 * to this object to access utility routines and so on. Also, changing 258 * the message for an exception type would be difficult because you 259 * would have to subclassing exception, but then somehow get ANTLR 260 * to make those kinds of exception objects instead of the default. 261 * This looks weird, but trust me--it makes the most sense in terms 262 * of flexibility. 263 * 264 * For grammar debugging, you will want to override this to add 265 * more information such as the stack frame with 266 * getRuleInvocationStack(e, this.getClass().getName()) and, 267 * for no viable alts, the decision description and state etc... 268 * 269 * Override this to change the message generated for one or more 270 * exception types. 271 * </remarks> 272 */ GetErrorMessage( RecognitionException e, string[] tokenNames )273 public virtual string GetErrorMessage( RecognitionException e, string[] tokenNames ) 274 { 275 string msg = e.Message; 276 if ( e is UnwantedTokenException ) 277 { 278 UnwantedTokenException ute = (UnwantedTokenException)e; 279 string tokenName = "<unknown>"; 280 if ( ute.Expecting == TokenTypes.EndOfFile ) 281 { 282 tokenName = "EndOfFile"; 283 } 284 else 285 { 286 tokenName = tokenNames[ute.Expecting]; 287 } 288 msg = "extraneous input " + GetTokenErrorDisplay( ute.UnexpectedToken ) + 289 " expecting " + tokenName; 290 } 291 else if ( e is MissingTokenException ) 292 { 293 MissingTokenException mte = (MissingTokenException)e; 294 string tokenName = "<unknown>"; 295 if ( mte.Expecting == TokenTypes.EndOfFile ) 296 { 297 tokenName = "EndOfFile"; 298 } 299 else 300 { 301 tokenName = tokenNames[mte.Expecting]; 302 } 303 msg = "missing " + tokenName + " at " + GetTokenErrorDisplay( e.Token ); 304 } 305 else if ( e is MismatchedTokenException ) 306 { 307 MismatchedTokenException mte = (MismatchedTokenException)e; 308 string tokenName = "<unknown>"; 309 if ( mte.Expecting == TokenTypes.EndOfFile ) 310 { 311 tokenName = "EndOfFile"; 312 } 313 else 314 { 315 tokenName = tokenNames[mte.Expecting]; 316 } 317 msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) + 318 " expecting " + tokenName; 319 } 320 else if ( e is MismatchedTreeNodeException ) 321 { 322 MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; 323 string tokenName = "<unknown>"; 324 if ( mtne.Expecting == TokenTypes.EndOfFile ) 325 { 326 tokenName = "EndOfFile"; 327 } 328 else 329 { 330 tokenName = tokenNames[mtne.Expecting]; 331 } 332 // workaround for a .NET framework bug (NullReferenceException) 333 string nodeText = ( mtne.Node != null ) ? mtne.Node.ToString() ?? string.Empty : string.Empty; 334 msg = "mismatched tree node: " + nodeText + " expecting " + tokenName; 335 } 336 else if ( e is NoViableAltException ) 337 { 338 //NoViableAltException nvae = (NoViableAltException)e; 339 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" 340 // and "(decision="+nvae.decisionNumber+") and 341 // "state "+nvae.stateNumber 342 msg = "no viable alternative at input " + GetTokenErrorDisplay( e.Token ); 343 } 344 else if ( e is EarlyExitException ) 345 { 346 //EarlyExitException eee = (EarlyExitException)e; 347 // for development, can add "(decision="+eee.decisionNumber+")" 348 msg = "required (...)+ loop did not match anything at input " + 349 GetTokenErrorDisplay( e.Token ); 350 } 351 else if ( e is MismatchedSetException ) 352 { 353 MismatchedSetException mse = (MismatchedSetException)e; 354 msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) + 355 " expecting set " + mse.Expecting; 356 } 357 else if ( e is MismatchedNotSetException ) 358 { 359 MismatchedNotSetException mse = (MismatchedNotSetException)e; 360 msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) + 361 " expecting set " + mse.Expecting; 362 } 363 else if ( e is FailedPredicateException ) 364 { 365 FailedPredicateException fpe = (FailedPredicateException)e; 366 msg = "rule " + fpe.RuleName + " failed predicate: {" + 367 fpe.PredicateText + "}?"; 368 } 369 return msg; 370 } 371 372 /** <summary> 373 * Get number of recognition errors (lexer, parser, tree parser). Each 374 * recognizer tracks its own number. So parser and lexer each have 375 * separate count. Does not count the spurious errors found between 376 * an error and next valid token match 377 * </summary> 378 * 379 * <seealso cref="reportError()"/> 380 */ 381 public virtual int NumberOfSyntaxErrors 382 { 383 get 384 { 385 return state.syntaxErrors; 386 } 387 } 388 389 /** <summary>What is the error header, normally line/character position information?</summary> */ GetErrorHeader( RecognitionException e )390 public virtual string GetErrorHeader( RecognitionException e ) 391 { 392 string prefix = SourceName ?? string.Empty; 393 if (prefix.Length > 0) 394 prefix += ' '; 395 396 return string.Format("{0}line {1}:{2}", prefix, e.Line, e.CharPositionInLine + 1); 397 } 398 399 /** <summary> 400 * How should a token be displayed in an error message? The default 401 * is to display just the text, but during development you might 402 * want to have a lot of information spit out. Override in that case 403 * to use t.ToString() (which, for CommonToken, dumps everything about 404 * the token). This is better than forcing you to override a method in 405 * your token objects because you don't have to go modify your lexer 406 * so that it creates a new Java type. 407 * </summary> 408 */ GetTokenErrorDisplay( IToken t )409 public virtual string GetTokenErrorDisplay( IToken t ) 410 { 411 string s = t.Text; 412 if ( s == null ) 413 { 414 if ( t.Type == TokenTypes.EndOfFile ) 415 { 416 s = "<EOF>"; 417 } 418 else 419 { 420 s = "<" + t.Type + ">"; 421 } 422 } 423 s = Regex.Replace( s, "\n", "\\\\n" ); 424 s = Regex.Replace( s, "\r", "\\\\r" ); 425 s = Regex.Replace( s, "\t", "\\\\t" ); 426 return "'" + s + "'"; 427 } 428 429 /** <summary>Override this method to change where error messages go</summary> */ EmitErrorMessage( string msg )430 public virtual void EmitErrorMessage( string msg ) 431 { 432 if (TraceDestination != null) 433 TraceDestination.WriteLine( msg ); 434 } 435 436 /** <summary> 437 * Recover from an error found on the input stream. This is 438 * for NoViableAlt and mismatched symbol exceptions. If you enable 439 * single token insertion and deletion, this will usually not 440 * handle mismatched symbol exceptions but there could be a mismatched 441 * token that the match() routine could not recover from. 442 * </summary> 443 */ Recover( IIntStream input, RecognitionException re )444 public virtual void Recover( IIntStream input, RecognitionException re ) 445 { 446 if ( state.lastErrorIndex == input.Index ) 447 { 448 // uh oh, another error at same token index; must be a case 449 // where LT(1) is in the recovery token set so nothing is 450 // consumed; consume a single token so at least to prevent 451 // an infinite loop; this is a failsafe. 452 input.Consume(); 453 } 454 state.lastErrorIndex = input.Index; 455 BitSet followSet = ComputeErrorRecoverySet(); 456 BeginResync(); 457 ConsumeUntil( input, followSet ); 458 EndResync(); 459 } 460 461 /** <summary> 462 * A hook to listen in on the token consumption during error recovery. 463 * The DebugParser subclasses this to fire events to the listenter. 464 * </summary> 465 */ BeginResync()466 public virtual void BeginResync() 467 { 468 } 469 EndResync()470 public virtual void EndResync() 471 { 472 } 473 474 /* Compute the error recovery set for the current rule. During 475 * rule invocation, the parser pushes the set of tokens that can 476 * follow that rule reference on the stack; this amounts to 477 * computing FIRST of what follows the rule reference in the 478 * enclosing rule. This local follow set only includes tokens 479 * from within the rule; i.e., the FIRST computation done by 480 * ANTLR stops at the end of a rule. 481 * 482 * EXAMPLE 483 * 484 * When you find a "no viable alt exception", the input is not 485 * consistent with any of the alternatives for rule r. The best 486 * thing to do is to consume tokens until you see something that 487 * can legally follow a call to r *or* any rule that called r. 488 * You don't want the exact set of viable next tokens because the 489 * input might just be missing a token--you might consume the 490 * rest of the input looking for one of the missing tokens. 491 * 492 * Consider grammar: 493 * 494 * a : '[' b ']' 495 * | '(' b ')' 496 * ; 497 * b : c '^' INT ; 498 * c : ID 499 * | INT 500 * ; 501 * 502 * At each rule invocation, the set of tokens that could follow 503 * that rule is pushed on a stack. Here are the various "local" 504 * follow sets: 505 * 506 * FOLLOW(b1_in_a) = FIRST(']') = ']' 507 * FOLLOW(b2_in_a) = FIRST(')') = ')' 508 * FOLLOW(c_in_b) = FIRST('^') = '^' 509 * 510 * Upon erroneous input "[]", the call chain is 511 * 512 * a -> b -> c 513 * 514 * and, hence, the follow context stack is: 515 * 516 * depth local follow set after call to rule 517 * 0 <EOF> a (from main()) 518 * 1 ']' b 519 * 3 '^' c 520 * 521 * Notice that ')' is not included, because b would have to have 522 * been called from a different context in rule a for ')' to be 523 * included. 524 * 525 * For error recovery, we cannot consider FOLLOW(c) 526 * (context-sensitive or otherwise). We need the combined set of 527 * all context-sensitive FOLLOW sets--the set of all tokens that 528 * could follow any reference in the call chain. We need to 529 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 530 * we resync'd to that token, we'd consume until EOF. We need to 531 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 532 * In this case, for input "[]", LA(1) is in this set so we would 533 * not consume anything and after printing an error rule c would 534 * return normally. It would not find the required '^' though. 535 * At this point, it gets a mismatched token error and throws an 536 * exception (since LA(1) is not in the viable following token 537 * set). The rule exception handler tries to recover, but finds 538 * the same recovery set and doesn't consume anything. Rule b 539 * exits normally returning to rule a. Now it finds the ']' (and 540 * with the successful match exits errorRecovery mode). 541 * 542 * So, you cna see that the parser walks up call chain looking 543 * for the token that was a member of the recovery set. 544 * 545 * Errors are not generated in errorRecovery mode. 546 * 547 * ANTLR's error recovery mechanism is based upon original ideas: 548 * 549 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 550 * 551 * and 552 * 553 * "A note on error recovery in recursive descent parsers": 554 * http://portal.acm.org/citation.cfm?id=947902.947905 555 * 556 * Later, Josef Grosch had some good ideas: 557 * 558 * "Efficient and Comfortable Error Recovery in Recursive Descent 559 * Parsers": 560 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 561 * 562 * Like Grosch I implemented local FOLLOW sets that are combined 563 * at run-time upon error to avoid overhead during parsing. 564 */ ComputeErrorRecoverySet()565 protected virtual BitSet ComputeErrorRecoverySet() 566 { 567 return CombineFollows( false ); 568 } 569 570 /** <summary> 571 * Compute the context-sensitive FOLLOW set for current rule. 572 * This is set of token types that can follow a specific rule 573 * reference given a specific call chain. You get the set of 574 * viable tokens that can possibly come next (lookahead depth 1) 575 * given the current call chain. Contrast this with the 576 * definition of plain FOLLOW for rule r: 577 * </summary> 578 * 579 * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} 580 * 581 * where x in T* and alpha, beta in V*; T is set of terminals and 582 * V is the set of terminals and nonterminals. In other words, 583 * FOLLOW(r) is the set of all tokens that can possibly follow 584 * references to r in *any* sentential form (context). At 585 * runtime, however, we know precisely which context applies as 586 * we have the call chain. We may compute the exact (rather 587 * than covering superset) set of following tokens. 588 * 589 * For example, consider grammar: 590 * 591 * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} 592 * | "return" expr '.' 593 * ; 594 * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} 595 * atom : INT // FOLLOW(atom)=={'+',')',';','.'} 596 * | '(' expr ')' 597 * ; 598 * 599 * The FOLLOW sets are all inclusive whereas context-sensitive 600 * FOLLOW sets are precisely what could follow a rule reference. 601 * For input input "i=(3);", here is the derivation: 602 * 603 * stat => ID '=' expr ';' 604 * => ID '=' atom ('+' atom)* ';' 605 * => ID '=' '(' expr ')' ('+' atom)* ';' 606 * => ID '=' '(' atom ')' ('+' atom)* ';' 607 * => ID '=' '(' INT ')' ('+' atom)* ';' 608 * => ID '=' '(' INT ')' ';' 609 * 610 * At the "3" token, you'd have a call chain of 611 * 612 * stat -> expr -> atom -> expr -> atom 613 * 614 * What can follow that specific nested ref to atom? Exactly ')' 615 * as you can see by looking at the derivation of this specific 616 * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. 617 * 618 * You want the exact viable token set when recovering from a 619 * token mismatch. Upon token mismatch, if LA(1) is member of 620 * the viable next token set, then you know there is most likely 621 * a missing token in the input stream. "Insert" one by just not 622 * throwing an exception. 623 */ ComputeContextSensitiveRuleFOLLOW()624 protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() 625 { 626 return CombineFollows( true ); 627 } 628 629 // what is exact? it seems to only add sets from above on stack 630 // if EOR is in set i. When it sees a set w/o EOR, it stops adding. 631 // Why would we ever want them all? Maybe no viable alt instead of 632 // mismatched token? CombineFollows(bool exact)633 protected virtual BitSet CombineFollows(bool exact) 634 { 635 int top = state._fsp; 636 BitSet followSet = new BitSet(); 637 for ( int i = top; i >= 0; i-- ) 638 { 639 BitSet localFollowSet = (BitSet)state.following[i]; 640 /* 641 System.out.println("local follow depth "+i+"="+ 642 localFollowSet.toString(getTokenNames())+")"); 643 */ 644 followSet.OrInPlace( localFollowSet ); 645 if ( exact ) 646 { 647 // can we see end of rule? 648 if ( localFollowSet.Member( TokenTypes.EndOfRule ) ) 649 { 650 // Only leave EOR in set if at top (start rule); this lets 651 // us know if have to include follow(start rule); i.e., EOF 652 if ( i > 0 ) 653 { 654 followSet.Remove( TokenTypes.EndOfRule ); 655 } 656 } 657 else 658 { // can't see end of rule, quit 659 break; 660 } 661 } 662 } 663 return followSet; 664 } 665 666 /** <summary>Attempt to recover from a single missing or extra token.</summary> 667 * 668 * EXTRA TOKEN 669 * 670 * LA(1) is not what we are looking for. If LA(2) has the right token, 671 * however, then assume LA(1) is some extra spurious token. Delete it 672 * and LA(2) as if we were doing a normal match(), which advances the 673 * input. 674 * 675 * MISSING TOKEN 676 * 677 * If current token is consistent with what could come after 678 * ttype then it is ok to "insert" the missing token, else throw 679 * exception For example, Input "i=(3;" is clearly missing the 680 * ')'. When the parser returns from the nested call to expr, it 681 * will have call chain: 682 * 683 * stat -> expr -> atom 684 * 685 * and it will be trying to match the ')' at this point in the 686 * derivation: 687 * 688 * => ID '=' '(' INT ')' ('+' atom)* ';' 689 * ^ 690 * match() will see that ';' doesn't match ')' and report a 691 * mismatched token error. To recover, it sees that LA(1)==';' 692 * is in the set of tokens that can follow the ')' token 693 * reference in rule atom. It can assume that you forgot the ')'. 694 */ RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow )695 protected virtual object RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow ) 696 { 697 RecognitionException e = null; 698 // if next token is what we are looking for then "delete" this token 699 if ( MismatchIsUnwantedToken( input, ttype ) ) 700 { 701 e = new UnwantedTokenException( ttype, input, TokenNames ); 702 /* 703 System.err.println("recoverFromMismatchedToken deleting "+ 704 ((TokenStream)input).LT(1)+ 705 " since "+((TokenStream)input).LT(2)+" is what we want"); 706 */ 707 BeginResync(); 708 input.Consume(); // simply delete extra token 709 EndResync(); 710 ReportError( e ); // report after consuming so AW sees the token in the exception 711 // we want to return the token we're actually matching 712 object matchedSymbol = GetCurrentInputSymbol( input ); 713 input.Consume(); // move past ttype token as if all were ok 714 return matchedSymbol; 715 } 716 // can't recover with single token deletion, try insertion 717 if ( MismatchIsMissingToken( input, follow ) ) 718 { 719 object inserted = GetMissingSymbol( input, e, ttype, follow ); 720 e = new MissingTokenException( ttype, input, inserted ); 721 ReportError( e ); // report after inserting so AW sees the token in the exception 722 return inserted; 723 } 724 // even that didn't work; must throw the exception 725 e = new MismatchedTokenException(ttype, input, TokenNames); 726 throw e; 727 } 728 729 /** Not currently used */ RecoverFromMismatchedSet( IIntStream input, RecognitionException e, BitSet follow )730 public virtual object RecoverFromMismatchedSet( IIntStream input, 731 RecognitionException e, 732 BitSet follow ) 733 { 734 if ( MismatchIsMissingToken( input, follow ) ) 735 { 736 // System.out.println("missing token"); 737 ReportError( e ); 738 // we don't know how to conjure up a token for sets yet 739 return GetMissingSymbol( input, e, TokenTypes.Invalid, follow ); 740 } 741 // TODO do single token deletion like above for Token mismatch 742 throw e; 743 } 744 745 /** <summary> 746 * Match needs to return the current input symbol, which gets put 747 * into the label for the associated token ref; e.g., x=ID. Token 748 * and tree parsers need to return different objects. Rather than test 749 * for input stream type or change the IntStream interface, I use 750 * a simple method to ask the recognizer to tell me what the current 751 * input symbol is. 752 * </summary> 753 * 754 * <remarks>This is ignored for lexers.</remarks> 755 */ GetCurrentInputSymbol( IIntStream input )756 protected virtual object GetCurrentInputSymbol( IIntStream input ) 757 { 758 return null; 759 } 760 761 /** <summary>Conjure up a missing token during error recovery.</summary> 762 * 763 * <remarks> 764 * The recognizer attempts to recover from single missing 765 * symbols. But, actions might refer to that missing symbol. 766 * For example, x=ID {f($x);}. The action clearly assumes 767 * that there has been an identifier matched previously and that 768 * $x points at that token. If that token is missing, but 769 * the next token in the stream is what we want we assume that 770 * this token is missing and we keep going. Because we 771 * have to return some token to replace the missing token, 772 * we have to conjure one up. This method gives the user control 773 * over the tokens returned for missing tokens. Mostly, 774 * you will want to create something special for identifier 775 * tokens. For literals such as '{' and ',', the default 776 * action in the parser or tree parser works. It simply creates 777 * a CommonToken of the appropriate type. The text will be the token. 778 * If you change what tokens must be created by the lexer, 779 * override this method to create the appropriate tokens. 780 * </remarks> 781 */ GetMissingSymbol( IIntStream input, RecognitionException e, int expectedTokenType, BitSet follow )782 protected virtual object GetMissingSymbol( IIntStream input, 783 RecognitionException e, 784 int expectedTokenType, 785 BitSet follow ) 786 { 787 return null; 788 } 789 ConsumeUntil( IIntStream input, int tokenType )790 public virtual void ConsumeUntil( IIntStream input, int tokenType ) 791 { 792 //System.out.println("consumeUntil "+tokenType); 793 int ttype = input.LA( 1 ); 794 while ( ttype != TokenTypes.EndOfFile && ttype != tokenType ) 795 { 796 input.Consume(); 797 ttype = input.LA( 1 ); 798 } 799 } 800 801 /** <summary>Consume tokens until one matches the given token set</summary> */ ConsumeUntil( IIntStream input, BitSet set )802 public virtual void ConsumeUntil( IIntStream input, BitSet set ) 803 { 804 //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); 805 int ttype = input.LA( 1 ); 806 while ( ttype != TokenTypes.EndOfFile && !set.Member( ttype ) ) 807 { 808 //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); 809 input.Consume(); 810 ttype = input.LA( 1 ); 811 } 812 } 813 814 /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */ PushFollow( BitSet fset )815 protected void PushFollow( BitSet fset ) 816 { 817 if ( ( state._fsp + 1 ) >= state.following.Length ) 818 { 819 Array.Resize(ref state.following, state.following.Length * 2); 820 } 821 state.following[++state._fsp] = fset; 822 } 823 PopFollow()824 protected void PopFollow() 825 { 826 state._fsp--; 827 } 828 829 /** <summary> 830 * Return List<String> of the rules in your parser instance 831 * leading up to a call to this method. You could override if 832 * you want more details such as the file/line info of where 833 * in the parser java code a rule is invoked. 834 * </summary> 835 * 836 * <remarks> 837 * This is very useful for error messages and for context-sensitive 838 * error recovery. 839 * </remarks> 840 */ GetRuleInvocationStack()841 public virtual IList<string> GetRuleInvocationStack() 842 { 843 return GetRuleInvocationStack( new StackTrace(true) ); 844 } 845 846 /** <summary> 847 * A more general version of GetRuleInvocationStack where you can 848 * pass in the StackTrace of, for example, a RecognitionException 849 * to get it's rule stack trace. 850 * </summary> 851 */ GetRuleInvocationStack(StackTrace trace)852 public static IList<string> GetRuleInvocationStack(StackTrace trace) 853 { 854 if (trace == null) 855 throw new ArgumentNullException("trace"); 856 857 List<string> rules = new List<string>(); 858 StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0]; 859 860 for (int i = stack.Length - 1; i >= 0; i--) 861 { 862 StackFrame frame = stack[i]; 863 MethodBase method = frame.GetMethod(); 864 GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true); 865 if (attributes != null && attributes.Length > 0) 866 rules.Add(attributes[0].Name); 867 } 868 869 return rules; 870 } 871 872 public virtual int BacktrackingLevel 873 { 874 get 875 { 876 return state.backtracking; 877 } 878 set 879 { 880 state.backtracking = value; 881 } 882 } 883 884 /** <summary>Return whether or not a backtracking attempt failed.</summary> */ 885 public virtual bool Failed 886 { 887 get 888 { 889 return state.failed; 890 } 891 } 892 893 /** <summary> 894 * Used to print out token names like ID during debugging and 895 * error reporting. The generated parsers implement a method 896 * that overrides this to point to their String[] tokenNames. 897 * </summary> 898 */ 899 public virtual string[] TokenNames 900 { 901 get 902 { 903 return null; 904 } 905 } 906 907 /** <summary> 908 * For debugging and other purposes, might want the grammar name. 909 * Have ANTLR generate an implementation for this method. 910 * </summary> 911 */ 912 public virtual string GrammarFileName 913 { 914 get 915 { 916 return null; 917 } 918 } 919 920 public abstract string SourceName 921 { 922 get; 923 } 924 925 /** <summary> 926 * A convenience method for use most often with template rewrites. 927 * Convert a List<Token> to List<String> 928 * </summary> 929 */ ToStrings( ICollection<IToken> tokens )930 public virtual List<string> ToStrings( ICollection<IToken> tokens ) 931 { 932 if ( tokens == null ) 933 return null; 934 935 List<string> strings = new List<string>( tokens.Count ); 936 foreach ( IToken token in tokens ) 937 { 938 strings.Add( token.Text ); 939 } 940 941 return strings; 942 } 943 944 /** <summary> 945 * Given a rule number and a start token index number, return 946 * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 947 * start index. If this rule has parsed input starting from the 948 * start index before, then return where the rule stopped parsing. 949 * It returns the index of the last token matched by the rule. 950 * </summary> 951 * 952 * <remarks> 953 * For now we use a hashtable and just the slow Object-based one. 954 * Later, we can make a special one for ints and also one that 955 * tosses out data after we commit past input position i. 956 * </remarks> 957 */ GetRuleMemoization( int ruleIndex, int ruleStartIndex )958 public virtual int GetRuleMemoization( int ruleIndex, int ruleStartIndex ) 959 { 960 if ( state.ruleMemo[ruleIndex] == null ) 961 { 962 state.ruleMemo[ruleIndex] = new Dictionary<int, int>(); 963 } 964 965 int stopIndex; 966 if ( !state.ruleMemo[ruleIndex].TryGetValue( ruleStartIndex, out stopIndex ) ) 967 return MemoRuleUnknown; 968 969 return stopIndex; 970 } 971 972 /** <summary> 973 * Has this rule already parsed input at the current index in the 974 * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 975 * If we attempted but failed to parse properly before, return 976 * MEMO_RULE_FAILED. 977 * </summary> 978 * 979 * <remarks> 980 * This method has a side-effect: if we have seen this input for 981 * this rule and successfully parsed before, then seek ahead to 982 * 1 past the stop token matched for this rule last time. 983 * </remarks> 984 */ AlreadyParsedRule( IIntStream input, int ruleIndex )985 public virtual bool AlreadyParsedRule( IIntStream input, int ruleIndex ) 986 { 987 int stopIndex = GetRuleMemoization( ruleIndex, input.Index ); 988 if ( stopIndex == MemoRuleUnknown ) 989 { 990 return false; 991 } 992 if ( stopIndex == MemoRuleFailed ) 993 { 994 //System.out.println("rule "+ruleIndex+" will never succeed"); 995 state.failed = true; 996 } 997 else 998 { 999 //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed); 1000 input.Seek( stopIndex + 1 ); // jump to one past stop token 1001 } 1002 return true; 1003 } 1004 1005 /** <summary> 1006 * Record whether or not this rule parsed the input at this position 1007 * successfully. Use a standard java hashtable for now. 1008 * </summary> 1009 */ Memoize( IIntStream input, int ruleIndex, int ruleStartIndex )1010 public virtual void Memoize( IIntStream input, 1011 int ruleIndex, 1012 int ruleStartIndex ) 1013 { 1014 int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1; 1015 if ( state.ruleMemo == null ) 1016 { 1017 if (TraceDestination != null) 1018 TraceDestination.WriteLine( "!!!!!!!!! memo array is null for " + GrammarFileName ); 1019 } 1020 if ( ruleIndex >= state.ruleMemo.Length ) 1021 { 1022 if (TraceDestination != null) 1023 TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex); 1024 } 1025 if ( state.ruleMemo[ruleIndex] != null ) 1026 { 1027 state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; 1028 } 1029 } 1030 1031 /** <summary>return how many rule/input-index pairs there are in total.</summary> 1032 * TODO: this includes synpreds. :( 1033 */ GetRuleMemoizationCacheSize()1034 public virtual int GetRuleMemoizationCacheSize() 1035 { 1036 int n = 0; 1037 for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ ) 1038 { 1039 var ruleMap = state.ruleMemo[i]; 1040 if ( ruleMap != null ) 1041 { 1042 n += ruleMap.Count; // how many input indexes are recorded? 1043 } 1044 } 1045 return n; 1046 } 1047 TraceIn(string ruleName, int ruleIndex, object inputSymbol)1048 public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) 1049 { 1050 if (TraceDestination == null) 1051 return; 1052 1053 TraceDestination.Write("enter " + ruleName + " " + inputSymbol); 1054 if (state.backtracking > 0) 1055 { 1056 TraceDestination.Write(" backtracking=" + state.backtracking); 1057 } 1058 TraceDestination.WriteLine(); 1059 } 1060 TraceOut(string ruleName, int ruleIndex, object inputSymbol)1061 public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) 1062 { 1063 if (TraceDestination == null) 1064 return; 1065 1066 TraceDestination.Write("exit " + ruleName + " " + inputSymbol); 1067 if (state.backtracking > 0) 1068 { 1069 TraceDestination.Write(" backtracking=" + state.backtracking); 1070 if (state.failed) 1071 TraceDestination.Write(" failed"); 1072 else 1073 TraceDestination.Write(" succeeded"); 1074 } 1075 TraceDestination.WriteLine(); 1076 } 1077 1078 #region Debugging support 1079 public virtual IDebugEventListener DebugListener 1080 { 1081 get 1082 { 1083 return null; 1084 } 1085 } 1086 1087 [Conditional("ANTLR_DEBUG")] DebugEnterRule(string grammarFileName, string ruleName)1088 protected virtual void DebugEnterRule(string grammarFileName, string ruleName) 1089 { 1090 IDebugEventListener dbg = DebugListener; 1091 if (dbg != null) 1092 dbg.EnterRule(grammarFileName, ruleName); 1093 } 1094 1095 [Conditional("ANTLR_DEBUG")] DebugExitRule(string grammarFileName, string ruleName)1096 protected virtual void DebugExitRule(string grammarFileName, string ruleName) 1097 { 1098 IDebugEventListener dbg = DebugListener; 1099 if (dbg != null) 1100 dbg.ExitRule(grammarFileName, ruleName); 1101 } 1102 1103 [Conditional("ANTLR_DEBUG")] DebugEnterSubRule(int decisionNumber)1104 protected virtual void DebugEnterSubRule(int decisionNumber) 1105 { 1106 IDebugEventListener dbg = DebugListener; 1107 if (dbg != null) 1108 dbg.EnterSubRule(decisionNumber); 1109 } 1110 1111 [Conditional("ANTLR_DEBUG")] DebugExitSubRule(int decisionNumber)1112 protected virtual void DebugExitSubRule(int decisionNumber) 1113 { 1114 IDebugEventListener dbg = DebugListener; 1115 if (dbg != null) 1116 dbg.ExitSubRule(decisionNumber); 1117 } 1118 1119 [Conditional("ANTLR_DEBUG")] DebugEnterAlt(int alt)1120 protected virtual void DebugEnterAlt(int alt) 1121 { 1122 IDebugEventListener dbg = DebugListener; 1123 if (dbg != null) 1124 dbg.EnterAlt(alt); 1125 } 1126 1127 [Conditional("ANTLR_DEBUG")] DebugEnterDecision(int decisionNumber, bool couldBacktrack)1128 protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) 1129 { 1130 IDebugEventListener dbg = DebugListener; 1131 if (dbg != null) 1132 dbg.EnterDecision(decisionNumber, couldBacktrack); 1133 } 1134 1135 [Conditional("ANTLR_DEBUG")] DebugExitDecision(int decisionNumber)1136 protected virtual void DebugExitDecision(int decisionNumber) 1137 { 1138 IDebugEventListener dbg = DebugListener; 1139 if (dbg != null) 1140 dbg.ExitDecision(decisionNumber); 1141 } 1142 1143 [Conditional("ANTLR_DEBUG")] DebugLocation(int line, int charPositionInLine)1144 protected virtual void DebugLocation(int line, int charPositionInLine) 1145 { 1146 IDebugEventListener dbg = DebugListener; 1147 if (dbg != null) 1148 dbg.Location(line, charPositionInLine); 1149 } 1150 1151 [Conditional("ANTLR_DEBUG")] DebugSemanticPredicate(bool result, string predicate)1152 protected virtual void DebugSemanticPredicate(bool result, string predicate) 1153 { 1154 IDebugEventListener dbg = DebugListener; 1155 if (dbg != null) 1156 dbg.SemanticPredicate(result, predicate); 1157 } 1158 1159 [Conditional("ANTLR_DEBUG")] DebugBeginBacktrack(int level)1160 protected virtual void DebugBeginBacktrack(int level) 1161 { 1162 IDebugEventListener dbg = DebugListener; 1163 if (dbg != null) 1164 dbg.BeginBacktrack(level); 1165 } 1166 1167 [Conditional("ANTLR_DEBUG")] DebugEndBacktrack(int level, bool successful)1168 protected virtual void DebugEndBacktrack(int level, bool successful) 1169 { 1170 IDebugEventListener dbg = DebugListener; 1171 if (dbg != null) 1172 dbg.EndBacktrack(level, successful); 1173 } 1174 1175 [Conditional("ANTLR_DEBUG")] DebugRecognitionException(RecognitionException ex)1176 protected virtual void DebugRecognitionException(RecognitionException ex) 1177 { 1178 IDebugEventListener dbg = DebugListener; 1179 if (dbg != null) 1180 dbg.RecognitionException(ex); 1181 } 1182 #endregion 1183 } 1184 } 1185