1 /* 2 * [The "BSD licence"] 3 * Copyright (c) 2005-2008 Terence Parr 4 * All rights reserved. 5 * 6 * Conversion to C#: 7 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 namespace Antlr.Runtime { 34 using System.Collections.Generic; 35 36 using ArgumentNullException = System.ArgumentNullException; 37 using Array = System.Array; 38 using Conditional = System.Diagnostics.ConditionalAttribute; 39 using Exception = System.Exception; 40 using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener; 41 using MethodBase = System.Reflection.MethodBase; 42 using NotSupportedException = System.NotSupportedException; 43 using Regex = System.Text.RegularExpressions.Regex; 44 using StackFrame = System.Diagnostics.StackFrame; 45 using StackTrace = System.Diagnostics.StackTrace; 46 using TextWriter = System.IO.TextWriter; 47 using Type = System.Type; 48 49 /** <summary> 50 * A generic recognizer that can handle recognizers generated from 51 * lexer, parser, and tree grammars. This is all the parsing 52 * support code essentially; most of it is error recovery stuff and 53 * backtracking. 54 * </summary> 55 */ 56 public abstract class BaseRecognizer { 57 public const int MemoRuleFailed = -2; 58 public const int MemoRuleUnknown = -1; 59 public const int InitialFollowStackSize = 100; 60 61 // copies from Token object for convenience in actions 62 public const int DefaultTokenChannel = TokenChannels.Default; 63 public const int Hidden = TokenChannels.Hidden; 64 65 public const string NextTokenRuleName = "nextToken"; 66 67 /** <summary> 68 * State of a lexer, parser, or tree parser are collected into a state 69 * object so the state can be shared. This sharing is needed to 70 * have one grammar import others and share same error variables 71 * and other state variables. It's a kind of explicit multiple 72 * inheritance via delegation of methods and shared state. 73 * </summary> 74 */ 75 protected internal RecognizerSharedState state; 76 BaseRecognizer()77 public BaseRecognizer() 78 : this(new RecognizerSharedState()) { 79 } 80 BaseRecognizer(RecognizerSharedState state)81 public BaseRecognizer(RecognizerSharedState state) { 82 if (state == null) { 83 state = new RecognizerSharedState(); 84 } 85 this.state = state; 86 InitDFAs(); 87 } 88 89 public TextWriter TraceDestination { 90 get; 91 set; 92 } 93 InitDFAs()94 protected virtual void InitDFAs() { 95 } 96 97 /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */ Reset()98 public virtual void Reset() { 99 // wack everything related to error recovery 100 if (state == null) { 101 return; // no shared state work to do 102 } 103 state._fsp = -1; 104 state.errorRecovery = false; 105 state.lastErrorIndex = -1; 106 state.failed = false; 107 state.syntaxErrors = 0; 108 // wack everything related to backtracking and memoization 109 state.backtracking = 0; 110 for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache 111 state.ruleMemo[i] = null; 112 } 113 } 114 115 116 /** <summary> 117 * Match current input symbol against ttype. Attempt 118 * single token insertion or deletion error recovery. If 119 * that fails, throw MismatchedTokenException. 120 * </summary> 121 * 122 * <remarks> 123 * To turn off single token insertion or deletion error 124 * recovery, override recoverFromMismatchedToken() and have it 125 * throw an exception. See TreeParser.recoverFromMismatchedToken(). 126 * This way any error in a rule will cause an exception and 127 * immediate exit from rule. Rule would recover by resynchronizing 128 * to the set of symbols that can follow rule ref. 129 * </remarks> 130 */ Match(IIntStream input, int ttype, BitSet follow)131 public virtual object Match(IIntStream input, int ttype, BitSet follow) { 132 //System.out.println("match "+((TokenStream)input).LT(1)); 133 object matchedSymbol = GetCurrentInputSymbol(input); 134 if (input.LA(1) == ttype) { 135 input.Consume(); 136 state.errorRecovery = false; 137 state.failed = false; 138 return matchedSymbol; 139 } 140 if (state.backtracking > 0) { 141 state.failed = true; 142 return matchedSymbol; 143 } 144 matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow); 145 return matchedSymbol; 146 } 147 148 /** <summary>Match the wildcard: in a symbol</summary> */ MatchAny(IIntStream input)149 public virtual void MatchAny(IIntStream input) { 150 state.errorRecovery = false; 151 state.failed = false; 152 input.Consume(); 153 } 154 MismatchIsUnwantedToken(IIntStream input, int ttype)155 public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) { 156 return input.LA(2) == ttype; 157 } 158 MismatchIsMissingToken(IIntStream input, BitSet follow)159 public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) { 160 if (follow == null) { 161 // we have no information about the follow; we can only consume 162 // a single token and hope for the best 163 return false; 164 } 165 // compute what can follow this grammar element reference 166 if (follow.Member(TokenTypes.EndOfRule)) { 167 BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW(); 168 follow = follow.Or(viableTokensFollowingThisRule); 169 if (state._fsp >= 0) { // remove EOR if we're not the start symbol 170 follow.Remove(TokenTypes.EndOfRule); 171 } 172 } 173 // if current token is consistent with what could come after set 174 // then we know we're missing a token; error recovery is free to 175 // "insert" the missing token 176 177 //System.out.println("viable tokens="+follow.toString(getTokenNames())); 178 //System.out.println("LT(1)="+((TokenStream)input).LT(1)); 179 180 // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 181 // in follow set to indicate that the fall of the start symbol is 182 // in the set (EOF can follow). 183 if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) { 184 //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); 185 return true; 186 } 187 return false; 188 } 189 190 /** <summary>Report a recognition problem.</summary> 191 * 192 * <remarks> 193 * This method sets errorRecovery to indicate the parser is recovering 194 * not parsing. Once in recovery mode, no errors are generated. 195 * To get out of recovery mode, the parser must successfully match 196 * a token (after a resync). So it will go: 197 * 198 * 1. error occurs 199 * 2. enter recovery mode, report error 200 * 3. consume until token found in resynch set 201 * 4. try to resume parsing 202 * 5. next match() will reset errorRecovery mode 203 * 204 * If you override, make sure to update syntaxErrors if you care about that. 205 * </remarks> 206 */ ReportError(RecognitionException e)207 public virtual void ReportError(RecognitionException e) { 208 // if we've already reported an error and have not matched a token 209 // yet successfully, don't report any errors. 210 if (state.errorRecovery) { 211 //System.err.print("[SPURIOUS] "); 212 return; 213 } 214 state.syntaxErrors++; // don't count spurious 215 state.errorRecovery = true; 216 217 DisplayRecognitionError(this.TokenNames, e); 218 } 219 DisplayRecognitionError(string[] tokenNames, RecognitionException e)220 public virtual void DisplayRecognitionError(string[] tokenNames, 221 RecognitionException e) { 222 string hdr = GetErrorHeader(e); 223 string msg = GetErrorMessage(e, tokenNames); 224 EmitErrorMessage(hdr + " " + msg); 225 } 226 227 /** <summary>What error message should be generated for the various exception types?</summary> 228 * 229 * <remarks> 230 * Not very object-oriented code, but I like having all error message 231 * generation within one method rather than spread among all of the 232 * exception classes. This also makes it much easier for the exception 233 * handling because the exception classes do not have to have pointers back 234 * to this object to access utility routines and so on. Also, changing 235 * the message for an exception type would be difficult because you 236 * would have to subclassing exception, but then somehow get ANTLR 237 * to make those kinds of exception objects instead of the default. 238 * This looks weird, but trust me--it makes the most sense in terms 239 * of flexibility. 240 * 241 * For grammar debugging, you will want to override this to add 242 * more information such as the stack frame with 243 * getRuleInvocationStack(e, this.getClass().getName()) and, 244 * for no viable alts, the decision description and state etc... 245 * 246 * Override this to change the message generated for one or more 247 * exception types. 248 * </remarks> 249 */ GetErrorMessage(RecognitionException e, string[] tokenNames)250 public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) { 251 string msg = e.Message; 252 if (e is UnwantedTokenException) { 253 UnwantedTokenException ute = (UnwantedTokenException)e; 254 string tokenName = "<unknown>"; 255 if (ute.Expecting == TokenTypes.EndOfFile) { 256 tokenName = "EndOfFile"; 257 } else { 258 tokenName = tokenNames[ute.Expecting]; 259 } 260 msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) + 261 " expecting " + tokenName; 262 } else if (e is MissingTokenException) { 263 MissingTokenException mte = (MissingTokenException)e; 264 string tokenName = "<unknown>"; 265 if (mte.Expecting == TokenTypes.EndOfFile) { 266 tokenName = "EndOfFile"; 267 } else { 268 tokenName = tokenNames[mte.Expecting]; 269 } 270 msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token); 271 } else if (e is MismatchedTokenException) { 272 MismatchedTokenException mte = (MismatchedTokenException)e; 273 string tokenName = "<unknown>"; 274 if (mte.Expecting == TokenTypes.EndOfFile) { 275 tokenName = "EndOfFile"; 276 } else { 277 tokenName = tokenNames[mte.Expecting]; 278 } 279 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + 280 " expecting " + tokenName; 281 } else if (e is MismatchedTreeNodeException) { 282 MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; 283 string tokenName = "<unknown>"; 284 if (mtne.Expecting == TokenTypes.EndOfFile) { 285 tokenName = "EndOfFile"; 286 } else { 287 tokenName = tokenNames[mtne.Expecting]; 288 } 289 // workaround for a .NET framework bug (NullReferenceException) 290 string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty; 291 msg = "mismatched tree node: " + nodeText + " expecting " + tokenName; 292 } else if (e is NoViableAltException) { 293 //NoViableAltException nvae = (NoViableAltException)e; 294 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" 295 // and "(decision="+nvae.decisionNumber+") and 296 // "state "+nvae.stateNumber 297 msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token); 298 } else if (e is EarlyExitException) { 299 //EarlyExitException eee = (EarlyExitException)e; 300 // for development, can add "(decision="+eee.decisionNumber+")" 301 msg = "required (...)+ loop did not match anything at input " + 302 GetTokenErrorDisplay(e.Token); 303 } else if (e is MismatchedSetException) { 304 MismatchedSetException mse = (MismatchedSetException)e; 305 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + 306 " expecting set " + mse.Expecting; 307 } else if (e is MismatchedNotSetException) { 308 MismatchedNotSetException mse = (MismatchedNotSetException)e; 309 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + 310 " expecting set " + mse.Expecting; 311 } else if (e is FailedPredicateException) { 312 FailedPredicateException fpe = (FailedPredicateException)e; 313 msg = "rule " + fpe.RuleName + " failed predicate: {" + 314 fpe.PredicateText + "}?"; 315 } 316 return msg; 317 } 318 319 /** <summary> 320 * Get number of recognition errors (lexer, parser, tree parser). Each 321 * recognizer tracks its own number. So parser and lexer each have 322 * separate count. Does not count the spurious errors found between 323 * an error and next valid token match 324 * </summary> 325 * 326 * <seealso cref="reportError()"/> 327 */ 328 public virtual int NumberOfSyntaxErrors { 329 get { 330 return state.syntaxErrors; 331 } 332 } 333 334 /** <summary>What is the error header, normally line/character position information?</summary> */ GetErrorHeader(RecognitionException e)335 public virtual string GetErrorHeader(RecognitionException e) { 336 return "line " + e.Line + ":" + (e.CharPositionInLine + 1); 337 } 338 339 /** <summary> 340 * How should a token be displayed in an error message? The default 341 * is to display just the text, but during development you might 342 * want to have a lot of information spit out. Override in that case 343 * to use t.ToString() (which, for CommonToken, dumps everything about 344 * the token). This is better than forcing you to override a method in 345 * your token objects because you don't have to go modify your lexer 346 * so that it creates a new Java type. 347 * </summary> 348 */ GetTokenErrorDisplay(IToken t)349 public virtual string GetTokenErrorDisplay(IToken t) { 350 string s = t.Text; 351 if (s == null) { 352 if (t.Type == TokenTypes.EndOfFile) { 353 s = "<EOF>"; 354 } else { 355 s = "<" + t.Type + ">"; 356 } 357 } 358 s = Regex.Replace(s, "\n", "\\\\n"); 359 s = Regex.Replace(s, "\r", "\\\\r"); 360 s = Regex.Replace(s, "\t", "\\\\t"); 361 return "'" + s + "'"; 362 } 363 364 /** <summary>Override this method to change where error messages go</summary> */ EmitErrorMessage(string msg)365 public virtual void EmitErrorMessage(string msg) { 366 if (TraceDestination != null) 367 TraceDestination.WriteLine(msg); 368 } 369 370 /** <summary> 371 * Recover from an error found on the input stream. This is 372 * for NoViableAlt and mismatched symbol exceptions. If you enable 373 * single token insertion and deletion, this will usually not 374 * handle mismatched symbol exceptions but there could be a mismatched 375 * token that the match() routine could not recover from. 376 * </summary> 377 */ Recover(IIntStream input, RecognitionException re)378 public virtual void Recover(IIntStream input, RecognitionException re) { 379 if (state.lastErrorIndex == input.Index) { 380 // uh oh, another error at same token index; must be a case 381 // where LT(1) is in the recovery token set so nothing is 382 // consumed; consume a single token so at least to prevent 383 // an infinite loop; this is a failsafe. 384 input.Consume(); 385 } 386 state.lastErrorIndex = input.Index; 387 BitSet followSet = ComputeErrorRecoverySet(); 388 BeginResync(); 389 ConsumeUntil(input, followSet); 390 EndResync(); 391 } 392 393 /** <summary> 394 * A hook to listen in on the token consumption during error recovery. 395 * The DebugParser subclasses this to fire events to the listenter. 396 * </summary> 397 */ BeginResync()398 public virtual void BeginResync() { 399 } 400 EndResync()401 public virtual void EndResync() { 402 } 403 404 /* Compute the error recovery set for the current rule. During 405 * rule invocation, the parser pushes the set of tokens that can 406 * follow that rule reference on the stack; this amounts to 407 * computing FIRST of what follows the rule reference in the 408 * enclosing rule. This local follow set only includes tokens 409 * from within the rule; i.e., the FIRST computation done by 410 * ANTLR stops at the end of a rule. 411 * 412 * EXAMPLE 413 * 414 * When you find a "no viable alt exception", the input is not 415 * consistent with any of the alternatives for rule r. The best 416 * thing to do is to consume tokens until you see something that 417 * can legally follow a call to r *or* any rule that called r. 418 * You don't want the exact set of viable next tokens because the 419 * input might just be missing a token--you might consume the 420 * rest of the input looking for one of the missing tokens. 421 * 422 * Consider grammar: 423 * 424 * a : '[' b ']' 425 * | '(' b ')' 426 * ; 427 * b : c '^' INT ; 428 * c : ID 429 * | INT 430 * ; 431 * 432 * At each rule invocation, the set of tokens that could follow 433 * that rule is pushed on a stack. Here are the various "local" 434 * follow sets: 435 * 436 * FOLLOW(b1_in_a) = FIRST(']') = ']' 437 * FOLLOW(b2_in_a) = FIRST(')') = ')' 438 * FOLLOW(c_in_b) = FIRST('^') = '^' 439 * 440 * Upon erroneous input "[]", the call chain is 441 * 442 * a -> b -> c 443 * 444 * and, hence, the follow context stack is: 445 * 446 * depth local follow set after call to rule 447 * 0 <EOF> a (from main()) 448 * 1 ']' b 449 * 3 '^' c 450 * 451 * Notice that ')' is not included, because b would have to have 452 * been called from a different context in rule a for ')' to be 453 * included. 454 * 455 * For error recovery, we cannot consider FOLLOW(c) 456 * (context-sensitive or otherwise). We need the combined set of 457 * all context-sensitive FOLLOW sets--the set of all tokens that 458 * could follow any reference in the call chain. We need to 459 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 460 * we resync'd to that token, we'd consume until EOF. We need to 461 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 462 * In this case, for input "[]", LA(1) is in this set so we would 463 * not consume anything and after printing an error rule c would 464 * return normally. It would not find the required '^' though. 465 * At this point, it gets a mismatched token error and throws an 466 * exception (since LA(1) is not in the viable following token 467 * set). The rule exception handler tries to recover, but finds 468 * the same recovery set and doesn't consume anything. Rule b 469 * exits normally returning to rule a. Now it finds the ']' (and 470 * with the successful match exits errorRecovery mode). 471 * 472 * So, you cna see that the parser walks up call chain looking 473 * for the token that was a member of the recovery set. 474 * 475 * Errors are not generated in errorRecovery mode. 476 * 477 * ANTLR's error recovery mechanism is based upon original ideas: 478 * 479 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 480 * 481 * and 482 * 483 * "A note on error recovery in recursive descent parsers": 484 * http://portal.acm.org/citation.cfm?id=947902.947905 485 * 486 * Later, Josef Grosch had some good ideas: 487 * 488 * "Efficient and Comfortable Error Recovery in Recursive Descent 489 * Parsers": 490 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 491 * 492 * Like Grosch I implemented local FOLLOW sets that are combined 493 * at run-time upon error to avoid overhead during parsing. 494 */ ComputeErrorRecoverySet()495 protected virtual BitSet ComputeErrorRecoverySet() { 496 return CombineFollows(false); 497 } 498 499 /** <summary> 500 * Compute the context-sensitive FOLLOW set for current rule. 501 * This is set of token types that can follow a specific rule 502 * reference given a specific call chain. You get the set of 503 * viable tokens that can possibly come next (lookahead depth 1) 504 * given the current call chain. Contrast this with the 505 * definition of plain FOLLOW for rule r: 506 * </summary> 507 * 508 * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} 509 * 510 * where x in T* and alpha, beta in V*; T is set of terminals and 511 * V is the set of terminals and nonterminals. In other words, 512 * FOLLOW(r) is the set of all tokens that can possibly follow 513 * references to r in *any* sentential form (context). At 514 * runtime, however, we know precisely which context applies as 515 * we have the call chain. We may compute the exact (rather 516 * than covering superset) set of following tokens. 517 * 518 * For example, consider grammar: 519 * 520 * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} 521 * | "return" expr '.' 522 * ; 523 * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} 524 * atom : INT // FOLLOW(atom)=={'+',')',';','.'} 525 * | '(' expr ')' 526 * ; 527 * 528 * The FOLLOW sets are all inclusive whereas context-sensitive 529 * FOLLOW sets are precisely what could follow a rule reference. 530 * For input input "i=(3);", here is the derivation: 531 * 532 * stat => ID '=' expr ';' 533 * => ID '=' atom ('+' atom)* ';' 534 * => ID '=' '(' expr ')' ('+' atom)* ';' 535 * => ID '=' '(' atom ')' ('+' atom)* ';' 536 * => ID '=' '(' INT ')' ('+' atom)* ';' 537 * => ID '=' '(' INT ')' ';' 538 * 539 * At the "3" token, you'd have a call chain of 540 * 541 * stat -> expr -> atom -> expr -> atom 542 * 543 * What can follow that specific nested ref to atom? Exactly ')' 544 * as you can see by looking at the derivation of this specific 545 * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. 546 * 547 * You want the exact viable token set when recovering from a 548 * token mismatch. Upon token mismatch, if LA(1) is member of 549 * the viable next token set, then you know there is most likely 550 * a missing token in the input stream. "Insert" one by just not 551 * throwing an exception. 552 */ ComputeContextSensitiveRuleFOLLOW()553 protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() { 554 return CombineFollows(true); 555 } 556 557 // what is exact? it seems to only add sets from above on stack 558 // if EOR is in set i. When it sees a set w/o EOR, it stops adding. 559 // Why would we ever want them all? Maybe no viable alt instead of 560 // mismatched token? CombineFollows(bool exact)561 protected virtual BitSet CombineFollows(bool exact) { 562 int top = state._fsp; 563 BitSet followSet = new BitSet(); 564 for (int i = top; i >= 0; i--) { 565 BitSet localFollowSet = (BitSet)state.following[i]; 566 /* 567 System.out.println("local follow depth "+i+"="+ 568 localFollowSet.toString(getTokenNames())+")"); 569 */ 570 followSet.OrInPlace(localFollowSet); 571 if (exact) { 572 // can we see end of rule? 573 if (localFollowSet.Member(TokenTypes.EndOfRule)) { 574 // Only leave EOR in set if at top (start rule); this lets 575 // us know if have to include follow(start rule); i.e., EOF 576 if (i > 0) { 577 followSet.Remove(TokenTypes.EndOfRule); 578 } 579 } else { // can't see end of rule, quit 580 break; 581 } 582 } 583 } 584 return followSet; 585 } 586 587 /** <summary>Attempt to recover from a single missing or extra token.</summary> 588 * 589 * EXTRA TOKEN 590 * 591 * LA(1) is not what we are looking for. If LA(2) has the right token, 592 * however, then assume LA(1) is some extra spurious token. Delete it 593 * and LA(2) as if we were doing a normal match(), which advances the 594 * input. 595 * 596 * MISSING TOKEN 597 * 598 * If current token is consistent with what could come after 599 * ttype then it is ok to "insert" the missing token, else throw 600 * exception For example, Input "i=(3;" is clearly missing the 601 * ')'. When the parser returns from the nested call to expr, it 602 * will have call chain: 603 * 604 * stat -> expr -> atom 605 * 606 * and it will be trying to match the ')' at this point in the 607 * derivation: 608 * 609 * => ID '=' '(' INT ')' ('+' atom)* ';' 610 * ^ 611 * match() will see that ';' doesn't match ')' and report a 612 * mismatched token error. To recover, it sees that LA(1)==';' 613 * is in the set of tokens that can follow the ')' token 614 * reference in rule atom. It can assume that you forgot the ')'. 615 */ RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow)616 protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) { 617 RecognitionException e = null; 618 // if next token is what we are looking for then "delete" this token 619 if (MismatchIsUnwantedToken(input, ttype)) { 620 e = new UnwantedTokenException(ttype, input, TokenNames); 621 /* 622 System.err.println("recoverFromMismatchedToken deleting "+ 623 ((TokenStream)input).LT(1)+ 624 " since "+((TokenStream)input).LT(2)+" is what we want"); 625 */ 626 BeginResync(); 627 input.Consume(); // simply delete extra token 628 EndResync(); 629 ReportError(e); // report after consuming so AW sees the token in the exception 630 // we want to return the token we're actually matching 631 object matchedSymbol = GetCurrentInputSymbol(input); 632 input.Consume(); // move past ttype token as if all were ok 633 return matchedSymbol; 634 } 635 // can't recover with single token deletion, try insertion 636 if (MismatchIsMissingToken(input, follow)) { 637 object inserted = GetMissingSymbol(input, e, ttype, follow); 638 e = new MissingTokenException(ttype, input, inserted); 639 ReportError(e); // report after inserting so AW sees the token in the exception 640 return inserted; 641 } 642 // even that didn't work; must throw the exception 643 e = new MismatchedTokenException(ttype, input, TokenNames); 644 throw e; 645 } 646 647 /** Not currently used */ RecoverFromMismatchedSet(IIntStream input, RecognitionException e, BitSet follow)648 public virtual object RecoverFromMismatchedSet(IIntStream input, 649 RecognitionException e, 650 BitSet follow) { 651 if (MismatchIsMissingToken(input, follow)) { 652 // System.out.println("missing token"); 653 ReportError(e); 654 // we don't know how to conjure up a token for sets yet 655 return GetMissingSymbol(input, e, TokenTypes.Invalid, follow); 656 } 657 // TODO do single token deletion like above for Token mismatch 658 throw e; 659 } 660 661 /** <summary> 662 * Match needs to return the current input symbol, which gets put 663 * into the label for the associated token ref; e.g., x=ID. Token 664 * and tree parsers need to return different objects. Rather than test 665 * for input stream type or change the IntStream interface, I use 666 * a simple method to ask the recognizer to tell me what the current 667 * input symbol is. 668 * </summary> 669 * 670 * <remarks>This is ignored for lexers.</remarks> 671 */ GetCurrentInputSymbol(IIntStream input)672 protected virtual object GetCurrentInputSymbol(IIntStream input) { 673 return null; 674 } 675 676 /** <summary>Conjure up a missing token during error recovery.</summary> 677 * 678 * <remarks> 679 * The recognizer attempts to recover from single missing 680 * symbols. But, actions might refer to that missing symbol. 681 * For example, x=ID {f($x);}. The action clearly assumes 682 * that there has been an identifier matched previously and that 683 * $x points at that token. If that token is missing, but 684 * the next token in the stream is what we want we assume that 685 * this token is missing and we keep going. Because we 686 * have to return some token to replace the missing token, 687 * we have to conjure one up. This method gives the user control 688 * over the tokens returned for missing tokens. Mostly, 689 * you will want to create something special for identifier 690 * tokens. For literals such as '{' and ',', the default 691 * action in the parser or tree parser works. It simply creates 692 * a CommonToken of the appropriate type. The text will be the token. 693 * If you change what tokens must be created by the lexer, 694 * override this method to create the appropriate tokens. 695 * </remarks> 696 */ GetMissingSymbol(IIntStream input, RecognitionException e, int expectedTokenType, BitSet follow)697 protected virtual object GetMissingSymbol(IIntStream input, 698 RecognitionException e, 699 int expectedTokenType, 700 BitSet follow) { 701 return null; 702 } 703 ConsumeUntil(IIntStream input, int tokenType)704 public virtual void ConsumeUntil(IIntStream input, int tokenType) { 705 //System.out.println("consumeUntil "+tokenType); 706 int ttype = input.LA(1); 707 while (ttype != TokenTypes.EndOfFile && ttype != tokenType) { 708 input.Consume(); 709 ttype = input.LA(1); 710 } 711 } 712 713 /** <summary>Consume tokens until one matches the given token set</summary> */ ConsumeUntil(IIntStream input, BitSet set)714 public virtual void ConsumeUntil(IIntStream input, BitSet set) { 715 //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); 716 int ttype = input.LA(1); 717 while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) { 718 //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); 719 input.Consume(); 720 ttype = input.LA(1); 721 } 722 } 723 724 /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */ PushFollow(BitSet fset)725 protected void PushFollow(BitSet fset) { 726 if ((state._fsp + 1) >= state.following.Length) { 727 Array.Resize(ref state.following, state.following.Length * 2); 728 } 729 state.following[++state._fsp] = fset; 730 } 731 PopFollow()732 protected void PopFollow() { 733 state._fsp--; 734 } 735 736 /** <summary> 737 * Return List<String> of the rules in your parser instance 738 * leading up to a call to this method. You could override if 739 * you want more details such as the file/line info of where 740 * in the parser java code a rule is invoked. 741 * </summary> 742 * 743 * <remarks> 744 * This is very useful for error messages and for context-sensitive 745 * error recovery. 746 * </remarks> 747 */ GetRuleInvocationStack()748 public virtual IList<string> GetRuleInvocationStack() { 749 return GetRuleInvocationStack(new StackTrace(true)); 750 } 751 752 /** <summary> 753 * A more general version of GetRuleInvocationStack where you can 754 * pass in the StackTrace of, for example, a RecognitionException 755 * to get it's rule stack trace. 756 * </summary> 757 */ GetRuleInvocationStack(StackTrace trace)758 public static IList<string> GetRuleInvocationStack(StackTrace trace) { 759 if (trace == null) 760 throw new ArgumentNullException("trace"); 761 762 List<string> rules = new List<string>(); 763 StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0]; 764 765 for (int i = stack.Length - 1; i >= 0; i--) { 766 StackFrame frame = stack[i]; 767 MethodBase method = frame.GetMethod(); 768 GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true); 769 if (attributes != null && attributes.Length > 0) 770 rules.Add(attributes[0].Name); 771 } 772 773 return rules; 774 } 775 776 public virtual int BacktrackingLevel { 777 get { 778 return state.backtracking; 779 } 780 set { 781 state.backtracking = value; 782 } 783 } 784 785 /** <summary>Return whether or not a backtracking attempt failed.</summary> */ 786 public virtual bool Failed { 787 get { 788 return state.failed; 789 } 790 } 791 792 /** <summary> 793 * Used to print out token names like ID during debugging and 794 * error reporting. The generated parsers implement a method 795 * that overrides this to point to their String[] tokenNames. 796 * </summary> 797 */ 798 public virtual string[] TokenNames { 799 get { 800 return null; 801 } 802 } 803 804 /** <summary> 805 * For debugging and other purposes, might want the grammar name. 806 * Have ANTLR generate an implementation for this method. 807 * </summary> 808 */ 809 public virtual string GrammarFileName { 810 get { 811 return null; 812 } 813 } 814 815 public abstract string SourceName { 816 get; 817 } 818 819 /** <summary> 820 * A convenience method for use most often with template rewrites. 821 * Convert a List<Token> to List<String> 822 * </summary> 823 */ ToStrings(ICollection<IToken> tokens)824 public virtual List<string> ToStrings(ICollection<IToken> tokens) { 825 if (tokens == null) 826 return null; 827 828 List<string> strings = new List<string>(tokens.Count); 829 foreach (IToken token in tokens) { 830 strings.Add(token.Text); 831 } 832 833 return strings; 834 } 835 836 /** <summary> 837 * Given a rule number and a start token index number, return 838 * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 839 * start index. If this rule has parsed input starting from the 840 * start index before, then return where the rule stopped parsing. 841 * It returns the index of the last token matched by the rule. 842 * </summary> 843 * 844 * <remarks> 845 * For now we use a hashtable and just the slow Object-based one. 846 * Later, we can make a special one for ints and also one that 847 * tosses out data after we commit past input position i. 848 * </remarks> 849 */ GetRuleMemoization(int ruleIndex, int ruleStartIndex)850 public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) { 851 if (state.ruleMemo[ruleIndex] == null) { 852 state.ruleMemo[ruleIndex] = new Dictionary<int, int>(); 853 } 854 855 int stopIndex; 856 if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex)) 857 return MemoRuleUnknown; 858 859 return stopIndex; 860 } 861 862 /** <summary> 863 * Has this rule already parsed input at the current index in the 864 * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 865 * If we attempted but failed to parse properly before, return 866 * MEMO_RULE_FAILED. 867 * </summary> 868 * 869 * <remarks> 870 * This method has a side-effect: if we have seen this input for 871 * this rule and successfully parsed before, then seek ahead to 872 * 1 past the stop token matched for this rule last time. 873 * </remarks> 874 */ AlreadyParsedRule(IIntStream input, int ruleIndex)875 public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) { 876 int stopIndex = GetRuleMemoization(ruleIndex, input.Index); 877 if (stopIndex == MemoRuleUnknown) { 878 return false; 879 } 880 if (stopIndex == MemoRuleFailed) { 881 //System.out.println("rule "+ruleIndex+" will never succeed"); 882 state.failed = true; 883 } else { 884 //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed); 885 input.Seek(stopIndex + 1); // jump to one past stop token 886 } 887 return true; 888 } 889 890 /** <summary> 891 * Record whether or not this rule parsed the input at this position 892 * successfully. Use a standard java hashtable for now. 893 * </summary> 894 */ Memoize(IIntStream input, int ruleIndex, int ruleStartIndex)895 public virtual void Memoize(IIntStream input, 896 int ruleIndex, 897 int ruleStartIndex) { 898 int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1; 899 if (state.ruleMemo == null) { 900 if (TraceDestination != null) 901 TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName); 902 } 903 if (ruleIndex >= state.ruleMemo.Length) { 904 if (TraceDestination != null) 905 TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex); 906 } 907 if (state.ruleMemo[ruleIndex] != null) { 908 state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; 909 } 910 } 911 912 /** <summary>return how many rule/input-index pairs there are in total.</summary> 913 * TODO: this includes synpreds. :( 914 */ GetRuleMemoizationCacheSize()915 public virtual int GetRuleMemoizationCacheSize() { 916 int n = 0; 917 for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { 918 var ruleMap = state.ruleMemo[i]; 919 if (ruleMap != null) { 920 n += ruleMap.Count; // how many input indexes are recorded? 921 } 922 } 923 return n; 924 } 925 926 [Conditional("ANTLR_TRACE")] TraceIn(string ruleName, int ruleIndex, object inputSymbol)927 public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) { 928 if (TraceDestination == null) 929 return; 930 931 TraceDestination.Write("enter " + ruleName + " " + inputSymbol); 932 if (state.backtracking > 0) { 933 TraceDestination.Write(" backtracking=" + state.backtracking); 934 } 935 TraceDestination.WriteLine(); 936 } 937 938 [Conditional("ANTLR_TRACE")] TraceOut(string ruleName, int ruleIndex, object inputSymbol)939 public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) { 940 if (TraceDestination == null) 941 return; 942 943 TraceDestination.Write("exit " + ruleName + " " + inputSymbol); 944 if (state.backtracking > 0) { 945 TraceDestination.Write(" backtracking=" + state.backtracking); 946 if (state.failed) 947 TraceDestination.Write(" failed"); 948 else 949 TraceDestination.Write(" succeeded"); 950 } 951 TraceDestination.WriteLine(); 952 } 953 954 #region Debugging support 955 public virtual IDebugEventListener DebugListener { 956 get { 957 return null; 958 } 959 } 960 961 [Conditional("ANTLR_DEBUG")] DebugEnterRule(string grammarFileName, string ruleName)962 protected virtual void DebugEnterRule(string grammarFileName, string ruleName) { 963 IDebugEventListener dbg = DebugListener; 964 if (dbg != null) 965 dbg.EnterRule(grammarFileName, ruleName); 966 } 967 968 [Conditional("ANTLR_DEBUG")] DebugExitRule(string grammarFileName, string ruleName)969 protected virtual void DebugExitRule(string grammarFileName, string ruleName) { 970 IDebugEventListener dbg = DebugListener; 971 if (dbg != null) 972 dbg.ExitRule(grammarFileName, ruleName); 973 } 974 975 [Conditional("ANTLR_DEBUG")] DebugEnterSubRule(int decisionNumber)976 protected virtual void DebugEnterSubRule(int decisionNumber) { 977 IDebugEventListener dbg = DebugListener; 978 if (dbg != null) 979 dbg.EnterSubRule(decisionNumber); 980 } 981 982 [Conditional("ANTLR_DEBUG")] DebugExitSubRule(int decisionNumber)983 protected virtual void DebugExitSubRule(int decisionNumber) { 984 IDebugEventListener dbg = DebugListener; 985 if (dbg != null) 986 dbg.ExitSubRule(decisionNumber); 987 } 988 989 [Conditional("ANTLR_DEBUG")] DebugEnterAlt(int alt)990 protected virtual void DebugEnterAlt(int alt) { 991 IDebugEventListener dbg = DebugListener; 992 if (dbg != null) 993 dbg.EnterAlt(alt); 994 } 995 996 [Conditional("ANTLR_DEBUG")] DebugEnterDecision(int decisionNumber, bool couldBacktrack)997 protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) { 998 IDebugEventListener dbg = DebugListener; 999 if (dbg != null) 1000 dbg.EnterDecision(decisionNumber, couldBacktrack); 1001 } 1002 1003 [Conditional("ANTLR_DEBUG")] DebugExitDecision(int decisionNumber)1004 protected virtual void DebugExitDecision(int decisionNumber) { 1005 IDebugEventListener dbg = DebugListener; 1006 if (dbg != null) 1007 dbg.ExitDecision(decisionNumber); 1008 } 1009 1010 [Conditional("ANTLR_DEBUG")] DebugLocation(int line, int charPositionInLine)1011 protected virtual void DebugLocation(int line, int charPositionInLine) { 1012 IDebugEventListener dbg = DebugListener; 1013 if (dbg != null) 1014 dbg.Location(line, charPositionInLine); 1015 } 1016 1017 [Conditional("ANTLR_DEBUG")] DebugSemanticPredicate(bool result, string predicate)1018 protected virtual void DebugSemanticPredicate(bool result, string predicate) { 1019 IDebugEventListener dbg = DebugListener; 1020 if (dbg != null) 1021 dbg.SemanticPredicate(result, predicate); 1022 } 1023 1024 [Conditional("ANTLR_DEBUG")] DebugBeginBacktrack(int level)1025 protected virtual void DebugBeginBacktrack(int level) { 1026 IDebugEventListener dbg = DebugListener; 1027 if (dbg != null) 1028 dbg.BeginBacktrack(level); 1029 } 1030 1031 [Conditional("ANTLR_DEBUG")] DebugEndBacktrack(int level, bool successful)1032 protected virtual void DebugEndBacktrack(int level, bool successful) { 1033 IDebugEventListener dbg = DebugListener; 1034 if (dbg != null) 1035 dbg.EndBacktrack(level, successful); 1036 } 1037 1038 [Conditional("ANTLR_DEBUG")] DebugRecognitionException(RecognitionException ex)1039 protected virtual void DebugRecognitionException(RecognitionException ex) { 1040 IDebugEventListener dbg = DebugListener; 1041 if (dbg != null) 1042 dbg.RecognitionException(ex); 1043 } 1044 #endregion 1045 } 1046 } 1047