1// 2// BaseRecognizer.m 3// ANTLR 4// 5// Created by Alan Condit on 6/16/10. 6// [The "BSD licence"] 7// Copyright (c) 2010 Alan Condit 8// All rights reserved. 9// 10// Redistribution and use in source and binary forms, with or without 11// modification, are permitted provided that the following conditions 12// are met: 13// 1. Redistributions of source code must retain the above copyright 14// notice, this list of conditions and the following disclaimer. 15// 2. Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 3. The name of the author may not be used to endorse or promote products 19// derived from this software without specific prior written permission. 20// 21// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 32#import "ACNumber.h" 33#import "BaseRecognizer.h" 34#import "HashRule.h" 35#import "RuleMemo.h" 36#import "CommonToken.h" 37#import "Map.h" 38#import "NoViableAltException.h" 39 40extern NSInteger debug; 41 42@implementation BaseRecognizer 43 44static AMutableArray *_tokenNames; 45static NSString *_grammarFileName; 46static NSString *NEXT_TOKEN_RULE_NAME; 47 48@synthesize state; 49@synthesize grammarFileName; 50//@synthesize failed; 51@synthesize sourceName; 52//@synthesize numberOfSyntaxErrors; 53@synthesize tokenNames; 54 55+ (void) initialize 56{ 57 NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"]; 58 [NEXT_TOKEN_RULE_NAME retain]; 59} 60 61+ (BaseRecognizer *) newBaseRecognizer 62{ 63 return [[BaseRecognizer alloc] init]; 64} 65 66+ (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen 67{ 68 return [[BaseRecognizer alloc] initWithLen:aLen]; 69} 70 71+ (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState 72{ 73 return [[BaseRecognizer alloc] initWithState:aState]; 74} 75 76+ (AMutableArray *)getTokenNames 77{ 78 return _tokenNames; 79} 80 81+ (void)setTokenNames:(AMutableArray *)theTokNams 82{ 83 if ( _tokenNames != theTokNams ) { 84 if ( _tokenNames ) [_tokenNames release]; 85 [theTokNams retain]; 86 } 87 _tokenNames = theTokNams; 88} 89 90+ (void)setGrammarFileName:(NSString *)aFileName 91{ 92 if ( _grammarFileName != aFileName ) { 93 if ( _grammarFileName ) [_grammarFileName release]; 94 [aFileName retain]; 95 } 96 [_grammarFileName retain]; 97} 98 99- (id) init 100{ 101 if ((self = [super init]) != nil) { 102 if (state == nil) { 103 state = [[RecognizerSharedState newRecognizerSharedState] retain]; 104 } 105 tokenNames = _tokenNames; 106 if ( tokenNames ) [tokenNames retain]; 107 grammarFileName = _grammarFileName; 108 if ( grammarFileName ) [grammarFileName retain]; 109 state._fsp = -1; 110 state.errorRecovery = NO; // are we recovering? 111 state.lastErrorIndex = -1; 112 state.failed = NO; // indicate that some match failed 113 state.syntaxErrors = 0; 114 state.backtracking = 0; // the level of backtracking 115 state.tokenStartCharIndex = -1; 116 } 117 return self; 118} 119 120- (id) initWithLen:(NSInteger)aLen 121{ 122 if ((self = [super init]) != nil) { 123 if (state == nil) { 124 state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain]; 125 } 126 tokenNames = _tokenNames; 127 if ( tokenNames ) [tokenNames retain]; 128 grammarFileName = _grammarFileName; 129 if ( grammarFileName ) [grammarFileName retain]; 130 state._fsp = -1; 131 state.errorRecovery = NO; // are we recovering? 132 state.lastErrorIndex = -1; 133 state.failed = NO; // indicate that some match failed 134 state.syntaxErrors = 0; 135 state.backtracking = 0; // the level of backtracking 136 state.tokenStartCharIndex = -1; 137 } 138 return self; 139} 140 141- (id) initWithState:(RecognizerSharedState *)aState 142{ 143 if ((self = [super init]) != nil) { 144 state = aState; 145 if (state == nil) { 146 state = [RecognizerSharedState newRecognizerSharedState]; 147 } 148 [state retain]; 149 tokenNames = _tokenNames; 150 if ( tokenNames ) [tokenNames retain]; 151 grammarFileName = _grammarFileName; 152 if ( grammarFileName ) [grammarFileName retain]; 153 state._fsp = -1; 154 state.errorRecovery = NO; // are we recovering? 155 state.lastErrorIndex = -1; 156 state.failed = NO; // indicate that some match failed 157 state.syntaxErrors = 0; 158 state.backtracking = 0; // the level of backtracking 159 state.tokenStartCharIndex = -1; 160 } 161 return self; 162} 163 164- (void)dealloc 165{ 166#ifdef DEBUG_DEALLOC 167 NSLog( @"called dealloc in BaseRecognizer" ); 168#endif 169 if ( grammarFileName ) [grammarFileName release]; 170 if ( tokenNames ) [tokenNames release]; 171 if ( state ) [state release]; 172 [super dealloc]; 173} 174 175// reset the recognizer to the initial state. does not touch the token source! 176// this can be extended by the grammar writer to reset custom ivars 177- (void) reset 178{ 179 if ( state == nil ) 180 return; 181 if ( state.following != nil ) { 182 if ( [state.following count] ) 183 [state.following removeAllObjects]; 184 } 185 state._fsp = -1; 186 state.errorRecovery = NO; // are we recovering? 187 state.lastErrorIndex = -1; 188 state.failed = NO; // indicate that some match failed 189 state.syntaxErrors = 0; 190 state.backtracking = 0; // the level of backtracking 191 state.tokenStartCharIndex = -1; 192 if ( state.ruleMemo != nil ) { 193 if ( [state.ruleMemo count] ) 194 [state.ruleMemo removeAllObjects]; 195 } 196} 197 198- (BOOL) getFailed 199{ 200 return [state getFailed]; 201} 202 203- (void) setFailed:(BOOL)flag 204{ 205 [state setFailed:flag]; 206} 207 208- (RecognizerSharedState *) getState 209{ 210 return state; 211} 212 213- (void) setState:(RecognizerSharedState *) theState 214{ 215 if (state != theState) { 216 if ( state ) [state release]; 217 state = theState; 218 [state retain]; 219 } 220} 221 222- (id)input 223{ 224 return nil; // Must be overriden in inheriting class 225} 226 227- (void)skip // override in inheriting class 228{ 229 return; 230} 231 232-(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow 233{ 234 id matchedSymbol = [self getCurrentInputSymbol:anInput]; 235 if ([anInput LA:1] == ttype) { 236 [anInput consume]; 237 state.errorRecovery = NO; 238 state.failed = NO; 239 return matchedSymbol; 240 } 241 if (state.backtracking > 0) { 242 state.failed = YES; 243 return matchedSymbol; 244 } 245 matchedSymbol = [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow]; 246 return matchedSymbol; 247} 248 249-(void) matchAny:(id<IntStream>)anInput 250{ 251 state.errorRecovery = NO; 252 state.failed = NO; 253 [anInput consume]; 254} 255 256-(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype 257{ 258 return [anInput LA:2] == ttype; 259} 260 261-(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow 262{ 263 if ( follow == nil ) { 264 // we have no information about the follow; we can only consume 265 // a single token and hope for the best 266 return NO; 267 } 268 // compute what can follow this grammar element reference 269 if ( [follow member:TokenTypeEOR] ) { 270 ANTLRBitSet *viableTokensFollowingThisRule = [self computeContextSensitiveRuleFOLLOW]; 271 follow = [follow or:viableTokensFollowingThisRule]; 272 if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol 273 [follow remove:(TokenTypeEOR)]; 274 } 275 } 276 // if current token is consistent with what could come after set 277 // then we know we're missing a token; error recovery is free to 278 // "insert" the missing token 279 280 //System.out.println("viable tokens="+follow.toString(getTokenNames())); 281 //System.out.println("LT(1)="+((TokenStream)input).LT(1)); 282 283 // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 284 // in follow set to indicate that the fall of the start symbol is 285 // in the set (EOF can follow). 286 if ( [follow member:[anInput LA:1]] || [follow member:TokenTypeEOR] ) { 287 //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); 288 return YES; 289 } 290 return NO; 291} 292 293/** Report a recognition problem. 294 * 295 * This method sets errorRecovery to indicate the parser is recovering 296 * not parsing. Once in recovery mode, no errors are generated. 297 * To get out of recovery mode, the parser must successfully match 298 * a token (after a resync). So it will go: 299 * 300 * 1. error occurs 301 * 2. enter recovery mode, report error 302 * 3. consume until token found in resynch set 303 * 4. try to resume parsing 304 * 5. next match() will reset errorRecovery mode 305 * 306 * If you override, make sure to update syntaxErrors if you care about that. 307 */ 308-(void) reportError:(RecognitionException *) e 309{ 310 // if we've already reported an error and have not matched a token 311 // yet successfully, don't report any errors. 312 if ( state.errorRecovery ) { 313 //System.err.print("[SPURIOUS] "); 314 return; 315 } 316 state.syntaxErrors++; // don't count spurious 317 state.errorRecovery = YES; 318 319 [self displayRecognitionError:[self getTokenNames] Exception:e]; 320} 321 322-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e 323{ 324 NSString *hdr = [self getErrorHeader:e]; 325 NSString *msg = [self getErrorMessage:e TokenNames:theTokNams]; 326 [self emitErrorMessage:[NSString stringWithFormat:@" %@ %@", hdr, msg]]; 327} 328 329/** What error message should be generated for the various 330 * exception types? 331 * 332 * Not very object-oriented code, but I like having all error message 333 * generation within one method rather than spread among all of the 334 * exception classes. This also makes it much easier for the exception 335 * handling because the exception classes do not have to have pointers back 336 * to this object to access utility routines and so on. Also, changing 337 * the message for an exception type would be difficult because you 338 * would have to subclassing exception, but then somehow get ANTLR 339 * to make those kinds of exception objects instead of the default. 340 * This looks weird, but trust me--it makes the most sense in terms 341 * of flexibility. 342 * 343 * For grammar debugging, you will want to override this to add 344 * more information such as the stack frame with 345 * getRuleInvocationStack(e, this.getClass().getName()) and, 346 * for no viable alts, the decision description and state etc... 347 * 348 * Override this to change the message generated for one or more 349 * exception types. 350 */ 351- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams 352{ 353 // NSString *msg = [e getMessage]; 354 NSString *msg; 355 if ( [e isKindOfClass:[UnwantedTokenException class]] ) { 356 UnwantedTokenException *ute = (UnwantedTokenException *)e; 357 NSString *tokenName=@"<unknown>"; 358 if ( ute.expecting == TokenTypeEOF ) { 359 tokenName = @"EOF"; 360 } 361 else { 362 tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting]; 363 } 364 msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]], 365 tokenName]; 366 } 367 else if ( [e isKindOfClass:[MissingTokenException class] ] ) { 368 MissingTokenException *mte = (MissingTokenException *)e; 369 NSString *tokenName=@"<unknown>"; 370 if ( mte.expecting== TokenTypeEOF ) { 371 tokenName = @"EOF"; 372 } 373 else { 374 tokenName = [theTokNams objectAtIndex:mte.expecting]; 375 } 376 msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ]; 377 } 378 else if ( [e isKindOfClass:[MismatchedTokenException class]] ) { 379 MismatchedTokenException *mte = (MismatchedTokenException *)e; 380 NSString *tokenName=@"<unknown>"; 381 if ( mte.expecting== TokenTypeEOF ) { 382 tokenName = @"EOF"; 383 } 384 else { 385 tokenName = [theTokNams objectAtIndex:mte.expecting]; 386 } 387 msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName]; 388 } 389 else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) { 390 MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e; 391 NSString *tokenName=@"<unknown>"; 392 if ( mtne.expecting==TokenTypeEOF ) { 393 tokenName = @"EOF"; 394 } 395 else { 396 tokenName = [theTokNams objectAtIndex:mtne.expecting]; 397 } 398 msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName]; 399 } 400 else if ( [e isKindOfClass:[NoViableAltException class]] ) { 401 //NoViableAltException *nvae = (NoViableAltException *)e; 402 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" 403 // and "(decision="+nvae.decisionNumber+") and 404 // "state "+nvae.stateNumber 405 // msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]]; 406 msg = [NSString stringWithFormat:@"no viable alternative decision:%d state:%d at input %@", ((NoViableAltException *)e).stateNumber, ((NoViableAltException *)e).decisionNumber, [self getTokenErrorDisplay:e.token]]; 407 } 408 else if ( [e isKindOfClass:[EarlyExitException class]] ) { 409 //EarlyExitException *eee = (EarlyExitException *)e; 410 // for development, can add "(decision="+eee.decisionNumber+")" 411 msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input ", [self getTokenErrorDisplay:e.token]]; 412 } 413 else if ( [e isKindOfClass:[MismatchedSetException class]] ) { 414 MismatchedSetException *mse = (MismatchedSetException *)e; 415 msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@", 416 [self getTokenErrorDisplay:(e.token)], 417 mse.expecting]; 418 } 419#pragma warning NotSet not yet implemented. 420 else if ( [e isKindOfClass:[MismatchedNotSetException class] ] ) { 421 MismatchedNotSetException *mse = (MismatchedNotSetException *)e; 422 msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@", 423 [self getTokenErrorDisplay:(e.token)], 424 mse.expecting]; 425 } 426 else if ( [e isKindOfClass:[FailedPredicateException class]] ) { 427 FailedPredicateException *fpe = (FailedPredicateException *)e; 428 msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate]; 429 } 430 else { 431 msg = [NSString stringWithFormat:@"Exception= %@\n", e.name]; 432 } 433 return msg; 434} 435 436/** Get number of recognition errors (lexer, parser, tree parser). Each 437 * recognizer tracks its own number. So parser and lexer each have 438 * separate count. Does not count the spurious errors found between 439 * an error and next valid token match 440 * 441 * See also reportError() 442 */ 443- (NSInteger) getNumberOfSyntaxErrors 444{ 445 return state.syntaxErrors; 446} 447 448/** What is the error header, normally line/character position information? */ 449- (NSString *)getErrorHeader:(RecognitionException *)e 450{ 451 return [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine]; 452} 453 454/** How should a token be displayed in an error message? The default 455 * is to display just the text, but during development you might 456 * want to have a lot of information spit out. Override in that case 457 * to use t.toString() (which, for CommonToken, dumps everything about 458 * the token). This is better than forcing you to override a method in 459 * your token objects because you don't have to go modify your lexer 460 * so that it creates a new Java type. 461 */ 462- (NSString *)getTokenErrorDisplay:(id<Token>)t 463{ 464 NSString *s = t.text; 465 if ( s == nil ) { 466 if ( t.type == TokenTypeEOF ) { 467 s = @"<EOF>"; 468 } 469 else { 470 s = [NSString stringWithFormat:@"<%@>", t.type]; 471 } 472 } 473 s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\\\n"]; 474 s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\\\r"]; 475 s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\\\t"]; 476 return [NSString stringWithFormat:@"\'%@\'", s]; 477} 478 479/** Override this method to change where error messages go */ 480- (void) emitErrorMessage:(NSString *) msg 481{ 482// System.err.println(msg); 483 NSLog(@"%@", msg); 484} 485 486/** Recover from an error found on the input stream. This is 487 * for NoViableAlt and mismatched symbol exceptions. If you enable 488 * single token insertion and deletion, this will usually not 489 * handle mismatched symbol exceptions but there could be a mismatched 490 * token that the match() routine could not recover from. 491 */ 492- (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re 493{ 494 if ( state.lastErrorIndex == anInput.index ) { 495 // uh oh, another error at same token index; must be a case 496 // where LT(1) is in the recovery token set so nothing is 497 // consumed; consume a single token so at least to prevent 498 // an infinite loop; this is a failsafe. 499 [anInput consume]; 500 } 501 state.lastErrorIndex = anInput.index; 502 ANTLRBitSet *followSet = [self computeErrorRecoverySet]; 503 [self beginResync]; 504 [self consumeUntilFollow:anInput Follow:followSet]; 505 [self endResync]; 506} 507 508- (void) beginResync 509{ 510 511} 512 513- (void) endResync 514{ 515 516} 517 518/* Compute the error recovery set for the current rule. During 519 * rule invocation, the parser pushes the set of tokens that can 520 * follow that rule reference on the stack; this amounts to 521 * computing FIRST of what follows the rule reference in the 522 * enclosing rule. This local follow set only includes tokens 523 * from within the rule; i.e., the FIRST computation done by 524 * ANTLR stops at the end of a rule. 525 * 526 * EXAMPLE 527 * 528 * When you find a "no viable alt exception", the input is not 529 * consistent with any of the alternatives for rule r. The best 530 * thing to do is to consume tokens until you see something that 531 * can legally follow a call to r *or* any rule that called r. 532 * You don't want the exact set of viable next tokens because the 533 * input might just be missing a token--you might consume the 534 * rest of the input looking for one of the missing tokens. 535 * 536 * Consider grammar: 537 * 538 * a : '[' b ']' 539 * | '(' b ')' 540 * ; 541 * b : c '^' INT ; 542 * c : ID 543 * | INT 544 * ; 545 * 546 * At each rule invocation, the set of tokens that could follow 547 * that rule is pushed on a stack. Here are the various "local" 548 * follow sets: 549 * 550 * FOLLOW(b1_in_a) = FIRST(']') = ']' 551 * FOLLOW(b2_in_a) = FIRST(')') = ')' 552 * FOLLOW(c_in_b) = FIRST('^') = '^' 553 * 554 * Upon erroneous input "[]", the call chain is 555 * 556 * a -> b -> c 557 * 558 * and, hence, the follow context stack is: 559 * 560 * depth local follow set after call to rule 561 * 0 <EOF> a (from main()) 562 * 1 ']' b 563 * 3 '^' c 564 * 565 * Notice that ')' is not included, because b would have to have 566 * been called from a different context in rule a for ')' to be 567 * included. 568 * 569 * For error recovery, we cannot consider FOLLOW(c) 570 * (context-sensitive or otherwise). We need the combined set of 571 * all context-sensitive FOLLOW sets--the set of all tokens that 572 * could follow any reference in the call chain. We need to 573 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 574 * we resync'd to that token, we'd consume until EOF. We need to 575 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 576 * In this case, for input "[]", LA(1) is in this set so we would 577 * not consume anything and after printing an error rule c would 578 * return normally. It would not find the required '^' though. 579 * At this point, it gets a mismatched token error and throws an 580 * exception (since LA(1) is not in the viable following token 581 * set). The rule exception handler tries to recover, but finds 582 * the same recovery set and doesn't consume anything. Rule b 583 * exits normally returning to rule a. Now it finds the ']' (and 584 * with the successful match exits errorRecovery mode). 585 * 586 * So, you cna see that the parser walks up call chain looking 587 * for the token that was a member of the recovery set. 588 * 589 * Errors are not generated in errorRecovery mode. 590 * 591 * ANTLR's error recovery mechanism is based upon original ideas: 592 * 593 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 594 * 595 * and 596 * 597 * "A note on error recovery in recursive descent parsers": 598 * http://portal.acm.org/citation.cfm?id=947902.947905 599 * 600 * Later, Josef Grosch had some good ideas: 601 * 602 * "Efficient and Comfortable Error Recovery in Recursive Descent 603 * Parsers": 604 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 605 * 606 * Like Grosch I implemented local FOLLOW sets that are combined 607 * at run-time upon error to avoid overhead during parsing. 608 */ 609- (ANTLRBitSet *) computeErrorRecoverySet 610{ 611 return [self combineFollows:NO]; 612} 613 614/** Compute the context-sensitive FOLLOW set for current rule. 615 * This is set of token types that can follow a specific rule 616 * reference given a specific call chain. You get the set of 617 * viable tokens that can possibly come next (lookahead depth 1) 618 * given the current call chain. Contrast this with the 619 * definition of plain FOLLOW for rule r: 620 * 621 * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} 622 * 623 * where x in T* and alpha, beta in V*; T is set of terminals and 624 * V is the set of terminals and nonterminals. In other words, 625 * FOLLOW(r) is the set of all tokens that can possibly follow 626 * references to r in *any* sentential form (context). At 627 * runtime, however, we know precisely which context applies as 628 * we have the call chain. We may compute the exact (rather 629 * than covering superset) set of following tokens. 630 * 631 * For example, consider grammar: 632 * 633 * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} 634 * | "return" expr '.' 635 * ; 636 * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} 637 * atom : INT // FOLLOW(atom)=={'+',')',';','.'} 638 * | '(' expr ')' 639 * ; 640 * 641 * The FOLLOW sets are all inclusive whereas context-sensitive 642 * FOLLOW sets are precisely what could follow a rule reference. 643 * For input input "i=(3);", here is the derivation: 644 * 645 * stat => ID '=' expr ';' 646 * => ID '=' atom ('+' atom)* ';' 647 * => ID '=' '(' expr ')' ('+' atom)* ';' 648 * => ID '=' '(' atom ')' ('+' atom)* ';' 649 * => ID '=' '(' INT ')' ('+' atom)* ';' 650 * => ID '=' '(' INT ')' ';' 651 * 652 * At the "3" token, you'd have a call chain of 653 * 654 * stat -> expr -> atom -> expr -> atom 655 * 656 * What can follow that specific nested ref to atom? Exactly ')' 657 * as you can see by looking at the derivation of this specific 658 * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. 659 * 660 * You want the exact viable token set when recovering from a 661 * token mismatch. Upon token mismatch, if LA(1) is member of 662 * the viable next token set, then you know there is most likely 663 * a missing token in the input stream. "Insert" one by just not 664 * throwing an exception. 665 */ 666- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW 667{ 668 return [self combineFollows:YES]; 669} 670 671// what is exact? it seems to only add sets from above on stack 672// if EOR is in set i. When it sees a set w/o EOR, it stops adding. 673// Why would we ever want them all? Maybe no viable alt instead of 674// mismatched token? 675- (ANTLRBitSet *)combineFollows:(BOOL) exact 676{ 677 NSInteger top = state._fsp; 678 ANTLRBitSet *followSet = [[ANTLRBitSet newBitSet] retain]; 679 for (int i = top; i >= 0; i--) { 680 ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i]; 681 /* 682 System.out.println("local follow depth "+i+"="+ 683 localFollowSet.toString(getTokenNames())+")"); 684 */ 685 [followSet orInPlace:localFollowSet]; 686 if ( exact ) { 687 // can we see end of rule? 688 if ( [localFollowSet member:TokenTypeEOR] ) { 689 // Only leave EOR in set if at top (start rule); this lets 690 // us know if have to include follow(start rule); i.e., EOF 691 if ( i > 0 ) { 692 [followSet remove:TokenTypeEOR]; 693 } 694 } 695 else { // can't see end of rule, quit 696 break; 697 } 698 } 699 } 700 return followSet; 701} 702 703/** Attempt to recover from a single missing or extra token. 704 * 705 * EXTRA TOKEN 706 * 707 * LA(1) is not what we are looking for. If LA(2) has the right token, 708 * however, then assume LA(1) is some extra spurious token. Delete it 709 * and LA(2) as if we were doing a normal match(), which advances the 710 * input. 711 * 712 * MISSING TOKEN 713 * 714 * If current token is consistent with what could come after 715 * ttype then it is ok to "insert" the missing token, else throw 716 * exception For example, Input "i=(3;" is clearly missing the 717 * ')'. When the parser returns from the nested call to expr, it 718 * will have call chain: 719 * 720 * stat -> expr -> atom 721 * 722 * and it will be trying to match the ')' at this point in the 723 * derivation: 724 * 725 * => ID '=' '(' INT ')' ('+' atom)* ';' 726 * ^ 727 * match() will see that ';' doesn't match ')' and report a 728 * mismatched token error. To recover, it sees that LA(1)==';' 729 * is in the set of tokens that can follow the ')' token 730 * reference in rule atom. It can assume that you forgot the ')'. 731 */ 732- (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput 733 TokenType:(NSInteger)ttype 734 Follow:(ANTLRBitSet *)follow 735{ 736 RecognitionException *e = nil; 737 // if next token is what we are looking for then "delete" this token 738 if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) { 739 e = [UnwantedTokenException newException:ttype Stream:anInput]; 740 /* 741 System.err.println("recoverFromMismatchedToken deleting "+ 742 ((TokenStream)input).LT(1)+ 743 " since "+((TokenStream)input).LT(2)+" is what we want"); 744 */ 745 [self beginResync]; 746 [anInput consume]; // simply delete extra token 747 [self endResync]; 748 [self reportError:e]; // report after consuming so AW sees the token in the exception 749 // we want to return the token we're actually matching 750 id matchedSymbol = [self getCurrentInputSymbol:anInput]; 751 [anInput consume]; // move past ttype token as if all were ok 752 return matchedSymbol; 753 } 754 // can't recover with single token deletion, try insertion 755 if ( [self mismatchIsMissingToken:anInput Follow:follow] ) { 756 id<Token> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow]; 757 e = [MissingTokenException newException:ttype Stream:anInput With:inserted]; 758 [self reportError:e]; // report after inserting so AW sees the token in the exception 759 return inserted; 760 } 761 // even that didn't work; must throw the exception 762 e = [MismatchedTokenException newException:ttype Stream:anInput]; 763 @throw e; 764} 765 766/** Not currently used */ 767-(id) recoverFromMismatchedSet:(id<IntStream>)anInput 768 Exception:(RecognitionException *)e 769 Follow:(ANTLRBitSet *) follow 770{ 771 if ( [self mismatchIsMissingToken:anInput Follow:follow] ) { 772 // System.out.println("missing token"); 773 [self reportError:e]; 774 // we don't know how to conjure up a token for sets yet 775 return [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow]; 776 } 777 // TODO do single token deletion like above for Token mismatch 778 @throw e; 779} 780 781/** Match needs to return the current input symbol, which gets put 782 * into the label for the associated token ref; e.g., x=ID. Token 783 * and tree parsers need to return different objects. Rather than test 784 * for input stream type or change the IntStream interface, I use 785 * a simple method to ask the recognizer to tell me what the current 786 * input symbol is. 787 * 788 * This is ignored for lexers. 789 */ 790- (id) getCurrentInputSymbol:(id<IntStream>)anInput 791{ 792 return nil; 793} 794 795/** Conjure up a missing token during error recovery. 796 * 797 * The recognizer attempts to recover from single missing 798 * symbols. But, actions might refer to that missing symbol. 799 * For example, x=ID {f($x);}. The action clearly assumes 800 * that there has been an identifier matched previously and that 801 * $x points at that token. If that token is missing, but 802 * the next token in the stream is what we want we assume that 803 * this token is missing and we keep going. Because we 804 * have to return some token to replace the missing token, 805 * we have to conjure one up. This method gives the user control 806 * over the tokens returned for missing tokens. Mostly, 807 * you will want to create something special for identifier 808 * tokens. For literals such as '{' and ',', the default 809 * action in the parser or tree parser works. It simply creates 810 * a CommonToken of the appropriate type. The text will be the token. 811 * If you change what tokens must be created by the lexer, 812 * override this method to create the appropriate tokens. 813 */ 814- (id)getMissingSymbol:(id<IntStream>)anInput 815 Exception:(RecognitionException *)e 816 TokenType:(NSInteger)expectedTokenType 817 Follow:(ANTLRBitSet *)follow 818{ 819 return nil; 820} 821 822 823-(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType 824{ 825 //System.out.println("consumeUntil "+tokenType); 826 int ttype = [anInput LA:1]; 827 while (ttype != TokenTypeEOF && ttype != tokenType) { 828 [anInput consume]; 829 ttype = [anInput LA:1]; 830 } 831} 832 833/** Consume tokens until one matches the given token set */ 834-(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set 835{ 836 //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); 837 int ttype = [anInput LA:1]; 838 while (ttype != TokenTypeEOF && ![set member:ttype] ) { 839 //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); 840 [anInput consume]; 841 ttype = [anInput LA:1]; 842 } 843} 844 845/** Push a rule's follow set using our own hardcoded stack */ 846- (void)pushFollow:(ANTLRBitSet *)fset 847{ 848 if ( (state._fsp +1) >= [state.following count] ) { 849 // AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2]; 850 // System.arraycopy(state.following, 0, f, 0, state.following.length); 851 // state.following = f; 852 [state.following addObject:fset]; 853 [fset retain]; 854 state._fsp++; 855 } 856 else { 857 [state.following replaceObjectAtIndex:++state._fsp withObject:fset]; 858 } 859} 860 861- (ANTLRBitSet *)popFollow 862{ 863 ANTLRBitSet *fset; 864 865 if ( state._fsp >= 0 && [state.following count] > 0 ) { 866 fset = [state.following objectAtIndex:state._fsp--]; 867 [state.following removeLastObject]; 868 return fset; 869 } 870 else { 871 NSLog( @"Attempted to pop a follow when none exists on the stack\n" ); 872 } 873 return nil; 874} 875 876/** Return List<String> of the rules in your parser instance 877 * leading up to a call to this method. You could override if 878 * you want more details such as the file/line info of where 879 * in the parser java code a rule is invoked. 880 * 881 * This is very useful for error messages and for context-sensitive 882 * error recovery. 883 */ 884- (AMutableArray *)getRuleInvocationStack 885{ 886 NSString *parserClassName = [[self className] retain]; 887 return [self getRuleInvocationStack:[RecognitionException newException] Recognizer:parserClassName]; 888} 889 890/** A more general version of getRuleInvocationStack where you can 891 * pass in, for example, a RecognitionException to get it's rule 892 * stack trace. This routine is shared with all recognizers, hence, 893 * static. 894 * 895 * TODO: move to a utility class or something; weird having lexer call this 896 */ 897- (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e 898 Recognizer:(NSString *)recognizerClassName 899{ 900 // char *name; 901 AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain]; 902 NSArray *stack = [e callStackSymbols]; 903 int i = 0; 904 for (i = [stack count]-1; i >= 0; i--) { 905 NSString *t = [stack objectAtIndex:i]; 906 // NSLog(@"stack %d = %@\n", i, t); 907 if ( [t commonPrefixWithString:@"org.antlr.runtime." options:NSLiteralSearch] ) { 908 // id aClass = objc_getClass( [t UTF8String] ); 909 continue; // skip support code such as this method 910 } 911 if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) { 912 // name = sel_getName(method_getName(method)); 913 // NSString *aMethod = [NSString stringWithFormat:@"%s", name]; 914 continue; 915 } 916 if ( ![t isEqualTo:recognizerClassName] ) { 917 // name = class_getName( [t UTF8String] ); 918 continue; // must not be part of this parser 919 } 920 [rules addObject:t]; 921 } 922#ifdef DONTUSEYET 923 StackTraceElement[] stack = e.getStackTrace(); 924 int i = 0; 925 for (i=stack.length-1; i>=0; i--) { 926 StackTraceElement t = stack[i]; 927 if ( [t getClassName().startsWith("org.antlr.runtime.") ) { 928 continue; // skip support code such as this method 929 } 930 if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) { 931 continue; 932 } 933 if ( ![[t getClassName] equals:recognizerClassName] ) { 934 continue; // must not be part of this parser 935 } 936 [rules addObject:[t getMethodName]]; 937 } 938#endif 939 [stack release]; 940 return rules; 941} 942 943- (NSInteger) getBacktrackingLevel 944{ 945 return [state getBacktracking]; 946} 947 948- (void) setBacktrackingLevel:(NSInteger)level 949{ 950 [state setBacktracking:level]; 951} 952 953 /** Used to print out token names like ID during debugging and 954 * error reporting. The generated parsers implement a method 955 * that overrides this to point to their String[] tokenNames. 956 */ 957- (NSArray *)getTokenNames 958{ 959 return tokenNames; 960} 961 962/** For debugging and other purposes, might want the grammar name. 963 * Have ANTLR generate an implementation for this method. 964 */ 965- (NSString *)getGrammarFileName 966{ 967 return grammarFileName; 968} 969 970- (NSString *)getSourceName 971{ 972 return nil; 973} 974 975/** A convenience method for use most often with template rewrites. 976 * Convert a List<Token> to List<String> 977 */ 978- (AMutableArray *)toStrings:(AMutableArray *)tokens 979{ 980 if ( tokens == nil ) 981 return nil; 982 AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]]; 983 id object; 984 NSInteger i = 0; 985 for (object in tokens) { 986 [strings addObject:[object text]]; 987 i++; 988 } 989 return strings; 990} 991 992/** Given a rule number and a start token index number, return 993 * ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 994 * start index. If this rule has parsed input starting from the 995 * start index before, then return where the rule stopped parsing. 996 * It returns the index of the last token matched by the rule. 997 * 998 * For now we use a hashtable and just the slow Object-based one. 999 * Later, we can make a special one for ints and also one that 1000 * tosses out data after we commit past input position i. 1001 */ 1002- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex 1003{ 1004 ACNumber *stopIndexI; 1005 HashRule *aHashRule; 1006 if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) { 1007 aHashRule = [HashRule newHashRuleWithLen:17]; 1008 [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex]; 1009 } 1010 stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex]; 1011 if ( stopIndexI == nil ) { 1012 return ANTLR_MEMO_RULE_UNKNOWN; 1013 } 1014 return [stopIndexI integerValue]; 1015} 1016 1017/** Has this rule already parsed input at the current index in the 1018 * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 1019 * If we attempted but failed to parse properly before, return 1020 * MEMO_RULE_FAILED. 1021 * 1022 * This method has a side-effect: if we have seen this input for 1023 * this rule and successfully parsed before, then seek ahead to 1024 * 1 past the stop token matched for this rule last time. 1025 */ 1026- (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex 1027{ 1028 NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index]; 1029 if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) { 1030 // NSLog(@"rule %d not yet encountered\n", ruleIndex); 1031 return NO; 1032 } 1033 if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) { 1034 if (debug) NSLog(@"rule %d will never succeed\n", ruleIndex); 1035 state.failed = YES; 1036 } 1037 else { 1038 if (debug) NSLog(@"seen rule %d before; skipping ahead to %d failed = %@\n", ruleIndex, aStopIndex+1, state.failed?@"YES":@"NO"); 1039 [anInput seek:(aStopIndex+1)]; // jump to one past stop token 1040 } 1041 return YES; 1042} 1043 1044/** Record whether or not this rule parsed the input at this position 1045 * successfully. Use a standard java hashtable for now. 1046 */ 1047- (void)memoize:(id<IntStream>)anInput 1048 RuleIndex:(NSInteger)ruleIndex 1049 StartIndex:(NSInteger)ruleStartIndex 1050{ 1051 RuleStack *aRuleStack; 1052 NSInteger stopTokenIndex; 1053 1054 aRuleStack = state.ruleMemo; 1055 stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1)); 1056 if ( aRuleStack == nil ) { 1057 if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]); 1058 return; 1059 } 1060 if ( ruleIndex >= [aRuleStack length] ) { 1061 if (debug) NSLog(@"!!!!!!!!! memo size is %d, but rule index is %d", [state.ruleMemo length], ruleIndex); 1062 return; 1063 } 1064 if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) { 1065 [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex]; 1066 } 1067 return; 1068} 1069 1070/** return how many rule/input-index pairs there are in total. 1071 * TODO: this includes synpreds. :( 1072 */ 1073- (NSInteger)getRuleMemoizationCacheSize 1074{ 1075 RuleStack *aRuleStack; 1076 HashRule *aHashRule; 1077 1078 int aCnt = 0; 1079 aRuleStack = state.ruleMemo; 1080 for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) { 1081 aHashRule = [aRuleStack objectAtIndex:i]; 1082 if ( aHashRule != nil ) { 1083 aCnt += [aHashRule count]; // how many input indexes are recorded? 1084 } 1085 } 1086 return aCnt; 1087} 1088 1089#pragma warning Have to fix traceIn and traceOut. 1090- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol 1091{ 1092 NSLog(@"enter %@ %@", ruleName, inputSymbol); 1093 if ( state.backtracking > 0 ) { 1094 NSLog(@" backtracking=%s", ((state.backtracking==YES)?"YES":"NO")); 1095 } 1096 NSLog(@"\n"); 1097} 1098 1099- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol 1100{ 1101 NSLog(@"exit %@ -- %@", ruleName, inputSymbol); 1102 if ( state.backtracking > 0 ) { 1103 NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded"); 1104 } 1105 NSLog(@"\n"); 1106} 1107 1108 1109// call a syntactic predicate methods using its selector. this way we can support arbitrary synpreds. 1110- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<IntStream>)input 1111{ 1112 id<IntStream> input; 1113 1114 state.backtracking++; 1115 // input = state.token.input; 1116 input = self.input; 1117 int start = [input mark]; 1118 @try { 1119 [self performSelector:synpredFragment]; 1120 } 1121 @catch (RecognitionException *re) { 1122 NSLog(@"impossible synpred: %@", re.name); 1123 } 1124 BOOL success = (state.failed == NO); 1125 [input rewind:start]; 1126 state.backtracking--; 1127 state.failed = NO; 1128 return success; 1129} 1130 1131@end 1132 1133