1/** A generic recognizer that can handle recognizers generated from 2 * lexer, parser, and tree grammars. This is all the parsing 3 * support code essentially; most of it is error recovery stuff and 4 * backtracking. 5 * 6 * <p>This class should not be instantiated directly. Instead, use one of its 7 * subclasses.</p> 8 * 9 * @class 10 * @param {org.antlr.runtime.RecognizerSharedState} [state] state object with 11 * which to initialize this recognizer. 12 */ 13org.antlr.runtime.BaseRecognizer = function(state) { 14 /** State of a lexer, parser, or tree parser are collected into a state 15 * object so the state can be shared. This sharing is needed to 16 * have one grammar import others and share same error variables 17 * and other state variables. It's a kind of explicit multiple 18 * inheritance via delegation of methods and shared state. 19 * @type org.antlr.runtime.RecognizerSharedState 20 */ 21 this.state = state || new org.antlr.runtime.RecognizerSharedState(); 22}; 23 24org.antlr.lang.augmentObject(org.antlr.runtime.BaseRecognizer, { 25 /** 26 * @memberOf org.antlr.runtime.BaseRecognizer 27 * @type Number 28 */ 29 MEMO_RULE_FAILED: -2, 30 31 /** 32 * @memberOf org.antlr.runtime.BaseRecognizer 33 * @type Number 34 */ 35 MEMO_RULE_UNKNOWN: -1, 36 37 /** 38 * @memberOf org.antlr.runtime.BaseRecognizer 39 * @type Number 40 */ 41 INITIAL_FOLLOW_STACK_SIZE: 100, 42 43 /** 44 * @memberOf org.antlr.runtime.BaseRecognizer 45 * @type Number 46 */ 47 MEMO_RULE_FAILED_I: -2, 48 49 /** 50 * @memberOf org.antlr.runtime.BaseRecognizer 51 * @type Number 52 */ 53 DEFAULT_TOKEN_CHANNEL: org.antlr.runtime.Token.DEFAULT_CHANNEL, 54 55 /** 56 * @memberOf org.antlr.runtime.BaseRecognizer 57 * @type Number 58 */ 59 HIDDEN: org.antlr.runtime.Token.HIDDEN_CHANNEL, 60 61 /** 62 * @memberOf org.antlr.runtime.BaseRecognizer 63 * @type String 64 */ 65 NEXT_TOKEN_RULE_NAME: "nextToken" 66}); 67 68org.antlr.runtime.BaseRecognizer.prototype = { 69 /** Reset the parser's state. Subclasses must rewinds the input stream */ 70 reset: function() { 71 var i, len; 72 73 // wack everything related to error recovery 74 if (!this.state) { 75 return; // no shared state work to do 76 } 77 this.state._fsp = -1; 78 this.state.errorRecovery = false; 79 this.state.lastErrorIndex = -1; 80 this.state.failed = false; 81 this.state.syntaxErrors = 0; 82 // wack everything related to backtracking and memoization 83 this.state.backtracking = 0; 84 // wipe cache 85 if (this.state.ruleMemo) { 86 for (i=0, len=this.state.ruleMemo.length; i<len; i++) { 87 this.state.ruleMemo[i] = null; 88 } 89 } 90 }, 91 92 /** Match current input symbol against ttype. Attempt 93 * single token insertion or deletion error recovery. If 94 * that fails, throw {@link org.antlr.runtime.MismatchedTokenException}. 95 * 96 * <p>To turn off single token insertion or deletion error 97 * recovery, override {@link #mismatchRecover} and have it call 98 * plain {@link #mismatch}, which does not recover. Then any error 99 * in a rule will cause an exception and immediate exit from 100 * rule. Rule would recover by resynchronizing to the set of 101 * symbols that can follow rule ref.</p> 102 * 103 * @param {org.antlr.runtime.IntStream} input input stream to match against. 104 * @param {Number} ttype input type to match. 105 * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the 106 * matched token. 107 * @returns {Object} the matched symbol 108 */ 109 match: function(input, ttype, follow) { 110 var matchedSymbol = this.getCurrentInputSymbol(input); 111 if ( input.LA(1)===ttype ) { 112 input.consume(); 113 this.state.errorRecovery = false; 114 this.state.failed = false; 115 return matchedSymbol; 116 } 117 if ( this.state.backtracking>0 ) { 118 this.state.failed = true; 119 return matchedSymbol; 120 } 121 matchedSymbol = this.recoverFromMismatchedToken(input, ttype, follow); 122 return matchedSymbol; 123 }, 124 125 /** 126 * Match any token. 127 * @param {org.antlr.runtime.IntStream} input input stream to match against. 128 */ 129 matchAny: function(input) { 130 this.state.errorRecovery = false; 131 this.state.failed = false; 132 input.consume(); 133 }, 134 135 /** 136 * Is the following token (LA(2)) the unwanted type (ttype)? 137 * @param {org.antlr.runtime.IntStream} input input stream to match against. 138 * @param {Number} ttype the undesired token type. 139 * @returns {Boolean} true if and only if the following token is the 140 * unwanted type. 141 */ 142 mismatchIsUnwantedToken: function(input, ttype) { 143 return input.LA(2)===ttype; 144 }, 145 146 /** 147 * Does the stream appear to be missing a single token? 148 * @param {org.antlr.runtime.IntStream} input input stream to match against. 149 * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the 150 * matched token. 151 * @returns {Boolean} true if and only if it appears that the stream is 152 * missing a single token. 153 */ 154 mismatchIsMissingToken: function(input, follow) { 155 if ( !follow ) { 156 // we have no information about the follow; we can only consume 157 // a single token and hope for the best 158 return false; 159 } 160 // compute what can follow this grammar element reference 161 if ( follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) { 162 var viableTokensFollowingThisRule = this.computeContextSensitiveRuleFOLLOW(); 163 follow = follow.or(this.viableTokensFollowingThisRule); 164 if ( this.state._fsp>=0 ) { // remove EOR if we're not the start symbol 165 follow.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE); 166 } 167 } 168 // if current token is consistent with what could come after set 169 // then we know we're missing a token; error recovery is free to 170 // "insert" the missing token 171 172 // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 173 // in follow set to indicate that the fall of the start symbol is 174 // in the set (EOF can follow). 175 if ( follow.member(input.LA(1)) || 176 follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) 177 { 178 return true; 179 } 180 return false; 181 }, 182 183 /** Factor out what to do upon token mismatch so tree parsers can behave 184 * differently. Override and call {@link #mismatchRecover} 185 * to get single token insertion and deletion. 186 * 187 * @param {org.antlr.runtime.IntStream} input input stream to match against. 188 * @param {Number} ttype input type to match. 189 * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the 190 * matched token. 191 */ 192 mismatch: function(input, ttype, follow) { 193 if ( this.mismatchIsUnwantedToken(input, ttype) ) { 194 throw new org.antlr.runtime.UnwantedTokenException(ttype, input); 195 } else if ( this.mismatchIsMissingToken(input, follow) ) { 196 throw new org.antlr.runtime.MissingTokenException(ttype, input, null); 197 } 198 throw new org.antlr.runtime.MismatchedTokenException(ttype, input); 199 }, 200 201 /** Report a recognition problem. 202 * 203 * <p>This method sets errorRecovery to indicate the parser is recovering 204 * not parsing. Once in recovery mode, no errors are generated. 205 * To get out of recovery mode, the parser must successfully match 206 * a token (after a resync). So it will go:</p> 207 * <ol> 208 * <li>error occurs</li> 209 * <li>enter recovery mode, report error</li> 210 * <li>consume until token found in resynch set</li> 211 * <li>try to resume parsing</li> 212 * <li>next match() will reset errorRecovery mode</li> 213 * </ol> 214 * 215 * <p>If you override, make sure to update this.state.syntaxErrors if you 216 * care about that.</p> 217 * @param {org.antlr.runtime.RecognitionException} e the error to be reported. 218 */ 219 reportError: function(e) { 220 // if we've already reported an error and have not matched a token 221 // yet successfully, don't report any errors. 222 if ( this.state.errorRecovery ) { 223 return; 224 } 225 this.state.syntaxErrors++; 226 this.state.errorRecovery = true; 227 228 this.displayRecognitionError(this.getTokenNames(), e); 229 }, 230 231 /** 232 * Assemble recognition error message. 233 * @param {Array} tokenNames array of token names (strings). 234 * @param {org.antlr.runtime.RecognitionException} e the error to be reported. 235 */ 236 displayRecognitionError: function(tokenNames, e) { 237 var hdr = this.getErrorHeader(e), 238 msg = this.getErrorMessage(e, tokenNames); 239 this.emitErrorMessage(hdr+" "+msg); 240 }, 241 242 /** 243 * Create error header message. Format is <q>line 244 * lineNumber:positionInLine</q>. 245 * @param {org.antlr.runtime.RecognitionException} e the error to be reported. 246 * @returns {String} The error header. 247 */ 248 getErrorHeader: function(e) { 249 /* handle null input */ 250 if (!org.antlr.lang.isNumber(e.line)) { 251 e.line = 0; 252 } 253 return "line "+e.line+":"+e.charPositionInLine; 254 }, 255 256 /** 257 * Override this method to change where error messages go. 258 * Defaults to "alert"-ing the error in browsers and "print"-ing the error 259 * in other environments (e.g. Rhino, SpiderMonkey). 260 * @param {String} msg the error message to be displayed. 261 */ 262 emitErrorMessage: function(msg) { 263 if (typeof(window) != 'undefined' && window.alert) { 264 alert(msg); 265 } else { 266 print(msg); 267 } 268 }, 269 270 /** What error message should be generated for the various 271 * exception types? 272 * 273 * <p>Not very object-oriented code, but I like having all error message 274 * generation within one method rather than spread among all of the 275 * exception classes. This also makes it much easier for the exception 276 * handling because the exception classes do not have to have pointers back 277 * to this object to access utility routines and so on. Also, changing 278 * the message for an exception type would be difficult because you 279 * would have to be subclassing exceptions, but then somehow get ANTLR 280 * to make those kinds of exception objects instead of the default.</p> 281 * 282 * <p>For grammar debugging, you will want to override this to add 283 * more information such as the stack frame and no viable alts.</p> 284 * 285 * <p>Override this to change the message generated for one or more 286 * exception types.</p> 287 * 288 * @param {Array} tokenNames array of token names (strings). 289 * @param {org.antlr.runtime.RecognitionException} e the error to be reported. 290 * @returns {String} the error message to be emitted. 291 */ 292 getErrorMessage: function(e, tokenNames) { 293 var msg = (e && e.getMessage) ? e.getMessage() : null, 294 mte, 295 tokenName; 296 if ( e instanceof org.antlr.runtime.UnwantedTokenException ) { 297 var ute = e; 298 tokenName="<unknown>"; 299 if ( ute.expecting== org.antlr.runtime.Token.EOF ) { 300 tokenName = "EOF"; 301 } else { 302 tokenName = tokenNames[ute.expecting]; 303 } 304 msg = "extraneous input "+this.getTokenErrorDisplay(ute.getUnexpectedToken())+ 305 " expecting "+tokenName; 306 } 307 else if ( e instanceof org.antlr.runtime.MissingTokenException ) { 308 mte = e; 309 tokenName="<unknown>"; 310 if ( mte.expecting== org.antlr.runtime.Token.EOF ) { 311 tokenName = "EOF"; 312 } else { 313 tokenName = tokenNames[mte.expecting]; 314 } 315 msg = "missing "+tokenName+" at "+this.getTokenErrorDisplay(e.token); 316 } 317 else if ( e instanceof org.antlr.runtime.MismatchedTokenException ) { 318 mte = e; 319 tokenName="<unknown>"; 320 if ( mte.expecting== org.antlr.runtime.Token.EOF ) { 321 tokenName = "EOF"; 322 } 323 else { 324 tokenName = tokenNames[mte.expecting]; 325 } 326 msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+ 327 " expecting "+tokenName; 328 } 329 else if ( e instanceof org.antlr.runtime.NoViableAltException ) { 330 msg = "no viable alternative at input "+this.getTokenErrorDisplay(e.token); 331 } 332 else if ( e instanceof org.antlr.runtime.EarlyExitException ) { 333 msg = "required (...)+ loop did not match anything at input "+ 334 this.getTokenErrorDisplay(e.token); 335 } 336 else if ( e instanceof org.antlr.runtime.MismatchedSetException ) { 337 msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+ 338 " expecting set "+e.expecting; 339 } 340 else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) { 341 msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+ 342 " expecting set "+e.expecting; 343 } 344 else if ( e instanceof org.antlr.runtime.FailedPredicateException ) { 345 msg = "rule "+e.ruleName+" failed predicate: {"+ 346 e.predicateText+"}?"; 347 } 348 return msg; 349 }, 350 351 /** <p>Get number of recognition errors (lexer, parser, tree parser). Each 352 * recognizer tracks its own number. So parser and lexer each have 353 * separate count. Does not count the spurious errors found between 354 * an error and next valid token match.</p> 355 * 356 * <p>See also {@link #reportError}() 357 * @returns {Number} number of syntax errors encountered 358 */ 359 getNumberOfSyntaxErrors: function() { 360 return this.state.syntaxErrors; 361 }, 362 363 /** How should a token be displayed in an error message? The default 364 * is to display just the text, but during development you might 365 * want to have a lot of information spit out. Override in that case 366 * to use t.toString() (which, for CommonToken, dumps everything about 367 * the token). 368 * @param {org.antlr.runtime.Token} t token that will be displayed in an error message 369 * @return {String} the string representation of the token 370 */ 371 getTokenErrorDisplay: function(t) { 372 var s = t.getText(); 373 if ( !org.antlr.lang.isValue(s) ) { 374 if ( t.getType()==org.antlr.runtime.Token.EOF ) { 375 s = "<EOF>"; 376 } 377 else { 378 s = "<"+t.getType()+">"; 379 } 380 } 381 s = s.replace(/\n/g,"\\n"); 382 s = s.replace(/\r/g,"\\r"); 383 s = s.replace(/\t/g,"\\t"); 384 return "'"+s+"'"; 385 }, 386 387 /** Recover from an error found on the input stream. This is 388 * for NoViableAlt and mismatched symbol exceptions. If you enable 389 * single token insertion and deletion, this will usually not 390 * handle mismatched symbol exceptions but there could be a mismatched 391 * token that the match() routine could not recover from. 392 * @param {org.antlr.runtime.IntStream} input the intput stream 393 * @param {org.antlr.runtime.RecogntionException} the error found on the input stream 394 */ 395 recover: function(input, re) { 396 if ( this.state.lastErrorIndex==input.index() ) { 397 // uh oh, another error at same token index; must be a case 398 // where LT(1) is in the recovery token set so nothing is 399 // consumed; consume a single token so at least to prevent 400 // an infinite loop; this is a failsafe. 401 input.consume(); 402 } 403 this.state.lastErrorIndex = input.index(); 404 var followSet = this.computeErrorRecoverySet(); 405 this.beginResync(); 406 this.consumeUntil(input, followSet); 407 this.endResync(); 408 }, 409 410 /** A hook to listen in on the token consumption during error recovery. 411 */ 412 beginResync: function() { 413 }, 414 415 /** A hook to listen in on the token consumption during error recovery. 416 */ 417 endResync: function() { 418 }, 419 420 /** Compute the error recovery set for the current rule. 421 * <p>During rule invocation, the parser pushes the set of tokens that can 422 * follow that rule reference on the stack; this amounts to 423 * computing FIRST of what follows the rule reference in the 424 * enclosing rule. This local follow set only includes tokens 425 * from within the rule; i.e., the FIRST computation done by 426 * ANTLR stops at the end of a rule.</p> 427 * 428 * <p>EXAMPLE</p> 429 * 430 * <p>When you find a "no viable alt exception", the input is not 431 * consistent with any of the alternatives for rule r. The best 432 * thing to do is to consume tokens until you see something that 433 * can legally follow a call to r *or* any rule that called r. 434 * You don't want the exact set of viable next tokens because the 435 * input might just be missing a token--you might consume the 436 * rest of the input looking for one of the missing tokens.</p> 437 * 438 * <p>Consider grammar:</p> 439 * <code><pre> 440 * a : '[' b ']' 441 * | '(' b ')' 442 * ; 443 * b : c '^' INT ; 444 * c : ID 445 * | INT 446 * ; 447 * </pre></code> 448 * 449 * <p>At each rule invocation, the set of tokens that could follow 450 * that rule is pushed on a stack. Here are the various "local" 451 * follow sets:</p> 452 * 453 * <code><pre> 454 * FOLLOW(b1_in_a) = FIRST(']') = ']' 455 * FOLLOW(b2_in_a) = FIRST(')') = ')' 456 * FOLLOW(c_in_b) = FIRST('^') = '^' 457 * </pre></code> 458 * 459 * <p>Upon erroneous input "[]", the call chain is</p> 460 * 461 * <code>a -> b -> c</code> 462 * 463 * <p>and, hence, the follow context stack is:</p> 464 * 465 * <code><pre> 466 * depth local follow set after call to rule 467 * 0 <EOF> a (from main()) 468 * 1 ']' b 469 * 3 '^' c 470 * </pre></code> 471 * 472 * <p>Notice that ')' is not included, because b would have to have 473 * been called from a different context in rule a for ')' to be 474 * included.</p> 475 * 476 * <p>For error recovery, we cannot consider FOLLOW(c) 477 * (context-sensitive or otherwise). We need the combined set of 478 * all context-sensitive FOLLOW sets--the set of all tokens that 479 * could follow any reference in the call chain. We need to 480 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 481 * we resync'd to that token, we'd consume until EOF. We need to 482 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 483 * In this case, for input "[]", LA(1) is in this set so we would 484 * not consume anything and after printing an error rule c would 485 * return normally. It would not find the required '^' though. 486 * At this point, it gets a mismatched token error and throws an 487 * exception (since LA(1) is not in the viable following token 488 * set). The rule exception handler tries to recover, but finds 489 * the same recovery set and doesn't consume anything. Rule b 490 * exits normally returning to rule a. Now it finds the ']' (and 491 * with the successful match exits errorRecovery mode).</p> 492 * 493 * <p>So, you cna see that the parser walks up call chain looking 494 * for the token that was a member of the recovery set.</p> 495 * 496 * <p>Errors are not generated in errorRecovery mode.</p> 497 * 498 * <p>ANTLR's error recovery mechanism is based upon original ideas:</p> 499 * 500 * <p>"Algorithms + Data Structures = Programs" by Niklaus Wirth</p> 501 * 502 * <p>and</p> 503 * 504 * <p>"A note on error recovery in recursive descent parsers": 505 * http://portal.acm.org/citation.cfm?id=947902.947905</p> 506 * 507 * <p>Later, Josef Grosch had some good ideas:</p> 508 * 509 * <p>"Efficient and Comfortable Error Recovery in Recursive Descent 510 * Parsers": 511 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip</p> 512 * 513 * <p>Like Grosch I implemented local FOLLOW sets that are combined 514 * at run-time upon error to avoid overhead during parsing.</p> 515 * @returns {org.antlr.runtime.BitSet} 516 */ 517 computeErrorRecoverySet: function() { 518 return this.combineFollows(false); 519 }, 520 521 522 /** Compute the context-sensitive FOLLOW set for current rule. 523 * <p>This is set of token types that can follow a specific rule 524 * reference given a specific call chain. You get the set of 525 * viable tokens that can possibly come next (lookahead depth 1) 526 * given the current call chain. Contrast this with the 527 * definition of plain FOLLOW for rule r:</p> 528 * 529 * <code>FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}</code> 530 * 531 * <p>where x in T* and alpha, beta in V*; T is set of terminals and 532 * V is the set of terminals and nonterminals. In other words, 533 * FOLLOW(r) is the set of all tokens that can possibly follow 534 * references to r in *any* sentential form (context). At 535 * runtime, however, we know precisely which context applies as 536 * we have the call chain. We may compute the exact (rather 537 * than covering superset) set of following tokens.</p> 538 * 539 * <p>For example, consider grammar:</p> 540 * 541 * <code><pre> 542 * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} 543 * | "return" expr '.' 544 * ; 545 * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} 546 * atom : INT // FOLLOW(atom)=={'+',')',';','.'} 547 * | '(' expr ')' 548 * ; 549 * </pre></code> 550 * 551 * <p>The FOLLOW sets are all inclusive whereas context-sensitive 552 * FOLLOW sets are precisely what could follow a rule reference. 553 * For input input "i=(3);", here is the derivation:</p> 554 * 555 * <code><pre> 556 * stat => ID '=' expr ';' 557 * => ID '=' atom ('+' atom)* ';' 558 * => ID '=' '(' expr ')' ('+' atom)* ';' 559 * => ID '=' '(' atom ')' ('+' atom)* ';' 560 * => ID '=' '(' INT ')' ('+' atom)* ';' 561 * => ID '=' '(' INT ')' ';' 562 * </pre></code> 563 * 564 * <p>At the "3" token, you'd have a call chain of</p> 565 * 566 * <code> stat -> expr -> atom -> expr -> atom</code> 567 * 568 * <p>What can follow that specific nested ref to atom? Exactly ')' 569 * as you can see by looking at the derivation of this specific 570 * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.</p> 571 * 572 * <p>You want the exact viable token set when recovering from a 573 * token mismatch. Upon token mismatch, if LA(1) is member of 574 * the viable next token set, then you know there is most likely 575 * a missing token in the input stream. "Insert" one by just not 576 * throwing an exception.</p> 577 * @returns {org.antlr.runtime.BitSet} 578 */ 579 computeContextSensitiveRuleFOLLOW: function() { 580 return this.combineFollows(true); 581 }, 582 583 /** 584 * Helper method for {@link #computeErrorRecoverySet} and 585 * {@link computeContextSensitiveRuleFOLLO}. 586 * @param {Boolean} exact 587 * @returns {org.antlr.runtime.BitSet} 588 */ 589 combineFollows: function(exact) { 590 var top = this.state._fsp, 591 i, 592 localFollowSet, 593 followSet = new org.antlr.runtime.BitSet(); 594 for (i=top; i>=0; i--) { 595 localFollowSet = this.state.following[i]; 596 followSet.orInPlace(localFollowSet); 597 if ( exact ) { 598 // can we see end of rule? 599 if ( localFollowSet.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) 600 { 601 // Only leave EOR in set if at top (start rule); this lets 602 // us know if have to include follow(start rule); i.e., EOF 603 if ( i>0 ) { 604 followSet.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE); 605 } 606 } 607 else { // can't see end of rule, quit 608 break; 609 } 610 } 611 } 612 return followSet; 613 }, 614 615 /** Attempt to recover from a single missing or extra token. 616 * 617 * <p>EXTRA TOKEN</p> 618 * 619 * <p>LA(1) is not what we are looking for. If LA(2) has the right token, 620 * however, then assume LA(1) is some extra spurious token. Delete it 621 * and LA(2) as if we were doing a normal match(), which advances the 622 * input.</p> 623 * 624 * <p>MISSING TOKEN</p> 625 * 626 * <p>If current token is consistent with what could come after 627 * ttype then it is ok to "insert" the missing token, else throw 628 * exception For example, Input "i=(3;" is clearly missing the 629 * ')'. When the parser returns from the nested call to expr, it 630 * will have call chain:</p> 631 * 632 * <pre><code> stat -> expr -> atom</code></pre> 633 * 634 * <p>and it will be trying to match the ')' at this point in the 635 * derivation:</p> 636 * 637 * <pre><code> => ID '=' '(' INT ')' ('+' atom)* ';'</code></pre> 638 * ^ 639 * <p>match() will see that ';' doesn't match ')' and report a 640 * mismatched token error. To recover, it sees that LA(1)==';' 641 * is in the set of tokens that can follow the ')' token 642 * reference in rule atom. It can assume that you forgot the ')'.</p> 643 * 644 * @param {org.antlr.runtime.IntStream} input 645 * @param {Number} ttype 646 * @param {org.antlr.runtime.BitSet} follow 647 * @returns {Object} 648 */ 649 recoverFromMismatchedToken: function(input, 650 ttype, 651 follow) 652 { 653 var e = null; 654 // if next token is what we are looking for then "delete" this token 655 if ( this.mismatchIsUnwantedToken(input, ttype) ) { 656 e = new org.antlr.runtime.UnwantedTokenException(ttype, input); 657 this.beginResync(); 658 input.consume(); // simply delete extra token 659 this.endResync(); 660 this.reportError(e); // report after consuming so AW sees the token in the exception 661 // we want to return the token we're actually matching 662 var matchedSymbol = this.getCurrentInputSymbol(input); 663 input.consume(); // move past ttype token as if all were ok 664 return matchedSymbol; 665 } 666 // can't recover with single token deletion, try insertion 667 if ( this.mismatchIsMissingToken(input, follow) ) { 668 var inserted = this.getMissingSymbol(input, e, ttype, follow); 669 e = new org.antlr.runtime.MissingTokenException(ttype, input, inserted); 670 this.reportError(e); // report after inserting so AW sees the token in the exception 671 return inserted; 672 } 673 // even that didn't work; must throw the exception 674 e = new org.antlr.runtime.MismatchedTokenException(ttype, input); 675 throw e; 676 }, 677 678 /** 679 * Recover from a mismatched set exception. 680 * @param {org.antlr.runtime.IntStream} input 681 * @param {org.antlr.runtime.RecognitionException} e 682 * @param {org.antlr.runtime.BitSet} follow 683 * @returns {Object} 684 */ 685 recoverFromMismatchedSet: function(input, 686 e, 687 follow) 688 { 689 if ( this.mismatchIsMissingToken(input, follow) ) { 690 // System.out.println("missing token"); 691 this.reportError(e); 692 // we don't know how to conjure up a token for sets yet 693 return this.getMissingSymbol(input, e, org.antlr.runtime.Token.INVALID_TOKEN_TYPE, follow); 694 } 695 throw e; 696 }, 697 698 /** Match needs to return the current input symbol, which gets put 699 * into the label for the associated token ref; e.g., x=ID. Token 700 * and tree parsers need to return different objects. Rather than test 701 * for input stream type or change the IntStream interface, I use 702 * a simple method to ask the recognizer to tell me what the current 703 * input symbol is. 704 * 705 * <p>This is ignored for lexers.</p> 706 * @param {org.antlr.runtime.IntStream} input 707 * @returns {Object} 708 */ 709 getCurrentInputSymbol: function(input) { return null; }, 710 711 /** Conjure up a missing token during error recovery. 712 * 713 * <p>The recognizer attempts to recover from single missing 714 * symbols. But, actions might refer to that missing symbol. 715 * For example, x=ID {f($x);}. The action clearly assumes 716 * that there has been an identifier matched previously and that 717 * $x points at that token. If that token is missing, but 718 * the next token in the stream is what we want we assume that 719 * this token is missing and we keep going. Because we 720 * have to return some token to replace the missing token, 721 * we have to conjure one up. This method gives the user control 722 * over the tokens returned for missing tokens. Mostly, 723 * you will want to create something special for identifier 724 * tokens. For literals such as '{' and ',', the default 725 * action in the parser or tree parser works. It simply creates 726 * a CommonToken of the appropriate type. The text will be the token. 727 * If you change what tokens must be created by the lexer, 728 * override this method to create the appropriate tokens.</p> 729 * 730 * @param {org.antlr.runtime.IntStream} input 731 * @param {org.antlr.runtime.RecognitionException} e 732 * @param {Number} expectedTokenType 733 * @param {org.antlr.runtime.BitSet} follow 734 * @returns {Object} 735 */ 736 getMissingSymbol: function(input, 737 e, 738 expectedTokenType, 739 follow) 740 { 741 return null; 742 }, 743 744 745 /** 746 * Consume tokens until one matches the given token or token set 747 * @param {org.antlr.runtime.IntStream} input 748 * @param {Number|org.antlr.runtime.BitSet} set 749 */ 750 consumeUntil: function(input, set) { 751 var ttype = input.LA(1); 752 while (ttype != org.antlr.runtime.Token.EOF && !set.member(ttype) ) { 753 input.consume(); 754 ttype = input.LA(1); 755 } 756 }, 757 758 /** 759 * Push a rule's follow set using our own hardcoded stack. 760 * @param {org.antlr.runtime.BitSet} fset 761 */ 762 pushFollow: function(fset) { 763 if ( (this.state._fsp +1)>=this.state.following.length ) { 764 var f = []; 765 var i; 766 for (i=this.state.following.length-1; i>=0; i--) { 767 f[i] = this.state.following[i]; 768 } 769 this.state.following = f; 770 } 771 this.state._fsp++; 772 this.state.following[this.state._fsp] = fset; 773 }, 774 775 /** 776 * Sadly JavaScript doesn't provide a robust mechanism for runtime stack reflection. 777 * This makes implementing this function impossible without maintaining an auxillary 778 * stack data structure, which would be crazy expensive, especially in Lexers. As such, 779 * this method remains unimplemented. 780 * @deprecated 781 */ 782 getRuleInvocationStack: function(e, recognizerClassName) 783 { 784 throw new Error("Not implemented."); 785 }, 786 787 /** 788 * Get this recognizer's backtracking level. 789 * @returns {Number} backtracking level 790 */ 791 getBacktrackingLevel: function() { 792 return this.state.backtracking; 793 }, 794 795 /** Used to print out token names like ID during debugging and 796 * error reporting. The generated parsers implement a method 797 * that overrides this to point to their String[] tokenNames. 798 * @returns {Array} String array of token names. 799 */ 800 getTokenNames: function() { 801 return null; 802 }, 803 804 /** For debugging and other purposes, might want the grammar name. 805 * Have ANTLR generate an implementation for this method. 806 * @returns {String} the grammar name. 807 */ 808 getGrammarFileName: function() { 809 return null; 810 }, 811 812 /** A convenience method for use most often with template rewrites. 813 * Convert an array of Tokens to an array of Strings. 814 * @param {Array} array of org.antlr.runtime.Token objects. 815 * @returns {Array} array of string representations of the argument. 816 */ 817 toStrings: function(tokens) { 818 if ( !tokens ) { 819 return null; 820 } 821 var strings = []; 822 var i; 823 for (i=0; i<tokens.length; i++) { 824 strings.push(tokens[i].getText()); 825 } 826 return strings; 827 }, 828 829 /** Given a rule number and a start token index number, return 830 * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from 831 * start index. If this rule has parsed input starting from the 832 * start index before, then return where the rule stopped parsing. 833 * It returns the index of the last token matched by the rule. 834 * 835 * <p>For now we use a hashtable and just the slow Object-based one. 836 * Later, we can make a special one for ints and also one that 837 * tosses out data after we commit past input position i.</p> 838 * @param {Number} ruleIndex 839 * @param {Number} ruleStartIndex 840 * @returns {Number} 841 */ 842 getRuleMemoization: function(ruleIndex, ruleStartIndex) { 843 if ( !this.state.ruleMemo[ruleIndex] ) { 844 this.state.ruleMemo[ruleIndex] = {}; 845 } 846 var stopIndexI = 847 this.state.ruleMemo[ruleIndex][ruleStartIndex]; 848 if ( !org.antlr.lang.isNumber(stopIndexI) ) { 849 return org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN; 850 } 851 return stopIndexI; 852 }, 853 854 /** Has this rule already parsed input at the current index in the 855 * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. 856 * If we attempted but failed to parse properly before, return 857 * MEMO_RULE_FAILED. 858 * 859 * <p>This method has a side-effect: if we have seen this input for 860 * this rule and successfully parsed before, then seek ahead to 861 * 1 past the stop token matched for this rule last time.</p> 862 * @param {org.antlr.runtime.IntStream} input 863 * @param {Number} ruleIndex 864 * @returns {Boolean} 865 */ 866 alreadyParsedRule: function(input, ruleIndex) { 867 var stopIndex = this.getRuleMemoization(ruleIndex, input.index()); 868 if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN ) { 869 return false; 870 } 871 if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED ) { 872 //System.out.println("rule "+ruleIndex+" will never succeed"); 873 this.state.failed=true; 874 } 875 else { 876 input.seek(stopIndex+1); // jump to one past stop token 877 } 878 return true; 879 }, 880 881 /** Record whether or not this rule parsed the input at this position 882 * successfully. Use a standard java hashtable for now. 883 * @param {org.antlr.runtime.IntStream} input 884 * @param {Number} ruleIndex 885 * @param {Number} ruleStartIndex 886 */ 887 memoize: function(input, 888 ruleIndex, 889 ruleStartIndex) 890 { 891 var stopTokenIndex = this.state.failed ? 892 org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED : input.index()-1; 893 if ( !org.antlr.lang.isValue(this.state.ruleMemo) ) { 894 throw new Error("!!!!!!!!! memo array is null for "+ this.getGrammarFileName()); 895 } 896 if ( ruleIndex >= this.state.ruleMemo.length ) { 897 throw new Error("!!!!!!!!! memo size is "+this.state.ruleMemo.length+", but rule index is "+ruleIndex); 898 } 899 if ( org.antlr.lang.isValue(this.state.ruleMemo[ruleIndex]) ) { 900 this.state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; 901 } 902 }, 903 904 /** return how many rule/input-index pairs there are in total. 905 * TODO: this includes synpreds. 906 * @returns {Number} 907 */ 908 getRuleMemoizationCacheSize: function() { 909 var n = 0, i; 910 for (i = 0; this.state.ruleMemo && i < this.state.ruleMemo.length; i++) { 911 var ruleMap = this.state.ruleMemo[i]; 912 if ( ruleMap ) { 913 // @todo need to get size of rulemap? 914 n += ruleMap.length; // how many input indexes are recorded? 915 } 916 } 917 return n; 918 }, 919 920 /** 921 * When a grammar is compiled with the tracing flag enabled, this method is invoked 922 * at the start of each rule. 923 * @param {String} ruleName the current ruleName 924 * @param {Number} ruleIndex 925 * @param {Object} inputSymbol 926 */ 927 traceIn: function(ruleName, ruleIndex, inputSymbol) { 928 this.emitErrorMessage("enter "+ruleName+" "+inputSymbol); 929 if ( this.state.failed ) { 930 this.emitErrorMessage(" failed="+this.failed); 931 } 932 if ( this.state.backtracking>0 ) { 933 this.emitErrorMessage(" backtracking="+this.state.backtracking); 934 } 935 // System.out.println(); 936 }, 937 938 /** 939 * When a grammar is compiled with the tracing flag enabled, this method is invoked 940 * at the end of each rule. 941 * @param {String} ruleName the current ruleName 942 * @param {Number} ruleIndex 943 * @param {Object} inputSymbol 944 */ 945 traceOut: function(ruleName, ruleIndex, inputSymbol) { 946 this.emitErrorMessage("exit "+ruleName+" "+inputSymbol); 947 if ( this.state.failed ) { 948 this.emitErrorMessage(" failed="+this.state.failed); 949 } 950 if ( this.state.backtracking>0 ) { 951 this.emitErrorMessage(" backtracking="+this.state.backtracking); 952 } 953 } 954}; 955