/** A generic recognizer that can handle recognizers generated from * lexer, parser, and tree grammars. This is all the parsing * support code essentially; most of it is error recovery stuff and * backtracking. * *

This class should not be instantiated directly. Instead, use one of its * subclasses.

* * @class * @param {org.antlr.runtime.RecognizerSharedState} [state] state object with * which to initialize this recognizer. */ org.antlr.runtime.BaseRecognizer = function(state) { /** State of a lexer, parser, or tree parser are collected into a state * object so the state can be shared. This sharing is needed to * have one grammar import others and share same error variables * and other state variables. It's a kind of explicit multiple * inheritance via delegation of methods and shared state. * @type org.antlr.runtime.RecognizerSharedState */ this.state = state || new org.antlr.runtime.RecognizerSharedState(); }; org.antlr.lang.augmentObject(org.antlr.runtime.BaseRecognizer, { /** * @memberOf org.antlr.runtime.BaseRecognizer * @type Number */ MEMO_RULE_FAILED: -2, /** * @memberOf org.antlr.runtime.BaseRecognizer * @type Number */ MEMO_RULE_UNKNOWN: -1, /** * @memberOf org.antlr.runtime.BaseRecognizer * @type Number */ INITIAL_FOLLOW_STACK_SIZE: 100, /** * @memberOf org.antlr.runtime.BaseRecognizer * @type Number */ MEMO_RULE_FAILED_I: -2, /** * @memberOf org.antlr.runtime.BaseRecognizer * @type Number */ DEFAULT_TOKEN_CHANNEL: org.antlr.runtime.Token.DEFAULT_CHANNEL, /** * @memberOf org.antlr.runtime.BaseRecognizer * @type Number */ HIDDEN: org.antlr.runtime.Token.HIDDEN_CHANNEL, /** * @memberOf org.antlr.runtime.BaseRecognizer * @type String */ NEXT_TOKEN_RULE_NAME: "nextToken" }); org.antlr.runtime.BaseRecognizer.prototype = { /** Reset the parser's state. Subclasses must rewinds the input stream */ reset: function() { var i, len; // wack everything related to error recovery if (!this.state) { return; // no shared state work to do } this.state._fsp = -1; this.state.errorRecovery = false; this.state.lastErrorIndex = -1; this.state.failed = false; this.state.syntaxErrors = 0; // wack everything related to backtracking and memoization this.state.backtracking = 0; // wipe cache if (this.state.ruleMemo) { for (i=0, len=this.state.ruleMemo.length; iTo turn off single token insertion or deletion error * recovery, override {@link #mismatchRecover} and have it call * plain {@link #mismatch}, which does not recover. Then any error * in a rule will cause an exception and immediate exit from * rule. Rule would recover by resynchronizing to the set of * symbols that can follow rule ref.

* * @param {org.antlr.runtime.IntStream} input input stream to match against. * @param {Number} ttype input type to match. * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the * matched token. * @returns {Object} the matched symbol */ match: function(input, ttype, follow) { var matchedSymbol = this.getCurrentInputSymbol(input); if ( input.LA(1)===ttype ) { input.consume(); this.state.errorRecovery = false; this.state.failed = false; return matchedSymbol; } if ( this.state.backtracking>0 ) { this.state.failed = true; return matchedSymbol; } matchedSymbol = this.recoverFromMismatchedToken(input, ttype, follow); return matchedSymbol; }, /** * Match any token. * @param {org.antlr.runtime.IntStream} input input stream to match against. */ matchAny: function(input) { this.state.errorRecovery = false; this.state.failed = false; input.consume(); }, /** * Is the following token (LA(2)) the unwanted type (ttype)? * @param {org.antlr.runtime.IntStream} input input stream to match against. * @param {Number} ttype the undesired token type. * @returns {Boolean} true if and only if the following token is the * unwanted type. */ mismatchIsUnwantedToken: function(input, ttype) { return input.LA(2)===ttype; }, /** * Does the stream appear to be missing a single token? * @param {org.antlr.runtime.IntStream} input input stream to match against. * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the * matched token. * @returns {Boolean} true if and only if it appears that the stream is * missing a single token. */ mismatchIsMissingToken: function(input, follow) { if ( !follow ) { // we have no information about the follow; we can only consume // a single token and hope for the best return false; } // compute what can follow this grammar element reference if ( follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) { var viableTokensFollowingThisRule = this.computeContextSensitiveRuleFOLLOW(); follow = follow.or(this.viableTokensFollowingThisRule); if ( this.state._fsp>=0 ) { // remove EOR if we're not the start symbol follow.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE); } } // if current token is consistent with what could come after set // then we know we're missing a token; error recovery is free to // "insert" the missing token // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR // in follow set to indicate that the fall of the start symbol is // in the set (EOF can follow). if ( follow.member(input.LA(1)) || follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) { return true; } return false; }, /** Factor out what to do upon token mismatch so tree parsers can behave * differently. Override and call {@link #mismatchRecover} * to get single token insertion and deletion. * * @param {org.antlr.runtime.IntStream} input input stream to match against. * @param {Number} ttype input type to match. * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the * matched token. */ mismatch: function(input, ttype, follow) { if ( this.mismatchIsUnwantedToken(input, ttype) ) { throw new org.antlr.runtime.UnwantedTokenException(ttype, input); } else if ( this.mismatchIsMissingToken(input, follow) ) { throw new org.antlr.runtime.MissingTokenException(ttype, input, null); } throw new org.antlr.runtime.MismatchedTokenException(ttype, input); }, /** Report a recognition problem. * *

This method sets errorRecovery to indicate the parser is recovering * not parsing. Once in recovery mode, no errors are generated. * To get out of recovery mode, the parser must successfully match * a token (after a resync). So it will go:

*
    *
  1. error occurs
  2. *
  3. enter recovery mode, report error
  4. *
  5. consume until token found in resynch set
  6. *
  7. try to resume parsing
  8. *
  9. next match() will reset errorRecovery mode
  10. *
* *

If you override, make sure to update this.state.syntaxErrors if you * care about that.

* @param {org.antlr.runtime.RecognitionException} e the error to be reported. */ reportError: function(e) { // if we've already reported an error and have not matched a token // yet successfully, don't report any errors. if ( this.state.errorRecovery ) { return; } this.state.syntaxErrors++; this.state.errorRecovery = true; this.displayRecognitionError(this.getTokenNames(), e); }, /** * Assemble recognition error message. * @param {Array} tokenNames array of token names (strings). * @param {org.antlr.runtime.RecognitionException} e the error to be reported. */ displayRecognitionError: function(tokenNames, e) { var hdr = this.getErrorHeader(e), msg = this.getErrorMessage(e, tokenNames); this.emitErrorMessage(hdr+" "+msg); }, /** * Create error header message. Format is line * lineNumber:positionInLine. * @param {org.antlr.runtime.RecognitionException} e the error to be reported. * @returns {String} The error header. */ getErrorHeader: function(e) { /* handle null input */ if (!org.antlr.lang.isNumber(e.line)) { e.line = 0; } return "line "+e.line+":"+e.charPositionInLine; }, /** * Override this method to change where error messages go. * Defaults to "alert"-ing the error in browsers and "print"-ing the error * in other environments (e.g. Rhino, SpiderMonkey). * @param {String} msg the error message to be displayed. */ emitErrorMessage: function(msg) { if (typeof(window) != 'undefined' && window.alert) { alert(msg); } else { print(msg); } }, /** What error message should be generated for the various * exception types? * *

Not very object-oriented code, but I like having all error message * generation within one method rather than spread among all of the * exception classes. This also makes it much easier for the exception * handling because the exception classes do not have to have pointers back * to this object to access utility routines and so on. Also, changing * the message for an exception type would be difficult because you * would have to be subclassing exceptions, but then somehow get ANTLR * to make those kinds of exception objects instead of the default.

* *

For grammar debugging, you will want to override this to add * more information such as the stack frame and no viable alts.

* *

Override this to change the message generated for one or more * exception types.

* * @param {Array} tokenNames array of token names (strings). * @param {org.antlr.runtime.RecognitionException} e the error to be reported. * @returns {String} the error message to be emitted. */ getErrorMessage: function(e, tokenNames) { var msg = (e && e.getMessage) ? e.getMessage() : null, mte, tokenName; if ( e instanceof org.antlr.runtime.UnwantedTokenException ) { var ute = e; tokenName=""; if ( ute.expecting== org.antlr.runtime.Token.EOF ) { tokenName = "EOF"; } else { tokenName = tokenNames[ute.expecting]; } msg = "extraneous input "+this.getTokenErrorDisplay(ute.getUnexpectedToken())+ " expecting "+tokenName; } else if ( e instanceof org.antlr.runtime.MissingTokenException ) { mte = e; tokenName=""; if ( mte.expecting== org.antlr.runtime.Token.EOF ) { tokenName = "EOF"; } else { tokenName = tokenNames[mte.expecting]; } msg = "missing "+tokenName+" at "+this.getTokenErrorDisplay(e.token); } else if ( e instanceof org.antlr.runtime.MismatchedTokenException ) { mte = e; tokenName=""; if ( mte.expecting== org.antlr.runtime.Token.EOF ) { tokenName = "EOF"; } else { tokenName = tokenNames[mte.expecting]; } msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+ " expecting "+tokenName; } else if ( e instanceof org.antlr.runtime.NoViableAltException ) { msg = "no viable alternative at input "+this.getTokenErrorDisplay(e.token); } else if ( e instanceof org.antlr.runtime.EarlyExitException ) { msg = "required (...)+ loop did not match anything at input "+ this.getTokenErrorDisplay(e.token); } else if ( e instanceof org.antlr.runtime.MismatchedSetException ) { msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+ " expecting set "+e.expecting; } else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) { msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+ " expecting set "+e.expecting; } else if ( e instanceof org.antlr.runtime.FailedPredicateException ) { msg = "rule "+e.ruleName+" failed predicate: {"+ e.predicateText+"}?"; } return msg; }, /**

Get number of recognition errors (lexer, parser, tree parser). Each * recognizer tracks its own number. So parser and lexer each have * separate count. Does not count the spurious errors found between * an error and next valid token match.

* *

See also {@link #reportError}() * @returns {Number} number of syntax errors encountered */ getNumberOfSyntaxErrors: function() { return this.state.syntaxErrors; }, /** How should a token be displayed in an error message? The default * is to display just the text, but during development you might * want to have a lot of information spit out. Override in that case * to use t.toString() (which, for CommonToken, dumps everything about * the token). * @param {org.antlr.runtime.Token} t token that will be displayed in an error message * @return {String} the string representation of the token */ getTokenErrorDisplay: function(t) { var s = t.getText(); if ( !org.antlr.lang.isValue(s) ) { if ( t.getType()==org.antlr.runtime.Token.EOF ) { s = ""; } else { s = "<"+t.getType()+">"; } } s = s.replace(/\n/g,"\\n"); s = s.replace(/\r/g,"\\r"); s = s.replace(/\t/g,"\\t"); return "'"+s+"'"; }, /** Recover from an error found on the input stream. This is * for NoViableAlt and mismatched symbol exceptions. If you enable * single token insertion and deletion, this will usually not * handle mismatched symbol exceptions but there could be a mismatched * token that the match() routine could not recover from. * @param {org.antlr.runtime.IntStream} input the intput stream * @param {org.antlr.runtime.RecogntionException} the error found on the input stream */ recover: function(input, re) { if ( this.state.lastErrorIndex==input.index() ) { // uh oh, another error at same token index; must be a case // where LT(1) is in the recovery token set so nothing is // consumed; consume a single token so at least to prevent // an infinite loop; this is a failsafe. input.consume(); } this.state.lastErrorIndex = input.index(); var followSet = this.computeErrorRecoverySet(); this.beginResync(); this.consumeUntil(input, followSet); this.endResync(); }, /** A hook to listen in on the token consumption during error recovery. */ beginResync: function() { }, /** A hook to listen in on the token consumption during error recovery. */ endResync: function() { }, /** Compute the error recovery set for the current rule. *

During rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule.

* *

EXAMPLE

* *

When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens.

* *

Consider grammar:

*
     *  a : '[' b ']'
     *    | '(' b ')'
     *    ;
     *  b : c '^' INT ;
     *  c : ID
     *    | INT
     *    ;
     *  
* *

At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various "local" * follow sets:

* *
     *  FOLLOW(b1_in_a) = FIRST(']') = ']'
     *  FOLLOW(b2_in_a) = FIRST(')') = ')'
     *  FOLLOW(c_in_b) = FIRST('^') = '^'
     *  
* *

Upon erroneous input "[]", the call chain is

* * a -> b -> c * *

and, hence, the follow context stack is:

* *
     *  depth  local follow set     after call to rule
     *    0                             a (from main())
     *    1          ']'                     b
     *    3          '^'                     c
     *  
* *

Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included.

* *

For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. * In this case, for input "[]", LA(1) is in this set so we would * not consume anything and after printing an error rule c would * return normally. It would not find the required '^' though. * At this point, it gets a mismatched token error and throws an * exception (since LA(1) is not in the viable following token * set). The rule exception handler tries to recover, but finds * the same recovery set and doesn't consume anything. Rule b * exits normally returning to rule a. Now it finds the ']' (and * with the successful match exits errorRecovery mode).

* *

So, you cna see that the parser walks up call chain looking * for the token that was a member of the recovery set.

* *

Errors are not generated in errorRecovery mode.

* *

ANTLR's error recovery mechanism is based upon original ideas:

* *

"Algorithms + Data Structures = Programs" by Niklaus Wirth

* *

and

* *

"A note on error recovery in recursive descent parsers": * http://portal.acm.org/citation.cfm?id=947902.947905

* *

Later, Josef Grosch had some good ideas:

* *

"Efficient and Comfortable Error Recovery in Recursive Descent * Parsers": * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip

* *

Like Grosch I implemented local FOLLOW sets that are combined * at run-time upon error to avoid overhead during parsing.

* @returns {org.antlr.runtime.BitSet} */ computeErrorRecoverySet: function() { return this.combineFollows(false); }, /** Compute the context-sensitive FOLLOW set for current rule. *

This is set of token types that can follow a specific rule * reference given a specific call chain. You get the set of * viable tokens that can possibly come next (lookahead depth 1) * given the current call chain. Contrast this with the * definition of plain FOLLOW for rule r:

* * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} * *

where x in T* and alpha, beta in V*; T is set of terminals and * V is the set of terminals and nonterminals. In other words, * FOLLOW(r) is the set of all tokens that can possibly follow * references to r in *any* sentential form (context). At * runtime, however, we know precisely which context applies as * we have the call chain. We may compute the exact (rather * than covering superset) set of following tokens.

* *

For example, consider grammar:

* *
     *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
     *       | "return" expr '.'
     *       ;
     *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
     *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
     *       | '(' expr ')'
     *       ;
     *  
* *

The FOLLOW sets are all inclusive whereas context-sensitive * FOLLOW sets are precisely what could follow a rule reference. * For input input "i=(3);", here is the derivation:

* *
     *  stat => ID '=' expr ';'
     *       => ID '=' atom ('+' atom)* ';'
     *       => ID '=' '(' expr ')' ('+' atom)* ';'
     *       => ID '=' '(' atom ')' ('+' atom)* ';'
     *       => ID '=' '(' INT ')' ('+' atom)* ';'
     *       => ID '=' '(' INT ')' ';'
     *  
* *

At the "3" token, you'd have a call chain of

* * stat -> expr -> atom -> expr -> atom * *

What can follow that specific nested ref to atom? Exactly ')' * as you can see by looking at the derivation of this specific * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.

* *

You want the exact viable token set when recovering from a * token mismatch. Upon token mismatch, if LA(1) is member of * the viable next token set, then you know there is most likely * a missing token in the input stream. "Insert" one by just not * throwing an exception.

* @returns {org.antlr.runtime.BitSet} */ computeContextSensitiveRuleFOLLOW: function() { return this.combineFollows(true); }, /** * Helper method for {@link #computeErrorRecoverySet} and * {@link computeContextSensitiveRuleFOLLO}. * @param {Boolean} exact * @returns {org.antlr.runtime.BitSet} */ combineFollows: function(exact) { var top = this.state._fsp, i, localFollowSet, followSet = new org.antlr.runtime.BitSet(); for (i=top; i>=0; i--) { localFollowSet = this.state.following[i]; followSet.orInPlace(localFollowSet); if ( exact ) { // can we see end of rule? if ( localFollowSet.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) { // Only leave EOR in set if at top (start rule); this lets // us know if have to include follow(start rule); i.e., EOF if ( i>0 ) { followSet.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE); } } else { // can't see end of rule, quit break; } } } return followSet; }, /** Attempt to recover from a single missing or extra token. * *

EXTRA TOKEN

* *

LA(1) is not what we are looking for. If LA(2) has the right token, * however, then assume LA(1) is some extra spurious token. Delete it * and LA(2) as if we were doing a normal match(), which advances the * input.

* *

MISSING TOKEN

* *

If current token is consistent with what could come after * ttype then it is ok to "insert" the missing token, else throw * exception For example, Input "i=(3;" is clearly missing the * ')'. When the parser returns from the nested call to expr, it * will have call chain:

* *
  stat -> expr -> atom
* *

and it will be trying to match the ')' at this point in the * derivation:

* *
     => ID '=' '(' INT ')' ('+' atom)* ';'
* ^ *

match() will see that ';' doesn't match ')' and report a * mismatched token error. To recover, it sees that LA(1)==';' * is in the set of tokens that can follow the ')' token * reference in rule atom. It can assume that you forgot the ')'.

* * @param {org.antlr.runtime.IntStream} input * @param {Number} ttype * @param {org.antlr.runtime.BitSet} follow * @returns {Object} */ recoverFromMismatchedToken: function(input, ttype, follow) { var e = null; // if next token is what we are looking for then "delete" this token if ( this.mismatchIsUnwantedToken(input, ttype) ) { e = new org.antlr.runtime.UnwantedTokenException(ttype, input); this.beginResync(); input.consume(); // simply delete extra token this.endResync(); this.reportError(e); // report after consuming so AW sees the token in the exception // we want to return the token we're actually matching var matchedSymbol = this.getCurrentInputSymbol(input); input.consume(); // move past ttype token as if all were ok return matchedSymbol; } // can't recover with single token deletion, try insertion if ( this.mismatchIsMissingToken(input, follow) ) { var inserted = this.getMissingSymbol(input, e, ttype, follow); e = new org.antlr.runtime.MissingTokenException(ttype, input, inserted); this.reportError(e); // report after inserting so AW sees the token in the exception return inserted; } // even that didn't work; must throw the exception e = new org.antlr.runtime.MismatchedTokenException(ttype, input); throw e; }, /** * Recover from a mismatched set exception. * @param {org.antlr.runtime.IntStream} input * @param {org.antlr.runtime.RecognitionException} e * @param {org.antlr.runtime.BitSet} follow * @returns {Object} */ recoverFromMismatchedSet: function(input, e, follow) { if ( this.mismatchIsMissingToken(input, follow) ) { // System.out.println("missing token"); this.reportError(e); // we don't know how to conjure up a token for sets yet return this.getMissingSymbol(input, e, org.antlr.runtime.Token.INVALID_TOKEN_TYPE, follow); } throw e; }, /** Match needs to return the current input symbol, which gets put * into the label for the associated token ref; e.g., x=ID. Token * and tree parsers need to return different objects. Rather than test * for input stream type or change the IntStream interface, I use * a simple method to ask the recognizer to tell me what the current * input symbol is. * *

This is ignored for lexers.

* @param {org.antlr.runtime.IntStream} input * @returns {Object} */ getCurrentInputSymbol: function(input) { return null; }, /** Conjure up a missing token during error recovery. * *

The recognizer attempts to recover from single missing * symbols. But, actions might refer to that missing symbol. * For example, x=ID {f($x);}. The action clearly assumes * that there has been an identifier matched previously and that * $x points at that token. If that token is missing, but * the next token in the stream is what we want we assume that * this token is missing and we keep going. Because we * have to return some token to replace the missing token, * we have to conjure one up. This method gives the user control * over the tokens returned for missing tokens. Mostly, * you will want to create something special for identifier * tokens. For literals such as '{' and ',', the default * action in the parser or tree parser works. It simply creates * a CommonToken of the appropriate type. The text will be the token. * If you change what tokens must be created by the lexer, * override this method to create the appropriate tokens.

* * @param {org.antlr.runtime.IntStream} input * @param {org.antlr.runtime.RecognitionException} e * @param {Number} expectedTokenType * @param {org.antlr.runtime.BitSet} follow * @returns {Object} */ getMissingSymbol: function(input, e, expectedTokenType, follow) { return null; }, /** * Consume tokens until one matches the given token or token set * @param {org.antlr.runtime.IntStream} input * @param {Number|org.antlr.runtime.BitSet} set */ consumeUntil: function(input, set) { var ttype = input.LA(1); while (ttype != org.antlr.runtime.Token.EOF && !set.member(ttype) ) { input.consume(); ttype = input.LA(1); } }, /** * Push a rule's follow set using our own hardcoded stack. * @param {org.antlr.runtime.BitSet} fset */ pushFollow: function(fset) { if ( (this.state._fsp +1)>=this.state.following.length ) { var f = []; var i; for (i=this.state.following.length-1; i>=0; i--) { f[i] = this.state.following[i]; } this.state.following = f; } this.state._fsp++; this.state.following[this.state._fsp] = fset; }, /** * Sadly JavaScript doesn't provide a robust mechanism for runtime stack reflection. * This makes implementing this function impossible without maintaining an auxillary * stack data structure, which would be crazy expensive, especially in Lexers. As such, * this method remains unimplemented. * @deprecated */ getRuleInvocationStack: function(e, recognizerClassName) { throw new Error("Not implemented."); }, /** * Get this recognizer's backtracking level. * @returns {Number} backtracking level */ getBacktrackingLevel: function() { return this.state.backtracking; }, /** Used to print out token names like ID during debugging and * error reporting. The generated parsers implement a method * that overrides this to point to their String[] tokenNames. * @returns {Array} String array of token names. */ getTokenNames: function() { return null; }, /** For debugging and other purposes, might want the grammar name. * Have ANTLR generate an implementation for this method. * @returns {String} the grammar name. */ getGrammarFileName: function() { return null; }, /** A convenience method for use most often with template rewrites. * Convert an array of Tokens to an array of Strings. * @param {Array} array of org.antlr.runtime.Token objects. * @returns {Array} array of string representations of the argument. */ toStrings: function(tokens) { if ( !tokens ) { return null; } var strings = []; var i; for (i=0; iFor now we use a hashtable and just the slow Object-based one. * Later, we can make a special one for ints and also one that * tosses out data after we commit past input position i.

* @param {Number} ruleIndex * @param {Number} ruleStartIndex * @returns {Number} */ getRuleMemoization: function(ruleIndex, ruleStartIndex) { if ( !this.state.ruleMemo[ruleIndex] ) { this.state.ruleMemo[ruleIndex] = {}; } var stopIndexI = this.state.ruleMemo[ruleIndex][ruleStartIndex]; if ( !org.antlr.lang.isNumber(stopIndexI) ) { return org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN; } return stopIndexI; }, /** Has this rule already parsed input at the current index in the * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. * If we attempted but failed to parse properly before, return * MEMO_RULE_FAILED. * *

This method has a side-effect: if we have seen this input for * this rule and successfully parsed before, then seek ahead to * 1 past the stop token matched for this rule last time.

* @param {org.antlr.runtime.IntStream} input * @param {Number} ruleIndex * @returns {Boolean} */ alreadyParsedRule: function(input, ruleIndex) { var stopIndex = this.getRuleMemoization(ruleIndex, input.index()); if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN ) { return false; } if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED ) { //System.out.println("rule "+ruleIndex+" will never succeed"); this.state.failed=true; } else { input.seek(stopIndex+1); // jump to one past stop token } return true; }, /** Record whether or not this rule parsed the input at this position * successfully. Use a standard java hashtable for now. * @param {org.antlr.runtime.IntStream} input * @param {Number} ruleIndex * @param {Number} ruleStartIndex */ memoize: function(input, ruleIndex, ruleStartIndex) { var stopTokenIndex = this.state.failed ? org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED : input.index()-1; if ( !org.antlr.lang.isValue(this.state.ruleMemo) ) { throw new Error("!!!!!!!!! memo array is null for "+ this.getGrammarFileName()); } if ( ruleIndex >= this.state.ruleMemo.length ) { throw new Error("!!!!!!!!! memo size is "+this.state.ruleMemo.length+", but rule index is "+ruleIndex); } if ( org.antlr.lang.isValue(this.state.ruleMemo[ruleIndex]) ) { this.state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; } }, /** return how many rule/input-index pairs there are in total. * TODO: this includes synpreds. * @returns {Number} */ getRuleMemoizationCacheSize: function() { var n = 0, i; for (i = 0; this.state.ruleMemo && i < this.state.ruleMemo.length; i++) { var ruleMap = this.state.ruleMemo[i]; if ( ruleMap ) { // @todo need to get size of rulemap? n += ruleMap.length; // how many input indexes are recorded? } } return n; }, /** * When a grammar is compiled with the tracing flag enabled, this method is invoked * at the start of each rule. * @param {String} ruleName the current ruleName * @param {Number} ruleIndex * @param {Object} inputSymbol */ traceIn: function(ruleName, ruleIndex, inputSymbol) { this.emitErrorMessage("enter "+ruleName+" "+inputSymbol); if ( this.state.failed ) { this.emitErrorMessage(" failed="+this.failed); } if ( this.state.backtracking>0 ) { this.emitErrorMessage(" backtracking="+this.state.backtracking); } // System.out.println(); }, /** * When a grammar is compiled with the tracing flag enabled, this method is invoked * at the end of each rule. * @param {String} ruleName the current ruleName * @param {Number} ruleIndex * @param {Object} inputSymbol */ traceOut: function(ruleName, ruleIndex, inputSymbol) { this.emitErrorMessage("exit "+ruleName+" "+inputSymbol); if ( this.state.failed ) { this.emitErrorMessage(" failed="+this.state.failed); } if ( this.state.backtracking>0 ) { this.emitErrorMessage(" backtracking="+this.state.backtracking); } } };