/* * [The "BSD licence"] * Copyright (c) 2005-2008 Terence Parr * All rights reserved. * * Conversion to C#: * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ namespace Antlr.Runtime { using System.Collections.Generic; using ArgumentNullException = System.ArgumentNullException; using Array = System.Array; using Conditional = System.Diagnostics.ConditionalAttribute; using Exception = System.Exception; using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener; using MethodBase = System.Reflection.MethodBase; using NotSupportedException = System.NotSupportedException; using Regex = System.Text.RegularExpressions.Regex; using StackFrame = System.Diagnostics.StackFrame; using StackTrace = System.Diagnostics.StackTrace; using TextWriter = System.IO.TextWriter; using Type = System.Type; /** * A generic recognizer that can handle recognizers generated from * lexer, parser, and tree grammars. This is all the parsing * support code essentially; most of it is error recovery stuff and * backtracking. * */ public abstract class BaseRecognizer { public const int MemoRuleFailed = -2; public const int MemoRuleUnknown = -1; public const int InitialFollowStackSize = 100; // copies from Token object for convenience in actions public const int DefaultTokenChannel = TokenChannels.Default; public const int Hidden = TokenChannels.Hidden; public const string NextTokenRuleName = "nextToken"; /** * State of a lexer, parser, or tree parser are collected into a state * object so the state can be shared. This sharing is needed to * have one grammar import others and share same error variables * and other state variables. It's a kind of explicit multiple * inheritance via delegation of methods and shared state. * */ protected internal RecognizerSharedState state; public BaseRecognizer() : this(new RecognizerSharedState()) { } public BaseRecognizer(RecognizerSharedState state) { if (state == null) { state = new RecognizerSharedState(); } this.state = state; InitDFAs(); } public TextWriter TraceDestination { get; set; } protected virtual void InitDFAs() { } /** reset the parser's state; subclasses must rewinds the input stream */ public virtual void Reset() { // wack everything related to error recovery if (state == null) { return; // no shared state work to do } state._fsp = -1; state.errorRecovery = false; state.lastErrorIndex = -1; state.failed = false; state.syntaxErrors = 0; // wack everything related to backtracking and memoization state.backtracking = 0; for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache state.ruleMemo[i] = null; } } /** * Match current input symbol against ttype. Attempt * single token insertion or deletion error recovery. If * that fails, throw MismatchedTokenException. * * * * To turn off single token insertion or deletion error * recovery, override recoverFromMismatchedToken() and have it * throw an exception. See TreeParser.recoverFromMismatchedToken(). * This way any error in a rule will cause an exception and * immediate exit from rule. Rule would recover by resynchronizing * to the set of symbols that can follow rule ref. * */ public virtual object Match(IIntStream input, int ttype, BitSet follow) { //System.out.println("match "+((TokenStream)input).LT(1)); object matchedSymbol = GetCurrentInputSymbol(input); if (input.LA(1) == ttype) { input.Consume(); state.errorRecovery = false; state.failed = false; return matchedSymbol; } if (state.backtracking > 0) { state.failed = true; return matchedSymbol; } matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow); return matchedSymbol; } /** Match the wildcard: in a symbol */ public virtual void MatchAny(IIntStream input) { state.errorRecovery = false; state.failed = false; input.Consume(); } public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) { return input.LA(2) == ttype; } public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) { if (follow == null) { // we have no information about the follow; we can only consume // a single token and hope for the best return false; } // compute what can follow this grammar element reference if (follow.Member(TokenTypes.EndOfRule)) { BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW(); follow = follow.Or(viableTokensFollowingThisRule); if (state._fsp >= 0) { // remove EOR if we're not the start symbol follow.Remove(TokenTypes.EndOfRule); } } // if current token is consistent with what could come after set // then we know we're missing a token; error recovery is free to // "insert" the missing token //System.out.println("viable tokens="+follow.toString(getTokenNames())); //System.out.println("LT(1)="+((TokenStream)input).LT(1)); // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR // in follow set to indicate that the fall of the start symbol is // in the set (EOF can follow). if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) { //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting..."); return true; } return false; } /** Report a recognition problem. * * * This method sets errorRecovery to indicate the parser is recovering * not parsing. Once in recovery mode, no errors are generated. * To get out of recovery mode, the parser must successfully match * a token (after a resync). So it will go: * * 1. error occurs * 2. enter recovery mode, report error * 3. consume until token found in resynch set * 4. try to resume parsing * 5. next match() will reset errorRecovery mode * * If you override, make sure to update syntaxErrors if you care about that. * */ public virtual void ReportError(RecognitionException e) { // if we've already reported an error and have not matched a token // yet successfully, don't report any errors. if (state.errorRecovery) { //System.err.print("[SPURIOUS] "); return; } state.syntaxErrors++; // don't count spurious state.errorRecovery = true; DisplayRecognitionError(this.TokenNames, e); } public virtual void DisplayRecognitionError(string[] tokenNames, RecognitionException e) { string hdr = GetErrorHeader(e); string msg = GetErrorMessage(e, tokenNames); EmitErrorMessage(hdr + " " + msg); } /** What error message should be generated for the various exception types? * * * Not very object-oriented code, but I like having all error message * generation within one method rather than spread among all of the * exception classes. This also makes it much easier for the exception * handling because the exception classes do not have to have pointers back * to this object to access utility routines and so on. Also, changing * the message for an exception type would be difficult because you * would have to subclassing exception, but then somehow get ANTLR * to make those kinds of exception objects instead of the default. * This looks weird, but trust me--it makes the most sense in terms * of flexibility. * * For grammar debugging, you will want to override this to add * more information such as the stack frame with * getRuleInvocationStack(e, this.getClass().getName()) and, * for no viable alts, the decision description and state etc... * * Override this to change the message generated for one or more * exception types. * */ public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) { string msg = e.Message; if (e is UnwantedTokenException) { UnwantedTokenException ute = (UnwantedTokenException)e; string tokenName = ""; if (ute.Expecting == TokenTypes.EndOfFile) { tokenName = "EndOfFile"; } else { tokenName = tokenNames[ute.Expecting]; } msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) + " expecting " + tokenName; } else if (e is MissingTokenException) { MissingTokenException mte = (MissingTokenException)e; string tokenName = ""; if (mte.Expecting == TokenTypes.EndOfFile) { tokenName = "EndOfFile"; } else { tokenName = tokenNames[mte.Expecting]; } msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token); } else if (e is MismatchedTokenException) { MismatchedTokenException mte = (MismatchedTokenException)e; string tokenName = ""; if (mte.Expecting == TokenTypes.EndOfFile) { tokenName = "EndOfFile"; } else { tokenName = tokenNames[mte.Expecting]; } msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + " expecting " + tokenName; } else if (e is MismatchedTreeNodeException) { MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; string tokenName = ""; if (mtne.Expecting == TokenTypes.EndOfFile) { tokenName = "EndOfFile"; } else { tokenName = tokenNames[mtne.Expecting]; } // workaround for a .NET framework bug (NullReferenceException) string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty; msg = "mismatched tree node: " + nodeText + " expecting " + tokenName; } else if (e is NoViableAltException) { //NoViableAltException nvae = (NoViableAltException)e; // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" // and "(decision="+nvae.decisionNumber+") and // "state "+nvae.stateNumber msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token); } else if (e is EarlyExitException) { //EarlyExitException eee = (EarlyExitException)e; // for development, can add "(decision="+eee.decisionNumber+")" msg = "required (...)+ loop did not match anything at input " + GetTokenErrorDisplay(e.Token); } else if (e is MismatchedSetException) { MismatchedSetException mse = (MismatchedSetException)e; msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + " expecting set " + mse.Expecting; } else if (e is MismatchedNotSetException) { MismatchedNotSetException mse = (MismatchedNotSetException)e; msg = "mismatched input " + GetTokenErrorDisplay(e.Token) + " expecting set " + mse.Expecting; } else if (e is FailedPredicateException) { FailedPredicateException fpe = (FailedPredicateException)e; msg = "rule " + fpe.RuleName + " failed predicate: {" + fpe.PredicateText + "}?"; } return msg; } /** * Get number of recognition errors (lexer, parser, tree parser). Each * recognizer tracks its own number. So parser and lexer each have * separate count. Does not count the spurious errors found between * an error and next valid token match * * * */ public virtual int NumberOfSyntaxErrors { get { return state.syntaxErrors; } } /** What is the error header, normally line/character position information? */ public virtual string GetErrorHeader(RecognitionException e) { return "line " + e.Line + ":" + (e.CharPositionInLine + 1); } /** * How should a token be displayed in an error message? The default * is to display just the text, but during development you might * want to have a lot of information spit out. Override in that case * to use t.ToString() (which, for CommonToken, dumps everything about * the token). This is better than forcing you to override a method in * your token objects because you don't have to go modify your lexer * so that it creates a new Java type. * */ public virtual string GetTokenErrorDisplay(IToken t) { string s = t.Text; if (s == null) { if (t.Type == TokenTypes.EndOfFile) { s = ""; } else { s = "<" + t.Type + ">"; } } s = Regex.Replace(s, "\n", "\\\\n"); s = Regex.Replace(s, "\r", "\\\\r"); s = Regex.Replace(s, "\t", "\\\\t"); return "'" + s + "'"; } /** Override this method to change where error messages go */ public virtual void EmitErrorMessage(string msg) { if (TraceDestination != null) TraceDestination.WriteLine(msg); } /** * Recover from an error found on the input stream. This is * for NoViableAlt and mismatched symbol exceptions. If you enable * single token insertion and deletion, this will usually not * handle mismatched symbol exceptions but there could be a mismatched * token that the match() routine could not recover from. * */ public virtual void Recover(IIntStream input, RecognitionException re) { if (state.lastErrorIndex == input.Index) { // uh oh, another error at same token index; must be a case // where LT(1) is in the recovery token set so nothing is // consumed; consume a single token so at least to prevent // an infinite loop; this is a failsafe. input.Consume(); } state.lastErrorIndex = input.Index; BitSet followSet = ComputeErrorRecoverySet(); BeginResync(); ConsumeUntil(input, followSet); EndResync(); } /** * A hook to listen in on the token consumption during error recovery. * The DebugParser subclasses this to fire events to the listenter. * */ public virtual void BeginResync() { } public virtual void EndResync() { } /* Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule. * * EXAMPLE * * When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens. * * Consider grammar: * * a : '[' b ']' * | '(' b ')' * ; * b : c '^' INT ; * c : ID * | INT * ; * * At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various "local" * follow sets: * * FOLLOW(b1_in_a) = FIRST(']') = ']' * FOLLOW(b2_in_a) = FIRST(')') = ')' * FOLLOW(c_in_b) = FIRST('^') = '^' * * Upon erroneous input "[]", the call chain is * * a -> b -> c * * and, hence, the follow context stack is: * * depth local follow set after call to rule * 0 a (from main()) * 1 ']' b * 3 '^' c * * Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included. * * For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. * In this case, for input "[]", LA(1) is in this set so we would * not consume anything and after printing an error rule c would * return normally. It would not find the required '^' though. * At this point, it gets a mismatched token error and throws an * exception (since LA(1) is not in the viable following token * set). The rule exception handler tries to recover, but finds * the same recovery set and doesn't consume anything. Rule b * exits normally returning to rule a. Now it finds the ']' (and * with the successful match exits errorRecovery mode). * * So, you cna see that the parser walks up call chain looking * for the token that was a member of the recovery set. * * Errors are not generated in errorRecovery mode. * * ANTLR's error recovery mechanism is based upon original ideas: * * "Algorithms + Data Structures = Programs" by Niklaus Wirth * * and * * "A note on error recovery in recursive descent parsers": * http://portal.acm.org/citation.cfm?id=947902.947905 * * Later, Josef Grosch had some good ideas: * * "Efficient and Comfortable Error Recovery in Recursive Descent * Parsers": * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip * * Like Grosch I implemented local FOLLOW sets that are combined * at run-time upon error to avoid overhead during parsing. */ protected virtual BitSet ComputeErrorRecoverySet() { return CombineFollows(false); } /** * Compute the context-sensitive FOLLOW set for current rule. * This is set of token types that can follow a specific rule * reference given a specific call chain. You get the set of * viable tokens that can possibly come next (lookahead depth 1) * given the current call chain. Contrast this with the * definition of plain FOLLOW for rule r: * * * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} * * where x in T* and alpha, beta in V*; T is set of terminals and * V is the set of terminals and nonterminals. In other words, * FOLLOW(r) is the set of all tokens that can possibly follow * references to r in *any* sentential form (context). At * runtime, however, we know precisely which context applies as * we have the call chain. We may compute the exact (rather * than covering superset) set of following tokens. * * For example, consider grammar: * * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} * | "return" expr '.' * ; * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} * atom : INT // FOLLOW(atom)=={'+',')',';','.'} * | '(' expr ')' * ; * * The FOLLOW sets are all inclusive whereas context-sensitive * FOLLOW sets are precisely what could follow a rule reference. * For input input "i=(3);", here is the derivation: * * stat => ID '=' expr ';' * => ID '=' atom ('+' atom)* ';' * => ID '=' '(' expr ')' ('+' atom)* ';' * => ID '=' '(' atom ')' ('+' atom)* ';' * => ID '=' '(' INT ')' ('+' atom)* ';' * => ID '=' '(' INT ')' ';' * * At the "3" token, you'd have a call chain of * * stat -> expr -> atom -> expr -> atom * * What can follow that specific nested ref to atom? Exactly ')' * as you can see by looking at the derivation of this specific * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. * * You want the exact viable token set when recovering from a * token mismatch. Upon token mismatch, if LA(1) is member of * the viable next token set, then you know there is most likely * a missing token in the input stream. "Insert" one by just not * throwing an exception. */ protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() { return CombineFollows(true); } // what is exact? it seems to only add sets from above on stack // if EOR is in set i. When it sees a set w/o EOR, it stops adding. // Why would we ever want them all? Maybe no viable alt instead of // mismatched token? protected virtual BitSet CombineFollows(bool exact) { int top = state._fsp; BitSet followSet = new BitSet(); for (int i = top; i >= 0; i--) { BitSet localFollowSet = (BitSet)state.following[i]; /* System.out.println("local follow depth "+i+"="+ localFollowSet.toString(getTokenNames())+")"); */ followSet.OrInPlace(localFollowSet); if (exact) { // can we see end of rule? if (localFollowSet.Member(TokenTypes.EndOfRule)) { // Only leave EOR in set if at top (start rule); this lets // us know if have to include follow(start rule); i.e., EOF if (i > 0) { followSet.Remove(TokenTypes.EndOfRule); } } else { // can't see end of rule, quit break; } } } return followSet; } /** Attempt to recover from a single missing or extra token. * * EXTRA TOKEN * * LA(1) is not what we are looking for. If LA(2) has the right token, * however, then assume LA(1) is some extra spurious token. Delete it * and LA(2) as if we were doing a normal match(), which advances the * input. * * MISSING TOKEN * * If current token is consistent with what could come after * ttype then it is ok to "insert" the missing token, else throw * exception For example, Input "i=(3;" is clearly missing the * ')'. When the parser returns from the nested call to expr, it * will have call chain: * * stat -> expr -> atom * * and it will be trying to match the ')' at this point in the * derivation: * * => ID '=' '(' INT ')' ('+' atom)* ';' * ^ * match() will see that ';' doesn't match ')' and report a * mismatched token error. To recover, it sees that LA(1)==';' * is in the set of tokens that can follow the ')' token * reference in rule atom. It can assume that you forgot the ')'. */ protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) { RecognitionException e = null; // if next token is what we are looking for then "delete" this token if (MismatchIsUnwantedToken(input, ttype)) { e = new UnwantedTokenException(ttype, input, TokenNames); /* System.err.println("recoverFromMismatchedToken deleting "+ ((TokenStream)input).LT(1)+ " since "+((TokenStream)input).LT(2)+" is what we want"); */ BeginResync(); input.Consume(); // simply delete extra token EndResync(); ReportError(e); // report after consuming so AW sees the token in the exception // we want to return the token we're actually matching object matchedSymbol = GetCurrentInputSymbol(input); input.Consume(); // move past ttype token as if all were ok return matchedSymbol; } // can't recover with single token deletion, try insertion if (MismatchIsMissingToken(input, follow)) { object inserted = GetMissingSymbol(input, e, ttype, follow); e = new MissingTokenException(ttype, input, inserted); ReportError(e); // report after inserting so AW sees the token in the exception return inserted; } // even that didn't work; must throw the exception e = new MismatchedTokenException(ttype, input, TokenNames); throw e; } /** Not currently used */ public virtual object RecoverFromMismatchedSet(IIntStream input, RecognitionException e, BitSet follow) { if (MismatchIsMissingToken(input, follow)) { // System.out.println("missing token"); ReportError(e); // we don't know how to conjure up a token for sets yet return GetMissingSymbol(input, e, TokenTypes.Invalid, follow); } // TODO do single token deletion like above for Token mismatch throw e; } /** * Match needs to return the current input symbol, which gets put * into the label for the associated token ref; e.g., x=ID. Token * and tree parsers need to return different objects. Rather than test * for input stream type or change the IntStream interface, I use * a simple method to ask the recognizer to tell me what the current * input symbol is. * * * This is ignored for lexers. */ protected virtual object GetCurrentInputSymbol(IIntStream input) { return null; } /** Conjure up a missing token during error recovery. * * * The recognizer attempts to recover from single missing * symbols. But, actions might refer to that missing symbol. * For example, x=ID {f($x);}. The action clearly assumes * that there has been an identifier matched previously and that * $x points at that token. If that token is missing, but * the next token in the stream is what we want we assume that * this token is missing and we keep going. Because we * have to return some token to replace the missing token, * we have to conjure one up. This method gives the user control * over the tokens returned for missing tokens. Mostly, * you will want to create something special for identifier * tokens. For literals such as '{' and ',', the default * action in the parser or tree parser works. It simply creates * a CommonToken of the appropriate type. The text will be the token. * If you change what tokens must be created by the lexer, * override this method to create the appropriate tokens. * */ protected virtual object GetMissingSymbol(IIntStream input, RecognitionException e, int expectedTokenType, BitSet follow) { return null; } public virtual void ConsumeUntil(IIntStream input, int tokenType) { //System.out.println("consumeUntil "+tokenType); int ttype = input.LA(1); while (ttype != TokenTypes.EndOfFile && ttype != tokenType) { input.Consume(); ttype = input.LA(1); } } /** Consume tokens until one matches the given token set */ public virtual void ConsumeUntil(IIntStream input, BitSet set) { //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); int ttype = input.LA(1); while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) { //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); input.Consume(); ttype = input.LA(1); } } /** Push a rule's follow set using our own hardcoded stack */ protected void PushFollow(BitSet fset) { if ((state._fsp + 1) >= state.following.Length) { Array.Resize(ref state.following, state.following.Length * 2); } state.following[++state._fsp] = fset; } protected void PopFollow() { state._fsp--; } /** * Return List of the rules in your parser instance * leading up to a call to this method. You could override if * you want more details such as the file/line info of where * in the parser java code a rule is invoked. * * * * This is very useful for error messages and for context-sensitive * error recovery. * */ public virtual IList GetRuleInvocationStack() { return GetRuleInvocationStack(new StackTrace(true)); } /** * A more general version of GetRuleInvocationStack where you can * pass in the StackTrace of, for example, a RecognitionException * to get it's rule stack trace. * */ public static IList GetRuleInvocationStack(StackTrace trace) { if (trace == null) throw new ArgumentNullException("trace"); List rules = new List(); StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0]; for (int i = stack.Length - 1; i >= 0; i--) { StackFrame frame = stack[i]; MethodBase method = frame.GetMethod(); GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true); if (attributes != null && attributes.Length > 0) rules.Add(attributes[0].Name); } return rules; } public virtual int BacktrackingLevel { get { return state.backtracking; } set { state.backtracking = value; } } /** Return whether or not a backtracking attempt failed. */ public virtual bool Failed { get { return state.failed; } } /** * Used to print out token names like ID during debugging and * error reporting. The generated parsers implement a method * that overrides this to point to their String[] tokenNames. * */ public virtual string[] TokenNames { get { return null; } } /** * For debugging and other purposes, might want the grammar name. * Have ANTLR generate an implementation for this method. * */ public virtual string GrammarFileName { get { return null; } } public abstract string SourceName { get; } /** * A convenience method for use most often with template rewrites. * Convert a List to List * */ public virtual List ToStrings(ICollection tokens) { if (tokens == null) return null; List strings = new List(tokens.Count); foreach (IToken token in tokens) { strings.Add(token.Text); } return strings; } /** * Given a rule number and a start token index number, return * MEMO_RULE_UNKNOWN if the rule has not parsed input starting from * start index. If this rule has parsed input starting from the * start index before, then return where the rule stopped parsing. * It returns the index of the last token matched by the rule. * * * * For now we use a hashtable and just the slow Object-based one. * Later, we can make a special one for ints and also one that * tosses out data after we commit past input position i. * */ public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) { if (state.ruleMemo[ruleIndex] == null) { state.ruleMemo[ruleIndex] = new Dictionary(); } int stopIndex; if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex)) return MemoRuleUnknown; return stopIndex; } /** * Has this rule already parsed input at the current index in the * input stream? Return the stop token index or MEMO_RULE_UNKNOWN. * If we attempted but failed to parse properly before, return * MEMO_RULE_FAILED. * * * * This method has a side-effect: if we have seen this input for * this rule and successfully parsed before, then seek ahead to * 1 past the stop token matched for this rule last time. * */ public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) { int stopIndex = GetRuleMemoization(ruleIndex, input.Index); if (stopIndex == MemoRuleUnknown) { return false; } if (stopIndex == MemoRuleFailed) { //System.out.println("rule "+ruleIndex+" will never succeed"); state.failed = true; } else { //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed); input.Seek(stopIndex + 1); // jump to one past stop token } return true; } /** * Record whether or not this rule parsed the input at this position * successfully. Use a standard java hashtable for now. * */ public virtual void Memoize(IIntStream input, int ruleIndex, int ruleStartIndex) { int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1; if (state.ruleMemo == null) { if (TraceDestination != null) TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName); } if (ruleIndex >= state.ruleMemo.Length) { if (TraceDestination != null) TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex); } if (state.ruleMemo[ruleIndex] != null) { state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex; } } /** return how many rule/input-index pairs there are in total. * TODO: this includes synpreds. :( */ public virtual int GetRuleMemoizationCacheSize() { int n = 0; for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { var ruleMap = state.ruleMemo[i]; if (ruleMap != null) { n += ruleMap.Count; // how many input indexes are recorded? } } return n; } [Conditional("ANTLR_TRACE")] public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) { if (TraceDestination == null) return; TraceDestination.Write("enter " + ruleName + " " + inputSymbol); if (state.backtracking > 0) { TraceDestination.Write(" backtracking=" + state.backtracking); } TraceDestination.WriteLine(); } [Conditional("ANTLR_TRACE")] public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) { if (TraceDestination == null) return; TraceDestination.Write("exit " + ruleName + " " + inputSymbol); if (state.backtracking > 0) { TraceDestination.Write(" backtracking=" + state.backtracking); if (state.failed) TraceDestination.Write(" failed"); else TraceDestination.Write(" succeeded"); } TraceDestination.WriteLine(); } #region Debugging support public virtual IDebugEventListener DebugListener { get { return null; } } [Conditional("ANTLR_DEBUG")] protected virtual void DebugEnterRule(string grammarFileName, string ruleName) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.EnterRule(grammarFileName, ruleName); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugExitRule(string grammarFileName, string ruleName) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.ExitRule(grammarFileName, ruleName); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugEnterSubRule(int decisionNumber) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.EnterSubRule(decisionNumber); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugExitSubRule(int decisionNumber) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.ExitSubRule(decisionNumber); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugEnterAlt(int alt) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.EnterAlt(alt); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.EnterDecision(decisionNumber, couldBacktrack); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugExitDecision(int decisionNumber) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.ExitDecision(decisionNumber); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugLocation(int line, int charPositionInLine) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.Location(line, charPositionInLine); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugSemanticPredicate(bool result, string predicate) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.SemanticPredicate(result, predicate); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugBeginBacktrack(int level) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.BeginBacktrack(level); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugEndBacktrack(int level, bool successful) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.EndBacktrack(level, successful); } [Conditional("ANTLR_DEBUG")] protected virtual void DebugRecognitionException(RecognitionException ex) { IDebugEventListener dbg = DebugListener; if (dbg != null) dbg.RecognitionException(ex); } #endregion } }