• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD licence"]
3  * Copyright (c) 2005-2008 Terence Parr
4  * All rights reserved.
5  *
6  * Conversion to C#:
7  * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 namespace Antlr.Runtime {
34     using System.Collections.Generic;
35 
36     using ArgumentNullException = System.ArgumentNullException;
37     using Array = System.Array;
38     using Conditional = System.Diagnostics.ConditionalAttribute;
39     using Exception = System.Exception;
40     using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
41     using MethodBase = System.Reflection.MethodBase;
42     using NotSupportedException = System.NotSupportedException;
43     using Regex = System.Text.RegularExpressions.Regex;
44     using StackFrame = System.Diagnostics.StackFrame;
45     using StackTrace = System.Diagnostics.StackTrace;
46     using TextWriter = System.IO.TextWriter;
47     using Type = System.Type;
48 
49     /** <summary>
50      *  A generic recognizer that can handle recognizers generated from
51      *  lexer, parser, and tree grammars.  This is all the parsing
52      *  support code essentially; most of it is error recovery stuff and
53      *  backtracking.
54      *  </summary>
55      */
56     public abstract class BaseRecognizer {
57         public const int MemoRuleFailed = -2;
58         public const int MemoRuleUnknown = -1;
59         public const int InitialFollowStackSize = 100;
60 
61         // copies from Token object for convenience in actions
62         public const int DefaultTokenChannel = TokenChannels.Default;
63         public const int Hidden = TokenChannels.Hidden;
64 
65         public const string NextTokenRuleName = "nextToken";
66 
67         /** <summary>
68          *  State of a lexer, parser, or tree parser are collected into a state
69          *  object so the state can be shared.  This sharing is needed to
70          *  have one grammar import others and share same error variables
71          *  and other state variables.  It's a kind of explicit multiple
72          *  inheritance via delegation of methods and shared state.
73          *  </summary>
74          */
75         protected internal RecognizerSharedState state;
76 
BaseRecognizer()77         public BaseRecognizer()
78             : this(new RecognizerSharedState()) {
79         }
80 
BaseRecognizer(RecognizerSharedState state)81         public BaseRecognizer(RecognizerSharedState state) {
82             if (state == null) {
83                 state = new RecognizerSharedState();
84             }
85             this.state = state;
86             InitDFAs();
87         }
88 
89         public TextWriter TraceDestination {
90             get;
91             set;
92         }
93 
InitDFAs()94         protected virtual void InitDFAs() {
95         }
96 
97         /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */
Reset()98         public virtual void Reset() {
99             // wack everything related to error recovery
100             if (state == null) {
101                 return; // no shared state work to do
102             }
103             state._fsp = -1;
104             state.errorRecovery = false;
105             state.lastErrorIndex = -1;
106             state.failed = false;
107             state.syntaxErrors = 0;
108             // wack everything related to backtracking and memoization
109             state.backtracking = 0;
110             for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) { // wipe cache
111                 state.ruleMemo[i] = null;
112             }
113         }
114 
115 
116         /** <summary>
117          *  Match current input symbol against ttype.  Attempt
118          *  single token insertion or deletion error recovery.  If
119          *  that fails, throw MismatchedTokenException.
120          *  </summary>
121          *
122          *  <remarks>
123          *  To turn off single token insertion or deletion error
124          *  recovery, override recoverFromMismatchedToken() and have it
125          *  throw an exception. See TreeParser.recoverFromMismatchedToken().
126          *  This way any error in a rule will cause an exception and
127          *  immediate exit from rule.  Rule would recover by resynchronizing
128          *  to the set of symbols that can follow rule ref.
129          *  </remarks>
130          */
Match(IIntStream input, int ttype, BitSet follow)131         public virtual object Match(IIntStream input, int ttype, BitSet follow) {
132             //System.out.println("match "+((TokenStream)input).LT(1));
133             object matchedSymbol = GetCurrentInputSymbol(input);
134             if (input.LA(1) == ttype) {
135                 input.Consume();
136                 state.errorRecovery = false;
137                 state.failed = false;
138                 return matchedSymbol;
139             }
140             if (state.backtracking > 0) {
141                 state.failed = true;
142                 return matchedSymbol;
143             }
144             matchedSymbol = RecoverFromMismatchedToken(input, ttype, follow);
145             return matchedSymbol;
146         }
147 
148         /** <summary>Match the wildcard: in a symbol</summary> */
MatchAny(IIntStream input)149         public virtual void MatchAny(IIntStream input) {
150             state.errorRecovery = false;
151             state.failed = false;
152             input.Consume();
153         }
154 
MismatchIsUnwantedToken(IIntStream input, int ttype)155         public virtual bool MismatchIsUnwantedToken(IIntStream input, int ttype) {
156             return input.LA(2) == ttype;
157         }
158 
MismatchIsMissingToken(IIntStream input, BitSet follow)159         public virtual bool MismatchIsMissingToken(IIntStream input, BitSet follow) {
160             if (follow == null) {
161                 // we have no information about the follow; we can only consume
162                 // a single token and hope for the best
163                 return false;
164             }
165             // compute what can follow this grammar element reference
166             if (follow.Member(TokenTypes.EndOfRule)) {
167                 BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
168                 follow = follow.Or(viableTokensFollowingThisRule);
169                 if (state._fsp >= 0) { // remove EOR if we're not the start symbol
170                     follow.Remove(TokenTypes.EndOfRule);
171                 }
172             }
173             // if current token is consistent with what could come after set
174             // then we know we're missing a token; error recovery is free to
175             // "insert" the missing token
176 
177             //System.out.println("viable tokens="+follow.toString(getTokenNames()));
178             //System.out.println("LT(1)="+((TokenStream)input).LT(1));
179 
180             // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
181             // in follow set to indicate that the fall of the start symbol is
182             // in the set (EOF can follow).
183             if (follow.Member(input.LA(1)) || follow.Member(TokenTypes.EndOfRule)) {
184                 //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
185                 return true;
186             }
187             return false;
188         }
189 
190         /** <summary>Report a recognition problem.</summary>
191          *
192          *  <remarks>
193          *  This method sets errorRecovery to indicate the parser is recovering
194          *  not parsing.  Once in recovery mode, no errors are generated.
195          *  To get out of recovery mode, the parser must successfully match
196          *  a token (after a resync).  So it will go:
197          *
198          * 		1. error occurs
199          * 		2. enter recovery mode, report error
200          * 		3. consume until token found in resynch set
201          * 		4. try to resume parsing
202          * 		5. next match() will reset errorRecovery mode
203          *
204          *  If you override, make sure to update syntaxErrors if you care about that.
205          *  </remarks>
206          */
ReportError(RecognitionException e)207         public virtual void ReportError(RecognitionException e) {
208             // if we've already reported an error and have not matched a token
209             // yet successfully, don't report any errors.
210             if (state.errorRecovery) {
211                 //System.err.print("[SPURIOUS] ");
212                 return;
213             }
214             state.syntaxErrors++; // don't count spurious
215             state.errorRecovery = true;
216 
217             DisplayRecognitionError(this.TokenNames, e);
218         }
219 
DisplayRecognitionError(string[] tokenNames, RecognitionException e)220         public virtual void DisplayRecognitionError(string[] tokenNames,
221                                             RecognitionException e) {
222             string hdr = GetErrorHeader(e);
223             string msg = GetErrorMessage(e, tokenNames);
224             EmitErrorMessage(hdr + " " + msg);
225         }
226 
227         /** <summary>What error message should be generated for the various exception types?</summary>
228          *
229          *  <remarks>
230          *  Not very object-oriented code, but I like having all error message
231          *  generation within one method rather than spread among all of the
232          *  exception classes. This also makes it much easier for the exception
233          *  handling because the exception classes do not have to have pointers back
234          *  to this object to access utility routines and so on. Also, changing
235          *  the message for an exception type would be difficult because you
236          *  would have to subclassing exception, but then somehow get ANTLR
237          *  to make those kinds of exception objects instead of the default.
238          *  This looks weird, but trust me--it makes the most sense in terms
239          *  of flexibility.
240          *
241          *  For grammar debugging, you will want to override this to add
242          *  more information such as the stack frame with
243          *  getRuleInvocationStack(e, this.getClass().getName()) and,
244          *  for no viable alts, the decision description and state etc...
245          *
246          *  Override this to change the message generated for one or more
247          *  exception types.
248          *  </remarks>
249          */
GetErrorMessage(RecognitionException e, string[] tokenNames)250         public virtual string GetErrorMessage(RecognitionException e, string[] tokenNames) {
251             string msg = e.Message;
252             if (e is UnwantedTokenException) {
253                 UnwantedTokenException ute = (UnwantedTokenException)e;
254                 string tokenName = "<unknown>";
255                 if (ute.Expecting == TokenTypes.EndOfFile) {
256                     tokenName = "EndOfFile";
257                 } else {
258                     tokenName = tokenNames[ute.Expecting];
259                 }
260                 msg = "extraneous input " + GetTokenErrorDisplay(ute.UnexpectedToken) +
261                     " expecting " + tokenName;
262             } else if (e is MissingTokenException) {
263                 MissingTokenException mte = (MissingTokenException)e;
264                 string tokenName = "<unknown>";
265                 if (mte.Expecting == TokenTypes.EndOfFile) {
266                     tokenName = "EndOfFile";
267                 } else {
268                     tokenName = tokenNames[mte.Expecting];
269                 }
270                 msg = "missing " + tokenName + " at " + GetTokenErrorDisplay(e.Token);
271             } else if (e is MismatchedTokenException) {
272                 MismatchedTokenException mte = (MismatchedTokenException)e;
273                 string tokenName = "<unknown>";
274                 if (mte.Expecting == TokenTypes.EndOfFile) {
275                     tokenName = "EndOfFile";
276                 } else {
277                     tokenName = tokenNames[mte.Expecting];
278                 }
279                 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
280                     " expecting " + tokenName;
281             } else if (e is MismatchedTreeNodeException) {
282                 MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
283                 string tokenName = "<unknown>";
284                 if (mtne.Expecting == TokenTypes.EndOfFile) {
285                     tokenName = "EndOfFile";
286                 } else {
287                     tokenName = tokenNames[mtne.Expecting];
288                 }
289                 // workaround for a .NET framework bug (NullReferenceException)
290                 string nodeText = (mtne.Node != null) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
291                 msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
292             } else if (e is NoViableAltException) {
293                 //NoViableAltException nvae = (NoViableAltException)e;
294                 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
295                 // and "(decision="+nvae.decisionNumber+") and
296                 // "state "+nvae.stateNumber
297                 msg = "no viable alternative at input " + GetTokenErrorDisplay(e.Token);
298             } else if (e is EarlyExitException) {
299                 //EarlyExitException eee = (EarlyExitException)e;
300                 // for development, can add "(decision="+eee.decisionNumber+")"
301                 msg = "required (...)+ loop did not match anything at input " +
302                     GetTokenErrorDisplay(e.Token);
303             } else if (e is MismatchedSetException) {
304                 MismatchedSetException mse = (MismatchedSetException)e;
305                 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
306                     " expecting set " + mse.Expecting;
307             } else if (e is MismatchedNotSetException) {
308                 MismatchedNotSetException mse = (MismatchedNotSetException)e;
309                 msg = "mismatched input " + GetTokenErrorDisplay(e.Token) +
310                     " expecting set " + mse.Expecting;
311             } else if (e is FailedPredicateException) {
312                 FailedPredicateException fpe = (FailedPredicateException)e;
313                 msg = "rule " + fpe.RuleName + " failed predicate: {" +
314                     fpe.PredicateText + "}?";
315             }
316             return msg;
317         }
318 
319         /** <summary>
320          *  Get number of recognition errors (lexer, parser, tree parser).  Each
321          *  recognizer tracks its own number.  So parser and lexer each have
322          *  separate count.  Does not count the spurious errors found between
323          *  an error and next valid token match
324          *  </summary>
325          *
326          *  <seealso cref="reportError()"/>
327          */
328         public virtual int NumberOfSyntaxErrors {
329             get {
330                 return state.syntaxErrors;
331             }
332         }
333 
334         /** <summary>What is the error header, normally line/character position information?</summary> */
GetErrorHeader(RecognitionException e)335         public virtual string GetErrorHeader(RecognitionException e) {
336             return "line " + e.Line + ":" + (e.CharPositionInLine + 1);
337         }
338 
339         /** <summary>
340          *  How should a token be displayed in an error message? The default
341          *  is to display just the text, but during development you might
342          *  want to have a lot of information spit out.  Override in that case
343          *  to use t.ToString() (which, for CommonToken, dumps everything about
344          *  the token). This is better than forcing you to override a method in
345          *  your token objects because you don't have to go modify your lexer
346          *  so that it creates a new Java type.
347          *  </summary>
348          */
GetTokenErrorDisplay(IToken t)349         public virtual string GetTokenErrorDisplay(IToken t) {
350             string s = t.Text;
351             if (s == null) {
352                 if (t.Type == TokenTypes.EndOfFile) {
353                     s = "<EOF>";
354                 } else {
355                     s = "<" + t.Type + ">";
356                 }
357             }
358             s = Regex.Replace(s, "\n", "\\\\n");
359             s = Regex.Replace(s, "\r", "\\\\r");
360             s = Regex.Replace(s, "\t", "\\\\t");
361             return "'" + s + "'";
362         }
363 
364         /** <summary>Override this method to change where error messages go</summary> */
EmitErrorMessage(string msg)365         public virtual void EmitErrorMessage(string msg) {
366             if (TraceDestination != null)
367                 TraceDestination.WriteLine(msg);
368         }
369 
370         /** <summary>
371          *  Recover from an error found on the input stream.  This is
372          *  for NoViableAlt and mismatched symbol exceptions.  If you enable
373          *  single token insertion and deletion, this will usually not
374          *  handle mismatched symbol exceptions but there could be a mismatched
375          *  token that the match() routine could not recover from.
376          *  </summary>
377          */
Recover(IIntStream input, RecognitionException re)378         public virtual void Recover(IIntStream input, RecognitionException re) {
379             if (state.lastErrorIndex == input.Index) {
380                 // uh oh, another error at same token index; must be a case
381                 // where LT(1) is in the recovery token set so nothing is
382                 // consumed; consume a single token so at least to prevent
383                 // an infinite loop; this is a failsafe.
384                 input.Consume();
385             }
386             state.lastErrorIndex = input.Index;
387             BitSet followSet = ComputeErrorRecoverySet();
388             BeginResync();
389             ConsumeUntil(input, followSet);
390             EndResync();
391         }
392 
393         /** <summary>
394          *  A hook to listen in on the token consumption during error recovery.
395          *  The DebugParser subclasses this to fire events to the listenter.
396          *  </summary>
397          */
BeginResync()398         public virtual void BeginResync() {
399         }
400 
EndResync()401         public virtual void EndResync() {
402         }
403 
404         /*  Compute the error recovery set for the current rule.  During
405          *  rule invocation, the parser pushes the set of tokens that can
406          *  follow that rule reference on the stack; this amounts to
407          *  computing FIRST of what follows the rule reference in the
408          *  enclosing rule. This local follow set only includes tokens
409          *  from within the rule; i.e., the FIRST computation done by
410          *  ANTLR stops at the end of a rule.
411          *
412          *  EXAMPLE
413          *
414          *  When you find a "no viable alt exception", the input is not
415          *  consistent with any of the alternatives for rule r.  The best
416          *  thing to do is to consume tokens until you see something that
417          *  can legally follow a call to r *or* any rule that called r.
418          *  You don't want the exact set of viable next tokens because the
419          *  input might just be missing a token--you might consume the
420          *  rest of the input looking for one of the missing tokens.
421          *
422          *  Consider grammar:
423          *
424          *  a : '[' b ']'
425          *    | '(' b ')'
426          *    ;
427          *  b : c '^' INT ;
428          *  c : ID
429          *    | INT
430          *    ;
431          *
432          *  At each rule invocation, the set of tokens that could follow
433          *  that rule is pushed on a stack.  Here are the various "local"
434          *  follow sets:
435          *
436          *  FOLLOW(b1_in_a) = FIRST(']') = ']'
437          *  FOLLOW(b2_in_a) = FIRST(')') = ')'
438          *  FOLLOW(c_in_b) = FIRST('^') = '^'
439          *
440          *  Upon erroneous input "[]", the call chain is
441          *
442          *  a -> b -> c
443          *
444          *  and, hence, the follow context stack is:
445          *
446          *  depth  local follow set     after call to rule
447          *    0         <EOF>                    a (from main())
448          *    1          ']'                     b
449          *    3          '^'                     c
450          *
451          *  Notice that ')' is not included, because b would have to have
452          *  been called from a different context in rule a for ')' to be
453          *  included.
454          *
455          *  For error recovery, we cannot consider FOLLOW(c)
456          *  (context-sensitive or otherwise).  We need the combined set of
457          *  all context-sensitive FOLLOW sets--the set of all tokens that
458          *  could follow any reference in the call chain.  We need to
459          *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
460          *  we resync'd to that token, we'd consume until EOF.  We need to
461          *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
462          *  In this case, for input "[]", LA(1) is in this set so we would
463          *  not consume anything and after printing an error rule c would
464          *  return normally.  It would not find the required '^' though.
465          *  At this point, it gets a mismatched token error and throws an
466          *  exception (since LA(1) is not in the viable following token
467          *  set).  The rule exception handler tries to recover, but finds
468          *  the same recovery set and doesn't consume anything.  Rule b
469          *  exits normally returning to rule a.  Now it finds the ']' (and
470          *  with the successful match exits errorRecovery mode).
471          *
472          *  So, you cna see that the parser walks up call chain looking
473          *  for the token that was a member of the recovery set.
474          *
475          *  Errors are not generated in errorRecovery mode.
476          *
477          *  ANTLR's error recovery mechanism is based upon original ideas:
478          *
479          *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
480          *
481          *  and
482          *
483          *  "A note on error recovery in recursive descent parsers":
484          *  http://portal.acm.org/citation.cfm?id=947902.947905
485          *
486          *  Later, Josef Grosch had some good ideas:
487          *
488          *  "Efficient and Comfortable Error Recovery in Recursive Descent
489          *  Parsers":
490          *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
491          *
492          *  Like Grosch I implemented local FOLLOW sets that are combined
493          *  at run-time upon error to avoid overhead during parsing.
494          */
ComputeErrorRecoverySet()495         protected virtual BitSet ComputeErrorRecoverySet() {
496             return CombineFollows(false);
497         }
498 
499         /** <summary>
500          *  Compute the context-sensitive FOLLOW set for current rule.
501          *  This is set of token types that can follow a specific rule
502          *  reference given a specific call chain.  You get the set of
503          *  viable tokens that can possibly come next (lookahead depth 1)
504          *  given the current call chain.  Contrast this with the
505          *  definition of plain FOLLOW for rule r:
506          *  </summary>
507          *
508          *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
509          *
510          *  where x in T* and alpha, beta in V*; T is set of terminals and
511          *  V is the set of terminals and nonterminals.  In other words,
512          *  FOLLOW(r) is the set of all tokens that can possibly follow
513          *  references to r in *any* sentential form (context).  At
514          *  runtime, however, we know precisely which context applies as
515          *  we have the call chain.  We may compute the exact (rather
516          *  than covering superset) set of following tokens.
517          *
518          *  For example, consider grammar:
519          *
520          *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
521          *       | "return" expr '.'
522          *       ;
523          *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
524          *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
525          *       | '(' expr ')'
526          *       ;
527          *
528          *  The FOLLOW sets are all inclusive whereas context-sensitive
529          *  FOLLOW sets are precisely what could follow a rule reference.
530          *  For input input "i=(3);", here is the derivation:
531          *
532          *  stat => ID '=' expr ';'
533          *       => ID '=' atom ('+' atom)* ';'
534          *       => ID '=' '(' expr ')' ('+' atom)* ';'
535          *       => ID '=' '(' atom ')' ('+' atom)* ';'
536          *       => ID '=' '(' INT ')' ('+' atom)* ';'
537          *       => ID '=' '(' INT ')' ';'
538          *
539          *  At the "3" token, you'd have a call chain of
540          *
541          *    stat -> expr -> atom -> expr -> atom
542          *
543          *  What can follow that specific nested ref to atom?  Exactly ')'
544          *  as you can see by looking at the derivation of this specific
545          *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
546          *
547          *  You want the exact viable token set when recovering from a
548          *  token mismatch.  Upon token mismatch, if LA(1) is member of
549          *  the viable next token set, then you know there is most likely
550          *  a missing token in the input stream.  "Insert" one by just not
551          *  throwing an exception.
552          */
ComputeContextSensitiveRuleFOLLOW()553         protected virtual BitSet ComputeContextSensitiveRuleFOLLOW() {
554             return CombineFollows(true);
555         }
556 
557         // what is exact? it seems to only add sets from above on stack
558         // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
559         // Why would we ever want them all?  Maybe no viable alt instead of
560         // mismatched token?
CombineFollows(bool exact)561         protected virtual BitSet CombineFollows(bool exact) {
562             int top = state._fsp;
563             BitSet followSet = new BitSet();
564             for (int i = top; i >= 0; i--) {
565                 BitSet localFollowSet = (BitSet)state.following[i];
566                 /*
567                 System.out.println("local follow depth "+i+"="+
568                                    localFollowSet.toString(getTokenNames())+")");
569                  */
570                 followSet.OrInPlace(localFollowSet);
571                 if (exact) {
572                     // can we see end of rule?
573                     if (localFollowSet.Member(TokenTypes.EndOfRule)) {
574                         // Only leave EOR in set if at top (start rule); this lets
575                         // us know if have to include follow(start rule); i.e., EOF
576                         if (i > 0) {
577                             followSet.Remove(TokenTypes.EndOfRule);
578                         }
579                     } else { // can't see end of rule, quit
580                         break;
581                     }
582                 }
583             }
584             return followSet;
585         }
586 
587         /** <summary>Attempt to recover from a single missing or extra token.</summary>
588          *
589          *  EXTRA TOKEN
590          *
591          *  LA(1) is not what we are looking for.  If LA(2) has the right token,
592          *  however, then assume LA(1) is some extra spurious token.  Delete it
593          *  and LA(2) as if we were doing a normal match(), which advances the
594          *  input.
595          *
596          *  MISSING TOKEN
597          *
598          *  If current token is consistent with what could come after
599          *  ttype then it is ok to "insert" the missing token, else throw
600          *  exception For example, Input "i=(3;" is clearly missing the
601          *  ')'.  When the parser returns from the nested call to expr, it
602          *  will have call chain:
603          *
604          *    stat -> expr -> atom
605          *
606          *  and it will be trying to match the ')' at this point in the
607          *  derivation:
608          *
609          *       => ID '=' '(' INT ')' ('+' atom)* ';'
610          *                          ^
611          *  match() will see that ';' doesn't match ')' and report a
612          *  mismatched token error.  To recover, it sees that LA(1)==';'
613          *  is in the set of tokens that can follow the ')' token
614          *  reference in rule atom.  It can assume that you forgot the ')'.
615          */
RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow)616         protected virtual object RecoverFromMismatchedToken(IIntStream input, int ttype, BitSet follow) {
617             RecognitionException e = null;
618             // if next token is what we are looking for then "delete" this token
619             if (MismatchIsUnwantedToken(input, ttype)) {
620                 e = new UnwantedTokenException(ttype, input, TokenNames);
621                 /*
622                 System.err.println("recoverFromMismatchedToken deleting "+
623                                    ((TokenStream)input).LT(1)+
624                                    " since "+((TokenStream)input).LT(2)+" is what we want");
625                  */
626                 BeginResync();
627                 input.Consume(); // simply delete extra token
628                 EndResync();
629                 ReportError(e);  // report after consuming so AW sees the token in the exception
630                 // we want to return the token we're actually matching
631                 object matchedSymbol = GetCurrentInputSymbol(input);
632                 input.Consume(); // move past ttype token as if all were ok
633                 return matchedSymbol;
634             }
635             // can't recover with single token deletion, try insertion
636             if (MismatchIsMissingToken(input, follow)) {
637                 object inserted = GetMissingSymbol(input, e, ttype, follow);
638                 e = new MissingTokenException(ttype, input, inserted);
639                 ReportError(e);  // report after inserting so AW sees the token in the exception
640                 return inserted;
641             }
642             // even that didn't work; must throw the exception
643             e = new MismatchedTokenException(ttype, input, TokenNames);
644             throw e;
645         }
646 
647         /** Not currently used */
RecoverFromMismatchedSet(IIntStream input, RecognitionException e, BitSet follow)648         public virtual object RecoverFromMismatchedSet(IIntStream input,
649                                                RecognitionException e,
650                                                BitSet follow) {
651             if (MismatchIsMissingToken(input, follow)) {
652                 // System.out.println("missing token");
653                 ReportError(e);
654                 // we don't know how to conjure up a token for sets yet
655                 return GetMissingSymbol(input, e, TokenTypes.Invalid, follow);
656             }
657             // TODO do single token deletion like above for Token mismatch
658             throw e;
659         }
660 
661         /** <summary>
662          *  Match needs to return the current input symbol, which gets put
663          *  into the label for the associated token ref; e.g., x=ID.  Token
664          *  and tree parsers need to return different objects. Rather than test
665          *  for input stream type or change the IntStream interface, I use
666          *  a simple method to ask the recognizer to tell me what the current
667          *  input symbol is.
668          *  </summary>
669          *
670          *  <remarks>This is ignored for lexers.</remarks>
671          */
GetCurrentInputSymbol(IIntStream input)672         protected virtual object GetCurrentInputSymbol(IIntStream input) {
673             return null;
674         }
675 
676         /** <summary>Conjure up a missing token during error recovery.</summary>
677          *
678          *  <remarks>
679          *  The recognizer attempts to recover from single missing
680          *  symbols. But, actions might refer to that missing symbol.
681          *  For example, x=ID {f($x);}. The action clearly assumes
682          *  that there has been an identifier matched previously and that
683          *  $x points at that token. If that token is missing, but
684          *  the next token in the stream is what we want we assume that
685          *  this token is missing and we keep going. Because we
686          *  have to return some token to replace the missing token,
687          *  we have to conjure one up. This method gives the user control
688          *  over the tokens returned for missing tokens. Mostly,
689          *  you will want to create something special for identifier
690          *  tokens. For literals such as '{' and ',', the default
691          *  action in the parser or tree parser works. It simply creates
692          *  a CommonToken of the appropriate type. The text will be the token.
693          *  If you change what tokens must be created by the lexer,
694          *  override this method to create the appropriate tokens.
695          *  </remarks>
696          */
GetMissingSymbol(IIntStream input, RecognitionException e, int expectedTokenType, BitSet follow)697         protected virtual object GetMissingSymbol(IIntStream input,
698                                           RecognitionException e,
699                                           int expectedTokenType,
700                                           BitSet follow) {
701             return null;
702         }
703 
ConsumeUntil(IIntStream input, int tokenType)704         public virtual void ConsumeUntil(IIntStream input, int tokenType) {
705             //System.out.println("consumeUntil "+tokenType);
706             int ttype = input.LA(1);
707             while (ttype != TokenTypes.EndOfFile && ttype != tokenType) {
708                 input.Consume();
709                 ttype = input.LA(1);
710             }
711         }
712 
713         /** <summary>Consume tokens until one matches the given token set</summary> */
ConsumeUntil(IIntStream input, BitSet set)714         public virtual void ConsumeUntil(IIntStream input, BitSet set) {
715             //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
716             int ttype = input.LA(1);
717             while (ttype != TokenTypes.EndOfFile && !set.Member(ttype)) {
718                 //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
719                 input.Consume();
720                 ttype = input.LA(1);
721             }
722         }
723 
724         /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
PushFollow(BitSet fset)725         protected void PushFollow(BitSet fset) {
726             if ((state._fsp + 1) >= state.following.Length) {
727                 Array.Resize(ref state.following, state.following.Length * 2);
728             }
729             state.following[++state._fsp] = fset;
730         }
731 
PopFollow()732         protected void PopFollow() {
733             state._fsp--;
734         }
735 
736         /** <summary>
737          *  Return List<String> of the rules in your parser instance
738          *  leading up to a call to this method.  You could override if
739          *  you want more details such as the file/line info of where
740          *  in the parser java code a rule is invoked.
741          *  </summary>
742          *
743          *  <remarks>
744          *  This is very useful for error messages and for context-sensitive
745          *  error recovery.
746          *  </remarks>
747          */
GetRuleInvocationStack()748         public virtual IList<string> GetRuleInvocationStack() {
749             return GetRuleInvocationStack(new StackTrace(true));
750         }
751 
752         /** <summary>
753          *  A more general version of GetRuleInvocationStack where you can
754          *  pass in the StackTrace of, for example, a RecognitionException
755          *  to get it's rule stack trace.
756          *  </summary>
757          */
GetRuleInvocationStack(StackTrace trace)758         public static IList<string> GetRuleInvocationStack(StackTrace trace) {
759             if (trace == null)
760                 throw new ArgumentNullException("trace");
761 
762             List<string> rules = new List<string>();
763             StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
764 
765             for (int i = stack.Length - 1; i >= 0; i--) {
766                 StackFrame frame = stack[i];
767                 MethodBase method = frame.GetMethod();
768                 GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
769                 if (attributes != null && attributes.Length > 0)
770                     rules.Add(attributes[0].Name);
771             }
772 
773             return rules;
774         }
775 
776         public virtual int BacktrackingLevel {
777             get {
778                 return state.backtracking;
779             }
780             set {
781                 state.backtracking = value;
782             }
783         }
784 
785         /** <summary>Return whether or not a backtracking attempt failed.</summary> */
786         public virtual bool Failed {
787             get {
788                 return state.failed;
789             }
790         }
791 
792         /** <summary>
793          *  Used to print out token names like ID during debugging and
794          *  error reporting.  The generated parsers implement a method
795          *  that overrides this to point to their String[] tokenNames.
796          *  </summary>
797          */
798         public virtual string[] TokenNames {
799             get {
800                 return null;
801             }
802         }
803 
804         /** <summary>
805          *  For debugging and other purposes, might want the grammar name.
806          *  Have ANTLR generate an implementation for this method.
807          *  </summary>
808          */
809         public virtual string GrammarFileName {
810             get {
811                 return null;
812             }
813         }
814 
815         public abstract string SourceName {
816             get;
817         }
818 
819         /** <summary>
820          *  A convenience method for use most often with template rewrites.
821          *  Convert a List<Token> to List<String>
822          *  </summary>
823          */
ToStrings(ICollection<IToken> tokens)824         public virtual List<string> ToStrings(ICollection<IToken> tokens) {
825             if (tokens == null)
826                 return null;
827 
828             List<string> strings = new List<string>(tokens.Count);
829             foreach (IToken token in tokens) {
830                 strings.Add(token.Text);
831             }
832 
833             return strings;
834         }
835 
836         /** <summary>
837          *  Given a rule number and a start token index number, return
838          *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
839          *  start index.  If this rule has parsed input starting from the
840          *  start index before, then return where the rule stopped parsing.
841          *  It returns the index of the last token matched by the rule.
842          *  </summary>
843          *
844          *  <remarks>
845          *  For now we use a hashtable and just the slow Object-based one.
846          *  Later, we can make a special one for ints and also one that
847          *  tosses out data after we commit past input position i.
848          *  </remarks>
849          */
GetRuleMemoization(int ruleIndex, int ruleStartIndex)850         public virtual int GetRuleMemoization(int ruleIndex, int ruleStartIndex) {
851             if (state.ruleMemo[ruleIndex] == null) {
852                 state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
853             }
854 
855             int stopIndex;
856             if (!state.ruleMemo[ruleIndex].TryGetValue(ruleStartIndex, out stopIndex))
857                 return MemoRuleUnknown;
858 
859             return stopIndex;
860         }
861 
862         /** <summary>
863          *  Has this rule already parsed input at the current index in the
864          *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
865          *  If we attempted but failed to parse properly before, return
866          *  MEMO_RULE_FAILED.
867          *  </summary>
868          *
869          *  <remarks>
870          *  This method has a side-effect: if we have seen this input for
871          *  this rule and successfully parsed before, then seek ahead to
872          *  1 past the stop token matched for this rule last time.
873          *  </remarks>
874          */
AlreadyParsedRule(IIntStream input, int ruleIndex)875         public virtual bool AlreadyParsedRule(IIntStream input, int ruleIndex) {
876             int stopIndex = GetRuleMemoization(ruleIndex, input.Index);
877             if (stopIndex == MemoRuleUnknown) {
878                 return false;
879             }
880             if (stopIndex == MemoRuleFailed) {
881                 //System.out.println("rule "+ruleIndex+" will never succeed");
882                 state.failed = true;
883             } else {
884                 //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
885                 input.Seek(stopIndex + 1); // jump to one past stop token
886             }
887             return true;
888         }
889 
890         /** <summary>
891          *  Record whether or not this rule parsed the input at this position
892          *  successfully.  Use a standard java hashtable for now.
893          *  </summary>
894          */
Memoize(IIntStream input, int ruleIndex, int ruleStartIndex)895         public virtual void Memoize(IIntStream input,
896                             int ruleIndex,
897                             int ruleStartIndex) {
898             int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
899             if (state.ruleMemo == null) {
900                 if (TraceDestination != null)
901                     TraceDestination.WriteLine("!!!!!!!!! memo array is null for " + GrammarFileName);
902             }
903             if (ruleIndex >= state.ruleMemo.Length) {
904                 if (TraceDestination != null)
905                     TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
906             }
907             if (state.ruleMemo[ruleIndex] != null) {
908                 state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
909             }
910         }
911 
912         /** <summary>return how many rule/input-index pairs there are in total.</summary>
913          *  TODO: this includes synpreds. :(
914          */
GetRuleMemoizationCacheSize()915         public virtual int GetRuleMemoizationCacheSize() {
916             int n = 0;
917             for (int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++) {
918                 var ruleMap = state.ruleMemo[i];
919                 if (ruleMap != null) {
920                     n += ruleMap.Count; // how many input indexes are recorded?
921                 }
922             }
923             return n;
924         }
925 
926         [Conditional("ANTLR_TRACE")]
TraceIn(string ruleName, int ruleIndex, object inputSymbol)927         public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol) {
928             if (TraceDestination == null)
929                 return;
930 
931             TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
932             if (state.backtracking > 0) {
933                 TraceDestination.Write(" backtracking=" + state.backtracking);
934             }
935             TraceDestination.WriteLine();
936         }
937 
938         [Conditional("ANTLR_TRACE")]
TraceOut(string ruleName, int ruleIndex, object inputSymbol)939         public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol) {
940             if (TraceDestination == null)
941                 return;
942 
943             TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
944             if (state.backtracking > 0) {
945                 TraceDestination.Write(" backtracking=" + state.backtracking);
946                 if (state.failed)
947                     TraceDestination.Write(" failed");
948                 else
949                     TraceDestination.Write(" succeeded");
950             }
951             TraceDestination.WriteLine();
952         }
953 
954         #region Debugging support
955         public virtual IDebugEventListener DebugListener {
956             get {
957                 return null;
958             }
959         }
960 
961         [Conditional("ANTLR_DEBUG")]
DebugEnterRule(string grammarFileName, string ruleName)962         protected virtual void DebugEnterRule(string grammarFileName, string ruleName) {
963             IDebugEventListener dbg = DebugListener;
964             if (dbg != null)
965                 dbg.EnterRule(grammarFileName, ruleName);
966         }
967 
968         [Conditional("ANTLR_DEBUG")]
DebugExitRule(string grammarFileName, string ruleName)969         protected virtual void DebugExitRule(string grammarFileName, string ruleName) {
970             IDebugEventListener dbg = DebugListener;
971             if (dbg != null)
972                 dbg.ExitRule(grammarFileName, ruleName);
973         }
974 
975         [Conditional("ANTLR_DEBUG")]
DebugEnterSubRule(int decisionNumber)976         protected virtual void DebugEnterSubRule(int decisionNumber) {
977             IDebugEventListener dbg = DebugListener;
978             if (dbg != null)
979                 dbg.EnterSubRule(decisionNumber);
980         }
981 
982         [Conditional("ANTLR_DEBUG")]
DebugExitSubRule(int decisionNumber)983         protected virtual void DebugExitSubRule(int decisionNumber) {
984             IDebugEventListener dbg = DebugListener;
985             if (dbg != null)
986                 dbg.ExitSubRule(decisionNumber);
987         }
988 
989         [Conditional("ANTLR_DEBUG")]
DebugEnterAlt(int alt)990         protected virtual void DebugEnterAlt(int alt) {
991             IDebugEventListener dbg = DebugListener;
992             if (dbg != null)
993                 dbg.EnterAlt(alt);
994         }
995 
996         [Conditional("ANTLR_DEBUG")]
DebugEnterDecision(int decisionNumber, bool couldBacktrack)997         protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack) {
998             IDebugEventListener dbg = DebugListener;
999             if (dbg != null)
1000                 dbg.EnterDecision(decisionNumber, couldBacktrack);
1001         }
1002 
1003         [Conditional("ANTLR_DEBUG")]
DebugExitDecision(int decisionNumber)1004         protected virtual void DebugExitDecision(int decisionNumber) {
1005             IDebugEventListener dbg = DebugListener;
1006             if (dbg != null)
1007                 dbg.ExitDecision(decisionNumber);
1008         }
1009 
1010         [Conditional("ANTLR_DEBUG")]
DebugLocation(int line, int charPositionInLine)1011         protected virtual void DebugLocation(int line, int charPositionInLine) {
1012             IDebugEventListener dbg = DebugListener;
1013             if (dbg != null)
1014                 dbg.Location(line, charPositionInLine);
1015         }
1016 
1017         [Conditional("ANTLR_DEBUG")]
DebugSemanticPredicate(bool result, string predicate)1018         protected virtual void DebugSemanticPredicate(bool result, string predicate) {
1019             IDebugEventListener dbg = DebugListener;
1020             if (dbg != null)
1021                 dbg.SemanticPredicate(result, predicate);
1022         }
1023 
1024         [Conditional("ANTLR_DEBUG")]
DebugBeginBacktrack(int level)1025         protected virtual void DebugBeginBacktrack(int level) {
1026             IDebugEventListener dbg = DebugListener;
1027             if (dbg != null)
1028                 dbg.BeginBacktrack(level);
1029         }
1030 
1031         [Conditional("ANTLR_DEBUG")]
DebugEndBacktrack(int level, bool successful)1032         protected virtual void DebugEndBacktrack(int level, bool successful) {
1033             IDebugEventListener dbg = DebugListener;
1034             if (dbg != null)
1035                 dbg.EndBacktrack(level, successful);
1036         }
1037 
1038         [Conditional("ANTLR_DEBUG")]
DebugRecognitionException(RecognitionException ex)1039         protected virtual void DebugRecognitionException(RecognitionException ex) {
1040             IDebugEventListener dbg = DebugListener;
1041             if (dbg != null)
1042                 dbg.RecognitionException(ex);
1043         }
1044         #endregion
1045     }
1046 }
1047