• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD licence"]
3  * Copyright (c) 2005-2008 Terence Parr
4  * All rights reserved.
5  *
6  * Conversion to C#:
7  * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 namespace Antlr.Runtime
34 {
35     using System.Collections.Generic;
36 
37     using ArgumentNullException = System.ArgumentNullException;
38     using Array = System.Array;
39     using Conditional = System.Diagnostics.ConditionalAttribute;
40     using Exception = System.Exception;
41     using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;
42     using MethodBase = System.Reflection.MethodBase;
43     using NotSupportedException = System.NotSupportedException;
44     using Regex = System.Text.RegularExpressions.Regex;
45     using StackFrame = System.Diagnostics.StackFrame;
46     using StackTrace = System.Diagnostics.StackTrace;
47     using TextWriter = System.IO.TextWriter;
48     using Type = System.Type;
49 
50     /** <summary>
51      *  A generic recognizer that can handle recognizers generated from
52      *  lexer, parser, and tree grammars.  This is all the parsing
53      *  support code essentially; most of it is error recovery stuff and
54      *  backtracking.
55      *  </summary>
56      */
57     public abstract class BaseRecognizer
58     {
59         public const int MemoRuleFailed = -2;
60         public const int MemoRuleUnknown = -1;
61         public const int InitialFollowStackSize = 100;
62 
63         // copies from Token object for convenience in actions
64         public const int DefaultTokenChannel = TokenChannels.Default;
65         public const int Hidden = TokenChannels.Hidden;
66 
67         public const string NextTokenRuleName = "nextToken";
68 
69         /** <summary>
70          *  State of a lexer, parser, or tree parser are collected into a state
71          *  object so the state can be shared.  This sharing is needed to
72          *  have one grammar import others and share same error variables
73          *  and other state variables.  It's a kind of explicit multiple
74          *  inheritance via delegation of methods and shared state.
75          *  </summary>
76          */
77         protected internal RecognizerSharedState state;
78 
BaseRecognizer()79         public BaseRecognizer()
80             : this(new RecognizerSharedState())
81         {
82         }
83 
BaseRecognizer( RecognizerSharedState state )84         public BaseRecognizer( RecognizerSharedState state )
85         {
86             if ( state == null )
87             {
88                 state = new RecognizerSharedState();
89             }
90             this.state = state;
91             InitDFAs();
92         }
93 
94         public TextWriter TraceDestination
95         {
96             get;
97             set;
98         }
99 
InitDFAs()100         protected virtual void InitDFAs()
101         {
102         }
103 
104         /** <summary>reset the parser's state; subclasses must rewinds the input stream</summary> */
Reset()105         public virtual void Reset()
106         {
107             // wack everything related to error recovery
108             if ( state == null )
109             {
110                 return; // no shared state work to do
111             }
112             state._fsp = -1;
113             state.errorRecovery = false;
114             state.lastErrorIndex = -1;
115             state.failed = false;
116             state.syntaxErrors = 0;
117             // wack everything related to backtracking and memoization
118             state.backtracking = 0;
119             for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ )
120             { // wipe cache
121                 state.ruleMemo[i] = null;
122             }
123         }
124 
125 
126         /** <summary>
127          *  Match current input symbol against ttype.  Attempt
128          *  single token insertion or deletion error recovery.  If
129          *  that fails, throw MismatchedTokenException.
130          *  </summary>
131          *
132          *  <remarks>
133          *  To turn off single token insertion or deletion error
134          *  recovery, override recoverFromMismatchedToken() and have it
135          *  throw an exception. See TreeParser.recoverFromMismatchedToken().
136          *  This way any error in a rule will cause an exception and
137          *  immediate exit from rule.  Rule would recover by resynchronizing
138          *  to the set of symbols that can follow rule ref.
139          *  </remarks>
140          */
Match( IIntStream input, int ttype, BitSet follow )141         public virtual object Match( IIntStream input, int ttype, BitSet follow )
142         {
143             //System.out.println("match "+((TokenStream)input).LT(1));
144             object matchedSymbol = GetCurrentInputSymbol( input );
145             if ( input.LA( 1 ) == ttype )
146             {
147                 input.Consume();
148                 state.errorRecovery = false;
149                 state.failed = false;
150                 return matchedSymbol;
151             }
152             if ( state.backtracking > 0 )
153             {
154                 state.failed = true;
155                 return matchedSymbol;
156             }
157             matchedSymbol = RecoverFromMismatchedToken( input, ttype, follow );
158             return matchedSymbol;
159         }
160 
161         /** <summary>Match the wildcard: in a symbol</summary> */
MatchAny( IIntStream input )162         public virtual void MatchAny( IIntStream input )
163         {
164             state.errorRecovery = false;
165             state.failed = false;
166             input.Consume();
167         }
168 
MismatchIsUnwantedToken( IIntStream input, int ttype )169         public virtual bool MismatchIsUnwantedToken( IIntStream input, int ttype )
170         {
171             return input.LA( 2 ) == ttype;
172         }
173 
MismatchIsMissingToken( IIntStream input, BitSet follow )174         public virtual bool MismatchIsMissingToken( IIntStream input, BitSet follow )
175         {
176             if ( follow == null )
177             {
178                 // we have no information about the follow; we can only consume
179                 // a single token and hope for the best
180                 return false;
181             }
182             // compute what can follow this grammar element reference
183             if ( follow.Member( TokenTypes.EndOfRule ) )
184             {
185                 BitSet viableTokensFollowingThisRule = ComputeContextSensitiveRuleFOLLOW();
186                 follow = follow.Or( viableTokensFollowingThisRule );
187                 if ( state._fsp >= 0 )
188                 { // remove EOR if we're not the start symbol
189                     follow.Remove( TokenTypes.EndOfRule );
190                 }
191             }
192             // if current token is consistent with what could come after set
193             // then we know we're missing a token; error recovery is free to
194             // "insert" the missing token
195 
196             //System.out.println("viable tokens="+follow.toString(getTokenNames()));
197             //System.out.println("LT(1)="+((TokenStream)input).LT(1));
198 
199             // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
200             // in follow set to indicate that the fall of the start symbol is
201             // in the set (EOF can follow).
202             if ( follow.Member( input.LA( 1 ) ) || follow.Member( TokenTypes.EndOfRule ) )
203             {
204                 //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
205                 return true;
206             }
207             return false;
208         }
209 
210         /** <summary>Report a recognition problem.</summary>
211          *
212          *  <remarks>
213          *  This method sets errorRecovery to indicate the parser is recovering
214          *  not parsing.  Once in recovery mode, no errors are generated.
215          *  To get out of recovery mode, the parser must successfully match
216          *  a token (after a resync).  So it will go:
217          *
218          * 		1. error occurs
219          * 		2. enter recovery mode, report error
220          * 		3. consume until token found in resynch set
221          * 		4. try to resume parsing
222          * 		5. next match() will reset errorRecovery mode
223          *
224          *  If you override, make sure to update syntaxErrors if you care about that.
225          *  </remarks>
226          */
ReportError( RecognitionException e )227         public virtual void ReportError( RecognitionException e )
228         {
229             // if we've already reported an error and have not matched a token
230             // yet successfully, don't report any errors.
231             if ( state.errorRecovery )
232             {
233                 //System.err.print("[SPURIOUS] ");
234                 return;
235             }
236             state.syntaxErrors++; // don't count spurious
237             state.errorRecovery = true;
238 
239             DisplayRecognitionError( this.TokenNames, e );
240         }
241 
DisplayRecognitionError( string[] tokenNames, RecognitionException e )242         public virtual void DisplayRecognitionError( string[] tokenNames,
243                                             RecognitionException e )
244         {
245             string hdr = GetErrorHeader( e );
246             string msg = GetErrorMessage( e, tokenNames );
247             EmitErrorMessage( hdr + " " + msg );
248         }
249 
250         /** <summary>What error message should be generated for the various exception types?</summary>
251          *
252          *  <remarks>
253          *  Not very object-oriented code, but I like having all error message
254          *  generation within one method rather than spread among all of the
255          *  exception classes. This also makes it much easier for the exception
256          *  handling because the exception classes do not have to have pointers back
257          *  to this object to access utility routines and so on. Also, changing
258          *  the message for an exception type would be difficult because you
259          *  would have to subclassing exception, but then somehow get ANTLR
260          *  to make those kinds of exception objects instead of the default.
261          *  This looks weird, but trust me--it makes the most sense in terms
262          *  of flexibility.
263          *
264          *  For grammar debugging, you will want to override this to add
265          *  more information such as the stack frame with
266          *  getRuleInvocationStack(e, this.getClass().getName()) and,
267          *  for no viable alts, the decision description and state etc...
268          *
269          *  Override this to change the message generated for one or more
270          *  exception types.
271          *  </remarks>
272          */
GetErrorMessage( RecognitionException e, string[] tokenNames )273         public virtual string GetErrorMessage( RecognitionException e, string[] tokenNames )
274         {
275             string msg = e.Message;
276             if ( e is UnwantedTokenException )
277             {
278                 UnwantedTokenException ute = (UnwantedTokenException)e;
279                 string tokenName = "<unknown>";
280                 if ( ute.Expecting == TokenTypes.EndOfFile )
281                 {
282                     tokenName = "EndOfFile";
283                 }
284                 else
285                 {
286                     tokenName = tokenNames[ute.Expecting];
287                 }
288                 msg = "extraneous input " + GetTokenErrorDisplay( ute.UnexpectedToken ) +
289                     " expecting " + tokenName;
290             }
291             else if ( e is MissingTokenException )
292             {
293                 MissingTokenException mte = (MissingTokenException)e;
294                 string tokenName = "<unknown>";
295                 if ( mte.Expecting == TokenTypes.EndOfFile )
296                 {
297                     tokenName = "EndOfFile";
298                 }
299                 else
300                 {
301                     tokenName = tokenNames[mte.Expecting];
302                 }
303                 msg = "missing " + tokenName + " at " + GetTokenErrorDisplay( e.Token );
304             }
305             else if ( e is MismatchedTokenException )
306             {
307                 MismatchedTokenException mte = (MismatchedTokenException)e;
308                 string tokenName = "<unknown>";
309                 if ( mte.Expecting == TokenTypes.EndOfFile )
310                 {
311                     tokenName = "EndOfFile";
312                 }
313                 else
314                 {
315                     tokenName = tokenNames[mte.Expecting];
316                 }
317                 msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
318                     " expecting " + tokenName;
319             }
320             else if ( e is MismatchedTreeNodeException )
321             {
322                 MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
323                 string tokenName = "<unknown>";
324                 if ( mtne.Expecting == TokenTypes.EndOfFile )
325                 {
326                     tokenName = "EndOfFile";
327                 }
328                 else
329                 {
330                     tokenName = tokenNames[mtne.Expecting];
331                 }
332                 // workaround for a .NET framework bug (NullReferenceException)
333                 string nodeText = ( mtne.Node != null ) ? mtne.Node.ToString() ?? string.Empty : string.Empty;
334                 msg = "mismatched tree node: " + nodeText + " expecting " + tokenName;
335             }
336             else if ( e is NoViableAltException )
337             {
338                 //NoViableAltException nvae = (NoViableAltException)e;
339                 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
340                 // and "(decision="+nvae.decisionNumber+") and
341                 // "state "+nvae.stateNumber
342                 msg = "no viable alternative at input " + GetTokenErrorDisplay( e.Token );
343             }
344             else if ( e is EarlyExitException )
345             {
346                 //EarlyExitException eee = (EarlyExitException)e;
347                 // for development, can add "(decision="+eee.decisionNumber+")"
348                 msg = "required (...)+ loop did not match anything at input " +
349                     GetTokenErrorDisplay( e.Token );
350             }
351             else if ( e is MismatchedSetException )
352             {
353                 MismatchedSetException mse = (MismatchedSetException)e;
354                 msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
355                     " expecting set " + mse.Expecting;
356             }
357             else if ( e is MismatchedNotSetException )
358             {
359                 MismatchedNotSetException mse = (MismatchedNotSetException)e;
360                 msg = "mismatched input " + GetTokenErrorDisplay( e.Token ) +
361                     " expecting set " + mse.Expecting;
362             }
363             else if ( e is FailedPredicateException )
364             {
365                 FailedPredicateException fpe = (FailedPredicateException)e;
366                 msg = "rule " + fpe.RuleName + " failed predicate: {" +
367                     fpe.PredicateText + "}?";
368             }
369             return msg;
370         }
371 
372         /** <summary>
373          *  Get number of recognition errors (lexer, parser, tree parser).  Each
374          *  recognizer tracks its own number.  So parser and lexer each have
375          *  separate count.  Does not count the spurious errors found between
376          *  an error and next valid token match
377          *  </summary>
378          *
379          *  <seealso cref="reportError()"/>
380          */
381         public virtual int NumberOfSyntaxErrors
382         {
383             get
384             {
385                 return state.syntaxErrors;
386             }
387         }
388 
389         /** <summary>What is the error header, normally line/character position information?</summary> */
GetErrorHeader( RecognitionException e )390         public virtual string GetErrorHeader( RecognitionException e )
391         {
392             string prefix = SourceName ?? string.Empty;
393             if (prefix.Length > 0)
394                 prefix += ' ';
395 
396             return string.Format("{0}line {1}:{2}", prefix, e.Line, e.CharPositionInLine + 1);
397         }
398 
399         /** <summary>
400          *  How should a token be displayed in an error message? The default
401          *  is to display just the text, but during development you might
402          *  want to have a lot of information spit out.  Override in that case
403          *  to use t.ToString() (which, for CommonToken, dumps everything about
404          *  the token). This is better than forcing you to override a method in
405          *  your token objects because you don't have to go modify your lexer
406          *  so that it creates a new Java type.
407          *  </summary>
408          */
GetTokenErrorDisplay( IToken t )409         public virtual string GetTokenErrorDisplay( IToken t )
410         {
411             string s = t.Text;
412             if ( s == null )
413             {
414                 if ( t.Type == TokenTypes.EndOfFile )
415                 {
416                     s = "<EOF>";
417                 }
418                 else
419                 {
420                     s = "<" + t.Type + ">";
421                 }
422             }
423             s = Regex.Replace( s, "\n", "\\\\n" );
424             s = Regex.Replace( s, "\r", "\\\\r" );
425             s = Regex.Replace( s, "\t", "\\\\t" );
426             return "'" + s + "'";
427         }
428 
429         /** <summary>Override this method to change where error messages go</summary> */
EmitErrorMessage( string msg )430         public virtual void EmitErrorMessage( string msg )
431         {
432             if (TraceDestination != null)
433                 TraceDestination.WriteLine( msg );
434         }
435 
436         /** <summary>
437          *  Recover from an error found on the input stream.  This is
438          *  for NoViableAlt and mismatched symbol exceptions.  If you enable
439          *  single token insertion and deletion, this will usually not
440          *  handle mismatched symbol exceptions but there could be a mismatched
441          *  token that the match() routine could not recover from.
442          *  </summary>
443          */
Recover( IIntStream input, RecognitionException re )444         public virtual void Recover( IIntStream input, RecognitionException re )
445         {
446             if ( state.lastErrorIndex == input.Index )
447             {
448                 // uh oh, another error at same token index; must be a case
449                 // where LT(1) is in the recovery token set so nothing is
450                 // consumed; consume a single token so at least to prevent
451                 // an infinite loop; this is a failsafe.
452                 input.Consume();
453             }
454             state.lastErrorIndex = input.Index;
455             BitSet followSet = ComputeErrorRecoverySet();
456             BeginResync();
457             ConsumeUntil( input, followSet );
458             EndResync();
459         }
460 
461         /** <summary>
462          *  A hook to listen in on the token consumption during error recovery.
463          *  The DebugParser subclasses this to fire events to the listenter.
464          *  </summary>
465          */
BeginResync()466         public virtual void BeginResync()
467         {
468         }
469 
EndResync()470         public virtual void EndResync()
471         {
472         }
473 
474         /*  Compute the error recovery set for the current rule.  During
475          *  rule invocation, the parser pushes the set of tokens that can
476          *  follow that rule reference on the stack; this amounts to
477          *  computing FIRST of what follows the rule reference in the
478          *  enclosing rule. This local follow set only includes tokens
479          *  from within the rule; i.e., the FIRST computation done by
480          *  ANTLR stops at the end of a rule.
481          *
482          *  EXAMPLE
483          *
484          *  When you find a "no viable alt exception", the input is not
485          *  consistent with any of the alternatives for rule r.  The best
486          *  thing to do is to consume tokens until you see something that
487          *  can legally follow a call to r *or* any rule that called r.
488          *  You don't want the exact set of viable next tokens because the
489          *  input might just be missing a token--you might consume the
490          *  rest of the input looking for one of the missing tokens.
491          *
492          *  Consider grammar:
493          *
494          *  a : '[' b ']'
495          *    | '(' b ')'
496          *    ;
497          *  b : c '^' INT ;
498          *  c : ID
499          *    | INT
500          *    ;
501          *
502          *  At each rule invocation, the set of tokens that could follow
503          *  that rule is pushed on a stack.  Here are the various "local"
504          *  follow sets:
505          *
506          *  FOLLOW(b1_in_a) = FIRST(']') = ']'
507          *  FOLLOW(b2_in_a) = FIRST(')') = ')'
508          *  FOLLOW(c_in_b) = FIRST('^') = '^'
509          *
510          *  Upon erroneous input "[]", the call chain is
511          *
512          *  a -> b -> c
513          *
514          *  and, hence, the follow context stack is:
515          *
516          *  depth  local follow set     after call to rule
517          *    0         <EOF>                    a (from main())
518          *    1          ']'                     b
519          *    3          '^'                     c
520          *
521          *  Notice that ')' is not included, because b would have to have
522          *  been called from a different context in rule a for ')' to be
523          *  included.
524          *
525          *  For error recovery, we cannot consider FOLLOW(c)
526          *  (context-sensitive or otherwise).  We need the combined set of
527          *  all context-sensitive FOLLOW sets--the set of all tokens that
528          *  could follow any reference in the call chain.  We need to
529          *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
530          *  we resync'd to that token, we'd consume until EOF.  We need to
531          *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
532          *  In this case, for input "[]", LA(1) is in this set so we would
533          *  not consume anything and after printing an error rule c would
534          *  return normally.  It would not find the required '^' though.
535          *  At this point, it gets a mismatched token error and throws an
536          *  exception (since LA(1) is not in the viable following token
537          *  set).  The rule exception handler tries to recover, but finds
538          *  the same recovery set and doesn't consume anything.  Rule b
539          *  exits normally returning to rule a.  Now it finds the ']' (and
540          *  with the successful match exits errorRecovery mode).
541          *
542          *  So, you cna see that the parser walks up call chain looking
543          *  for the token that was a member of the recovery set.
544          *
545          *  Errors are not generated in errorRecovery mode.
546          *
547          *  ANTLR's error recovery mechanism is based upon original ideas:
548          *
549          *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
550          *
551          *  and
552          *
553          *  "A note on error recovery in recursive descent parsers":
554          *  http://portal.acm.org/citation.cfm?id=947902.947905
555          *
556          *  Later, Josef Grosch had some good ideas:
557          *
558          *  "Efficient and Comfortable Error Recovery in Recursive Descent
559          *  Parsers":
560          *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
561          *
562          *  Like Grosch I implemented local FOLLOW sets that are combined
563          *  at run-time upon error to avoid overhead during parsing.
564          */
ComputeErrorRecoverySet()565         protected virtual BitSet ComputeErrorRecoverySet()
566         {
567             return CombineFollows( false );
568         }
569 
570         /** <summary>
571          *  Compute the context-sensitive FOLLOW set for current rule.
572          *  This is set of token types that can follow a specific rule
573          *  reference given a specific call chain.  You get the set of
574          *  viable tokens that can possibly come next (lookahead depth 1)
575          *  given the current call chain.  Contrast this with the
576          *  definition of plain FOLLOW for rule r:
577          *  </summary>
578          *
579          *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
580          *
581          *  where x in T* and alpha, beta in V*; T is set of terminals and
582          *  V is the set of terminals and nonterminals.  In other words,
583          *  FOLLOW(r) is the set of all tokens that can possibly follow
584          *  references to r in *any* sentential form (context).  At
585          *  runtime, however, we know precisely which context applies as
586          *  we have the call chain.  We may compute the exact (rather
587          *  than covering superset) set of following tokens.
588          *
589          *  For example, consider grammar:
590          *
591          *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
592          *       | "return" expr '.'
593          *       ;
594          *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
595          *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
596          *       | '(' expr ')'
597          *       ;
598          *
599          *  The FOLLOW sets are all inclusive whereas context-sensitive
600          *  FOLLOW sets are precisely what could follow a rule reference.
601          *  For input input "i=(3);", here is the derivation:
602          *
603          *  stat => ID '=' expr ';'
604          *       => ID '=' atom ('+' atom)* ';'
605          *       => ID '=' '(' expr ')' ('+' atom)* ';'
606          *       => ID '=' '(' atom ')' ('+' atom)* ';'
607          *       => ID '=' '(' INT ')' ('+' atom)* ';'
608          *       => ID '=' '(' INT ')' ';'
609          *
610          *  At the "3" token, you'd have a call chain of
611          *
612          *    stat -> expr -> atom -> expr -> atom
613          *
614          *  What can follow that specific nested ref to atom?  Exactly ')'
615          *  as you can see by looking at the derivation of this specific
616          *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
617          *
618          *  You want the exact viable token set when recovering from a
619          *  token mismatch.  Upon token mismatch, if LA(1) is member of
620          *  the viable next token set, then you know there is most likely
621          *  a missing token in the input stream.  "Insert" one by just not
622          *  throwing an exception.
623          */
ComputeContextSensitiveRuleFOLLOW()624         protected virtual BitSet ComputeContextSensitiveRuleFOLLOW()
625         {
626             return CombineFollows( true );
627         }
628 
629         // what is exact? it seems to only add sets from above on stack
630         // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
631         // Why would we ever want them all?  Maybe no viable alt instead of
632         // mismatched token?
CombineFollows(bool exact)633         protected virtual BitSet CombineFollows(bool exact)
634         {
635             int top = state._fsp;
636             BitSet followSet = new BitSet();
637             for ( int i = top; i >= 0; i-- )
638             {
639                 BitSet localFollowSet = (BitSet)state.following[i];
640                 /*
641                 System.out.println("local follow depth "+i+"="+
642                                    localFollowSet.toString(getTokenNames())+")");
643                  */
644                 followSet.OrInPlace( localFollowSet );
645                 if ( exact )
646                 {
647                     // can we see end of rule?
648                     if ( localFollowSet.Member( TokenTypes.EndOfRule ) )
649                     {
650                         // Only leave EOR in set if at top (start rule); this lets
651                         // us know if have to include follow(start rule); i.e., EOF
652                         if ( i > 0 )
653                         {
654                             followSet.Remove( TokenTypes.EndOfRule );
655                         }
656                     }
657                     else
658                     { // can't see end of rule, quit
659                         break;
660                     }
661                 }
662             }
663             return followSet;
664         }
665 
666         /** <summary>Attempt to recover from a single missing or extra token.</summary>
667          *
668          *  EXTRA TOKEN
669          *
670          *  LA(1) is not what we are looking for.  If LA(2) has the right token,
671          *  however, then assume LA(1) is some extra spurious token.  Delete it
672          *  and LA(2) as if we were doing a normal match(), which advances the
673          *  input.
674          *
675          *  MISSING TOKEN
676          *
677          *  If current token is consistent with what could come after
678          *  ttype then it is ok to "insert" the missing token, else throw
679          *  exception For example, Input "i=(3;" is clearly missing the
680          *  ')'.  When the parser returns from the nested call to expr, it
681          *  will have call chain:
682          *
683          *    stat -> expr -> atom
684          *
685          *  and it will be trying to match the ')' at this point in the
686          *  derivation:
687          *
688          *       => ID '=' '(' INT ')' ('+' atom)* ';'
689          *                          ^
690          *  match() will see that ';' doesn't match ')' and report a
691          *  mismatched token error.  To recover, it sees that LA(1)==';'
692          *  is in the set of tokens that can follow the ')' token
693          *  reference in rule atom.  It can assume that you forgot the ')'.
694          */
RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow )695         protected virtual object RecoverFromMismatchedToken( IIntStream input, int ttype, BitSet follow )
696         {
697             RecognitionException e = null;
698             // if next token is what we are looking for then "delete" this token
699             if ( MismatchIsUnwantedToken( input, ttype ) )
700             {
701                 e = new UnwantedTokenException( ttype, input, TokenNames );
702                 /*
703                 System.err.println("recoverFromMismatchedToken deleting "+
704                                    ((TokenStream)input).LT(1)+
705                                    " since "+((TokenStream)input).LT(2)+" is what we want");
706                  */
707                 BeginResync();
708                 input.Consume(); // simply delete extra token
709                 EndResync();
710                 ReportError( e );  // report after consuming so AW sees the token in the exception
711                 // we want to return the token we're actually matching
712                 object matchedSymbol = GetCurrentInputSymbol( input );
713                 input.Consume(); // move past ttype token as if all were ok
714                 return matchedSymbol;
715             }
716             // can't recover with single token deletion, try insertion
717             if ( MismatchIsMissingToken( input, follow ) )
718             {
719                 object inserted = GetMissingSymbol( input, e, ttype, follow );
720                 e = new MissingTokenException( ttype, input, inserted );
721                 ReportError( e );  // report after inserting so AW sees the token in the exception
722                 return inserted;
723             }
724             // even that didn't work; must throw the exception
725             e = new MismatchedTokenException(ttype, input, TokenNames);
726             throw e;
727         }
728 
729         /** Not currently used */
RecoverFromMismatchedSet( IIntStream input, RecognitionException e, BitSet follow )730         public virtual object RecoverFromMismatchedSet( IIntStream input,
731                                                RecognitionException e,
732                                                BitSet follow )
733         {
734             if ( MismatchIsMissingToken( input, follow ) )
735             {
736                 // System.out.println("missing token");
737                 ReportError( e );
738                 // we don't know how to conjure up a token for sets yet
739                 return GetMissingSymbol( input, e, TokenTypes.Invalid, follow );
740             }
741             // TODO do single token deletion like above for Token mismatch
742             throw e;
743         }
744 
745         /** <summary>
746          *  Match needs to return the current input symbol, which gets put
747          *  into the label for the associated token ref; e.g., x=ID.  Token
748          *  and tree parsers need to return different objects. Rather than test
749          *  for input stream type or change the IntStream interface, I use
750          *  a simple method to ask the recognizer to tell me what the current
751          *  input symbol is.
752          *  </summary>
753          *
754          *  <remarks>This is ignored for lexers.</remarks>
755          */
GetCurrentInputSymbol( IIntStream input )756         protected virtual object GetCurrentInputSymbol( IIntStream input )
757         {
758             return null;
759         }
760 
761         /** <summary>Conjure up a missing token during error recovery.</summary>
762          *
763          *  <remarks>
764          *  The recognizer attempts to recover from single missing
765          *  symbols. But, actions might refer to that missing symbol.
766          *  For example, x=ID {f($x);}. The action clearly assumes
767          *  that there has been an identifier matched previously and that
768          *  $x points at that token. If that token is missing, but
769          *  the next token in the stream is what we want we assume that
770          *  this token is missing and we keep going. Because we
771          *  have to return some token to replace the missing token,
772          *  we have to conjure one up. This method gives the user control
773          *  over the tokens returned for missing tokens. Mostly,
774          *  you will want to create something special for identifier
775          *  tokens. For literals such as '{' and ',', the default
776          *  action in the parser or tree parser works. It simply creates
777          *  a CommonToken of the appropriate type. The text will be the token.
778          *  If you change what tokens must be created by the lexer,
779          *  override this method to create the appropriate tokens.
780          *  </remarks>
781          */
GetMissingSymbol( IIntStream input, RecognitionException e, int expectedTokenType, BitSet follow )782         protected virtual object GetMissingSymbol( IIntStream input,
783                                           RecognitionException e,
784                                           int expectedTokenType,
785                                           BitSet follow )
786         {
787             return null;
788         }
789 
ConsumeUntil( IIntStream input, int tokenType )790         public virtual void ConsumeUntil( IIntStream input, int tokenType )
791         {
792             //System.out.println("consumeUntil "+tokenType);
793             int ttype = input.LA( 1 );
794             while ( ttype != TokenTypes.EndOfFile && ttype != tokenType )
795             {
796                 input.Consume();
797                 ttype = input.LA( 1 );
798             }
799         }
800 
801         /** <summary>Consume tokens until one matches the given token set</summary> */
ConsumeUntil( IIntStream input, BitSet set )802         public virtual void ConsumeUntil( IIntStream input, BitSet set )
803         {
804             //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
805             int ttype = input.LA( 1 );
806             while ( ttype != TokenTypes.EndOfFile && !set.Member( ttype ) )
807             {
808                 //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
809                 input.Consume();
810                 ttype = input.LA( 1 );
811             }
812         }
813 
814         /** <summary>Push a rule's follow set using our own hardcoded stack</summary> */
PushFollow( BitSet fset )815         protected void PushFollow( BitSet fset )
816         {
817             if ( ( state._fsp + 1 ) >= state.following.Length )
818             {
819                 Array.Resize(ref state.following, state.following.Length * 2);
820             }
821             state.following[++state._fsp] = fset;
822         }
823 
PopFollow()824         protected void PopFollow()
825         {
826             state._fsp--;
827         }
828 
829         /** <summary>
830          *  Return List<String> of the rules in your parser instance
831          *  leading up to a call to this method.  You could override if
832          *  you want more details such as the file/line info of where
833          *  in the parser java code a rule is invoked.
834          *  </summary>
835          *
836          *  <remarks>
837          *  This is very useful for error messages and for context-sensitive
838          *  error recovery.
839          *  </remarks>
840          */
GetRuleInvocationStack()841         public virtual IList<string> GetRuleInvocationStack()
842         {
843             return GetRuleInvocationStack( new StackTrace(true) );
844         }
845 
846         /** <summary>
847          *  A more general version of GetRuleInvocationStack where you can
848          *  pass in the StackTrace of, for example, a RecognitionException
849          *  to get it's rule stack trace.
850          *  </summary>
851          */
GetRuleInvocationStack(StackTrace trace)852         public static IList<string> GetRuleInvocationStack(StackTrace trace)
853         {
854             if (trace == null)
855                 throw new ArgumentNullException("trace");
856 
857             List<string> rules = new List<string>();
858             StackFrame[] stack = trace.GetFrames() ?? new StackFrame[0];
859 
860             for (int i = stack.Length - 1; i >= 0; i--)
861             {
862                 StackFrame frame = stack[i];
863                 MethodBase method = frame.GetMethod();
864                 GrammarRuleAttribute[] attributes = (GrammarRuleAttribute[])method.GetCustomAttributes(typeof(GrammarRuleAttribute), true);
865                 if (attributes != null && attributes.Length > 0)
866                     rules.Add(attributes[0].Name);
867             }
868 
869             return rules;
870         }
871 
872         public virtual int BacktrackingLevel
873         {
874             get
875             {
876                 return state.backtracking;
877             }
878             set
879             {
880                 state.backtracking = value;
881             }
882         }
883 
884         /** <summary>Return whether or not a backtracking attempt failed.</summary> */
885         public virtual bool Failed
886         {
887             get
888             {
889                 return state.failed;
890             }
891         }
892 
893         /** <summary>
894          *  Used to print out token names like ID during debugging and
895          *  error reporting.  The generated parsers implement a method
896          *  that overrides this to point to their String[] tokenNames.
897          *  </summary>
898          */
899         public virtual string[] TokenNames
900         {
901             get
902             {
903                 return null;
904             }
905         }
906 
907         /** <summary>
908          *  For debugging and other purposes, might want the grammar name.
909          *  Have ANTLR generate an implementation for this method.
910          *  </summary>
911          */
912         public virtual string GrammarFileName
913         {
914             get
915             {
916                 return null;
917             }
918         }
919 
920         public abstract string SourceName
921         {
922             get;
923         }
924 
925         /** <summary>
926          *  A convenience method for use most often with template rewrites.
927          *  Convert a List<Token> to List<String>
928          *  </summary>
929          */
ToStrings( ICollection<IToken> tokens )930         public virtual List<string> ToStrings( ICollection<IToken> tokens )
931         {
932             if ( tokens == null )
933                 return null;
934 
935             List<string> strings = new List<string>( tokens.Count );
936             foreach ( IToken token in tokens )
937             {
938                 strings.Add( token.Text );
939             }
940 
941             return strings;
942         }
943 
944         /** <summary>
945          *  Given a rule number and a start token index number, return
946          *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
947          *  start index.  If this rule has parsed input starting from the
948          *  start index before, then return where the rule stopped parsing.
949          *  It returns the index of the last token matched by the rule.
950          *  </summary>
951          *
952          *  <remarks>
953          *  For now we use a hashtable and just the slow Object-based one.
954          *  Later, we can make a special one for ints and also one that
955          *  tosses out data after we commit past input position i.
956          *  </remarks>
957          */
GetRuleMemoization( int ruleIndex, int ruleStartIndex )958         public virtual int GetRuleMemoization( int ruleIndex, int ruleStartIndex )
959         {
960             if ( state.ruleMemo[ruleIndex] == null )
961             {
962                 state.ruleMemo[ruleIndex] = new Dictionary<int, int>();
963             }
964 
965             int stopIndex;
966             if ( !state.ruleMemo[ruleIndex].TryGetValue( ruleStartIndex, out stopIndex ) )
967                 return MemoRuleUnknown;
968 
969             return stopIndex;
970         }
971 
972         /** <summary>
973          *  Has this rule already parsed input at the current index in the
974          *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
975          *  If we attempted but failed to parse properly before, return
976          *  MEMO_RULE_FAILED.
977          *  </summary>
978          *
979          *  <remarks>
980          *  This method has a side-effect: if we have seen this input for
981          *  this rule and successfully parsed before, then seek ahead to
982          *  1 past the stop token matched for this rule last time.
983          *  </remarks>
984          */
AlreadyParsedRule( IIntStream input, int ruleIndex )985         public virtual bool AlreadyParsedRule( IIntStream input, int ruleIndex )
986         {
987             int stopIndex = GetRuleMemoization( ruleIndex, input.Index );
988             if ( stopIndex == MemoRuleUnknown )
989             {
990                 return false;
991             }
992             if ( stopIndex == MemoRuleFailed )
993             {
994                 //System.out.println("rule "+ruleIndex+" will never succeed");
995                 state.failed = true;
996             }
997             else
998             {
999                 //System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
1000                 input.Seek( stopIndex + 1 ); // jump to one past stop token
1001             }
1002             return true;
1003         }
1004 
1005         /** <summary>
1006          *  Record whether or not this rule parsed the input at this position
1007          *  successfully.  Use a standard java hashtable for now.
1008          *  </summary>
1009          */
Memoize( IIntStream input, int ruleIndex, int ruleStartIndex )1010         public virtual void Memoize( IIntStream input,
1011                             int ruleIndex,
1012                             int ruleStartIndex )
1013         {
1014             int stopTokenIndex = state.failed ? MemoRuleFailed : input.Index - 1;
1015             if ( state.ruleMemo == null )
1016             {
1017                 if (TraceDestination != null)
1018                     TraceDestination.WriteLine( "!!!!!!!!! memo array is null for " + GrammarFileName );
1019             }
1020             if ( ruleIndex >= state.ruleMemo.Length )
1021             {
1022                 if (TraceDestination != null)
1023                     TraceDestination.WriteLine("!!!!!!!!! memo size is " + state.ruleMemo.Length + ", but rule index is " + ruleIndex);
1024             }
1025             if ( state.ruleMemo[ruleIndex] != null )
1026             {
1027                 state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
1028             }
1029         }
1030 
1031         /** <summary>return how many rule/input-index pairs there are in total.</summary>
1032          *  TODO: this includes synpreds. :(
1033          */
GetRuleMemoizationCacheSize()1034         public virtual int GetRuleMemoizationCacheSize()
1035         {
1036             int n = 0;
1037             for ( int i = 0; state.ruleMemo != null && i < state.ruleMemo.Length; i++ )
1038             {
1039                 var ruleMap = state.ruleMemo[i];
1040                 if ( ruleMap != null )
1041                 {
1042                     n += ruleMap.Count; // how many input indexes are recorded?
1043                 }
1044             }
1045             return n;
1046         }
1047 
TraceIn(string ruleName, int ruleIndex, object inputSymbol)1048         public virtual void TraceIn(string ruleName, int ruleIndex, object inputSymbol)
1049         {
1050             if (TraceDestination == null)
1051                 return;
1052 
1053             TraceDestination.Write("enter " + ruleName + " " + inputSymbol);
1054             if (state.backtracking > 0)
1055             {
1056                 TraceDestination.Write(" backtracking=" + state.backtracking);
1057             }
1058             TraceDestination.WriteLine();
1059         }
1060 
TraceOut(string ruleName, int ruleIndex, object inputSymbol)1061         public virtual void TraceOut(string ruleName, int ruleIndex, object inputSymbol)
1062         {
1063             if (TraceDestination == null)
1064                 return;
1065 
1066             TraceDestination.Write("exit " + ruleName + " " + inputSymbol);
1067             if (state.backtracking > 0)
1068             {
1069                 TraceDestination.Write(" backtracking=" + state.backtracking);
1070                 if (state.failed)
1071                     TraceDestination.Write(" failed");
1072                 else
1073                     TraceDestination.Write(" succeeded");
1074             }
1075             TraceDestination.WriteLine();
1076         }
1077 
1078         #region Debugging support
1079         public virtual IDebugEventListener DebugListener
1080         {
1081             get
1082             {
1083                 return null;
1084             }
1085         }
1086 
1087         [Conditional("ANTLR_DEBUG")]
DebugEnterRule(string grammarFileName, string ruleName)1088         protected virtual void DebugEnterRule(string grammarFileName, string ruleName)
1089         {
1090             IDebugEventListener dbg = DebugListener;
1091             if (dbg != null)
1092                 dbg.EnterRule(grammarFileName, ruleName);
1093         }
1094 
1095         [Conditional("ANTLR_DEBUG")]
DebugExitRule(string grammarFileName, string ruleName)1096         protected virtual void DebugExitRule(string grammarFileName, string ruleName)
1097         {
1098             IDebugEventListener dbg = DebugListener;
1099             if (dbg != null)
1100                 dbg.ExitRule(grammarFileName, ruleName);
1101         }
1102 
1103         [Conditional("ANTLR_DEBUG")]
DebugEnterSubRule(int decisionNumber)1104         protected virtual void DebugEnterSubRule(int decisionNumber)
1105         {
1106             IDebugEventListener dbg = DebugListener;
1107             if (dbg != null)
1108                 dbg.EnterSubRule(decisionNumber);
1109         }
1110 
1111         [Conditional("ANTLR_DEBUG")]
DebugExitSubRule(int decisionNumber)1112         protected virtual void DebugExitSubRule(int decisionNumber)
1113         {
1114             IDebugEventListener dbg = DebugListener;
1115             if (dbg != null)
1116                 dbg.ExitSubRule(decisionNumber);
1117         }
1118 
1119         [Conditional("ANTLR_DEBUG")]
DebugEnterAlt(int alt)1120         protected virtual void DebugEnterAlt(int alt)
1121         {
1122             IDebugEventListener dbg = DebugListener;
1123             if (dbg != null)
1124                 dbg.EnterAlt(alt);
1125         }
1126 
1127         [Conditional("ANTLR_DEBUG")]
DebugEnterDecision(int decisionNumber, bool couldBacktrack)1128         protected virtual void DebugEnterDecision(int decisionNumber, bool couldBacktrack)
1129         {
1130             IDebugEventListener dbg = DebugListener;
1131             if (dbg != null)
1132                 dbg.EnterDecision(decisionNumber, couldBacktrack);
1133         }
1134 
1135         [Conditional("ANTLR_DEBUG")]
DebugExitDecision(int decisionNumber)1136         protected virtual void DebugExitDecision(int decisionNumber)
1137         {
1138             IDebugEventListener dbg = DebugListener;
1139             if (dbg != null)
1140                 dbg.ExitDecision(decisionNumber);
1141         }
1142 
1143         [Conditional("ANTLR_DEBUG")]
DebugLocation(int line, int charPositionInLine)1144         protected virtual void DebugLocation(int line, int charPositionInLine)
1145         {
1146             IDebugEventListener dbg = DebugListener;
1147             if (dbg != null)
1148                 dbg.Location(line, charPositionInLine);
1149         }
1150 
1151         [Conditional("ANTLR_DEBUG")]
DebugSemanticPredicate(bool result, string predicate)1152         protected virtual void DebugSemanticPredicate(bool result, string predicate)
1153         {
1154             IDebugEventListener dbg = DebugListener;
1155             if (dbg != null)
1156                 dbg.SemanticPredicate(result, predicate);
1157         }
1158 
1159         [Conditional("ANTLR_DEBUG")]
DebugBeginBacktrack(int level)1160         protected virtual void DebugBeginBacktrack(int level)
1161         {
1162             IDebugEventListener dbg = DebugListener;
1163             if (dbg != null)
1164                 dbg.BeginBacktrack(level);
1165         }
1166 
1167         [Conditional("ANTLR_DEBUG")]
DebugEndBacktrack(int level, bool successful)1168         protected virtual void DebugEndBacktrack(int level, bool successful)
1169         {
1170             IDebugEventListener dbg = DebugListener;
1171             if (dbg != null)
1172                 dbg.EndBacktrack(level, successful);
1173         }
1174 
1175         [Conditional("ANTLR_DEBUG")]
DebugRecognitionException(RecognitionException ex)1176         protected virtual void DebugRecognitionException(RecognitionException ex)
1177         {
1178             IDebugEventListener dbg = DebugListener;
1179             if (dbg != null)
1180                 dbg.RecognitionException(ex);
1181         }
1182         #endregion
1183     }
1184 }
1185