/*
 * [The "BSD licence"]
 * Copyright (c) 2005-2008 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32 
namespace Antlr.Runtime
{
    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

    /** <summary>
     *  A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object. A Lexer object
     *  uses simplified match() and error recovery mechanisms in the interest
     *  of speed.
     *  </summary>
     */
    public abstract class Lexer : BaseRecognizer, ITokenSource
    {
        /** <summary>Where is the lexer drawing characters from?</summary> */
        protected ICharStream input;

        /** <summary>
         *  Create a lexer with no character stream attached. A stream has to be
         *  assigned (see <see cref="CharStream"/>) before tokens are requested,
         *  because the matching methods dereference it unconditionally.
         *  </summary>
         */
        public Lexer()
        {
        }

        /** <summary>Create a lexer that reads from <paramref name="input"/>.</summary> */
        public Lexer( ICharStream input )
        {
            this.input = input;
        }

        /** <summary>
         *  Create a lexer that reads from <paramref name="input"/> and hands
         *  <paramref name="state"/> to the base recognizer.
         *  </summary>
         */
        public Lexer( ICharStream input, RecognizerSharedState state )
            : base(state)
        {
            this.input = input;
        }

        #region Properties
        /** <summary>
         *  Get: return the text override if one has been set, otherwise the input
         *  text from the start of the current token up to the current character index.
         *  Set: install the complete text of this token; it wipes any previous
         *  changes to the text.
         *  </summary>
         */
        public string Text
        {
            get
            {
                if ( state.text != null )
                {
                    return state.text;
                }
                return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
            }
            set
            {
                state.text = value;
            }
        }

        /** <summary>Line number of the underlying character stream (read and written straight through).</summary> */
        public int Line
        {
            get
            {
                return input.Line;
            }
            set
            {
                input.Line = value;
            }
        }

        /** <summary>Position within the line of the underlying character stream (read and written straight through).</summary> */
        public int CharPositionInLine
        {
            get
            {
                return input.CharPositionInLine;
            }
            set
            {
                input.CharPositionInLine = value;
            }
        }
        #endregion

        /** <summary>
         *  Reset the base recognizer, rewind the character stream (if one is
         *  attached) to index 0 and clear the per-token lexer fields held in the
         *  shared state (if there is a shared state).
         *  </summary>
         */
        public override void Reset()
        {
            base.Reset(); // reset all recognizer state variables
            // wack Lexer state variables
            if ( input != null )
            {
                input.Seek( 0 ); // rewind the input
            }
            if ( state == null )
            {
                return; // no shared state work to do
            }
            state.token = null;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
            state.text = null;
        }

        /** <summary>
         *  Return a token from this source; i.e., match a token on the char stream.
         *  </summary>
         *
         *  <remarks>
         *  Loops until a token can be returned: rules that finish with the skip
         *  token cause another iteration, and recognition errors are reported
         *  and followed by another attempt. At end of input an end-of-file token
         *  positioned at the current index is returned.
         *  </remarks>
         */
        public virtual IToken NextToken()
        {
            for ( ; ; )
            {
                // record where the upcoming token starts and clear per-token state
                state.token = null;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
                state.text = null;
                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
                {
                    IToken eof = new CommonToken( input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index );
                    eof.Line = Line;
                    eof.CharPositionInLine = CharPositionInLine;
                    return eof;
                }
                try
                {
                    ParseNextToken();
                    if ( state.token == null )
                    {
                        // the rule did not emit anything itself; build the default token
                        Emit();
                    }
                    else if ( state.token == Tokens.Skip )
                    {
                        continue;
                    }
                    return state.token;
                }
                catch (MismatchedRangeException mre)
                {
                    ReportError(mre);
                    // MatchRange() routine has already called recover()
                }
                catch (MismatchedTokenException mte)
                {
                    ReportError(mte);
                    // Match() routine has already called recover()
                }
                catch ( RecognitionException re )
                {
                    ReportError( re );
                    Recover( re ); // throw out current char and try again
                }
            }
        }

        /** <summary>
         *  Instruct the lexer to skip creating a token for current lexer rule
         *  and look for another token.  nextToken() knows to keep looking when
         *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
         *  if token==null at end of any token rule, it creates one for you
         *  and emits it.
         *  </summary>
         */
        public virtual void Skip()
        {
            state.token = Tokens.Skip;
        }

        /** <summary>This is the lexer entry point that sets instance var 'token'</summary> */
        public abstract void mTokens();

        /** <summary>
         *  Get the character stream this lexer reads from, or set the char stream
         *  and reset the lexer.
         *  </summary>
         *
         *  <remarks>
         *  The setter clears the field before calling Reset(), so Reset() does not
         *  seek on either the previous stream or the newly assigned one.
         *  </remarks>
         */
        public virtual ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                input = null;
                Reset();
                input = value;
            }
        }

        /** <summary>
         *  Name of the source as reported by the character stream, or null when
         *  no character stream is attached (e.g. after the parameterless constructor).
         *  </summary>
         */
        public override string SourceName
        {
            get
            {
                // a lexer built without a stream must not throw from a property getter
                return input != null ? input.SourceName : null;
            }
        }

        /** <summary>
         *  Currently does not support multiple emits per nextToken invocation
         *  for efficiency reasons.  Subclass and override this method and
         *  nextToken (to push tokens into a list and pull from that list rather
         *  than a single variable as this implementation does).
         *  </summary>
         */
        public virtual void Emit( IToken token )
        {
            state.token = token;
        }

        /** <summary>
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule.  The token object should point into the
         *  char buffer start..stop.  If there is a text override in 'text',
         *  use that to set the token's text.  Override this method to emit
         *  custom Token objects.
         *  </summary>
         *
         *  <remarks>
         *  If you are building trees, then you should also override
         *  Parser or TreeParser.getMissingSymbol().
         *  </remarks>
         *
         *  <returns>The token that was created and passed to Emit(IToken).</returns>
         */
        public virtual IToken Emit()
        {
            // stop index is the last consumed character, hence CharIndex - 1
            IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
            t.Line = state.tokenStartLine;
            t.Text = state.text;
            t.CharPositionInLine = state.tokenStartCharPositionInLine;
            Emit( t );
            return t;
        }

        /** <summary>
         *  Match the characters of <paramref name="s"/> one by one against the input,
         *  consuming each character that matches.
         *  </summary>
         *
         *  <remarks>
         *  On the first mismatch: while backtracking, state.failed is set and the
         *  method returns; otherwise a MismatchedTokenException is created, passed
         *  to Recover() and thrown.
         *  </remarks>
         */
        public virtual void Match( string s )
        {
            int i = 0;
            while ( i < s.Length )
            {
                if ( input.LA( 1 ) != s[i] )
                {
                    if ( state.backtracking > 0 )
                    {
                        state.failed = true;
                        return;
                    }
                    MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
                    Recover( mte );
                    throw mte;
                }
                i++;
                input.Consume();
                state.failed = false;
            }
        }

        /** <summary>Consume the current character, whatever it is.</summary> */
        public virtual void MatchAny()
        {
            input.Consume();
        }

        /** <summary>
         *  Match the single character <paramref name="c"/> and consume it.
         *  </summary>
         *
         *  <remarks>
         *  On mismatch: while backtracking, state.failed is set and the method
         *  returns; otherwise a MismatchedTokenException is created, passed to
         *  Recover() and thrown.
         *  </remarks>
         */
        public virtual void Match( int c )
        {
            if ( input.LA( 1 ) != c )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }
                MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
                Recover( mte );  // don't really recover; just consume in lexer
                throw mte;
            }
            input.Consume();
            state.failed = false;
        }

        /** <summary>
         *  Match one character in the inclusive range <paramref name="a"/>..<paramref name="b"/>
         *  and consume it.
         *  </summary>
         *
         *  <remarks>
         *  On mismatch: while backtracking, state.failed is set and the method
         *  returns; otherwise a MismatchedRangeException is created, passed to
         *  Recover() and thrown.
         *  </remarks>
         */
        public virtual void MatchRange( int a, int b )
        {
            if ( input.LA( 1 ) < a || input.LA( 1 ) > b )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }
                MismatchedRangeException mre = new MismatchedRangeException(a, b, input);
                Recover( mre );
                throw mre;
            }
            input.Consume();
            state.failed = false;
        }

        /** <summary>What is the index of the current character of lookahead?</summary> */
        public virtual int CharIndex
        {
            get
            {
                return input.Index;
            }
        }

        /** <summary>Report a recognition error by displaying it; every error is reported.</summary> */
        public override void ReportError( RecognitionException e )
        {
            // TODO: not thought about recovery in lexer yet.
            //
            // The disabled idea was: if we've already reported an error and have
            // not matched a token yet successfully, don't report any errors
            // (check an errorRecovery flag, return if set, otherwise set it).

            DisplayRecognitionError( this.TokenNames, e );
        }

        /** <summary>
         *  Build the message for a lexer error. Character-level wording is used
         *  for the exception types handled here; any other exception is passed
         *  to the base implementation.
         *  </summary>
         */
        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
        {
            MismatchedTokenException mte = e as MismatchedTokenException;
            if ( mte != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
            }

            if ( e is NoViableAltException )
            {
                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                return "no viable alternative at character " + GetCharErrorDisplay( e.Character );
            }

            if ( e is EarlyExitException )
            {
                // for development, can add "(decision="+eee.decisionNumber+")"
                return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
            }

            // tested in the same order as before: not-set first, then set
            MismatchedNotSetException mnse = e as MismatchedNotSetException;
            if ( mnse != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mnse.Expecting;
            }

            MismatchedSetException mse = e as MismatchedSetException;
            if ( mse != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
            }

            MismatchedRangeException mre = e as MismatchedRangeException;
            if ( mre != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
                       GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
            }

            return base.GetErrorMessage( e, tokenNames );
        }

        /** <summary>
         *  Render a character code for an error message, wrapped in single quotes.
         *  End of file is shown as &lt;EOF&gt;; newline, tab and carriage return are
         *  shown as their escape sequences.
         *  </summary>
         */
        public virtual string GetCharErrorDisplay( int c )
        {
            string s;
            switch ( c )
            {
            case TokenTypes.EndOfFile:
                s = "<EOF>";
                break;
            case '\n':
                s = "\\n";
                break;
            case '\t':
                s = "\\t";
                break;
            case '\r':
                s = "\\r";
                break;
            default:
                // Convert only when no special case applies, and never let a build
                // with checked arithmetic throw on a code outside the char range.
                s = unchecked( (char)c ).ToString();
                break;
            }
            return "'" + s + "'";
        }

        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out.  You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         */
        public virtual void Recover( RecognitionException re )
        {
            input.Consume();
        }

        /** <summary>Trace entry into a rule, describing the current lookahead character and position.</summary> */
        [Conditional("ANTLR_TRACE")]
        public virtual void TraceIn( string ruleName, int ruleIndex )
        {
            base.TraceIn( ruleName, ruleIndex, GetTraceInputSymbol() );
        }

        /** <summary>Trace exit from a rule, describing the current lookahead character and position.</summary> */
        [Conditional("ANTLR_TRACE")]
        public virtual void TraceOut( string ruleName, int ruleIndex )
        {
            base.TraceOut( ruleName, ruleIndex, GetTraceInputSymbol() );
        }

        /** <summary>Run the lexer entry point to match the next token.</summary> */
        protected virtual void ParseNextToken()
        {
            mTokens();
        }

        /** <summary>Describe the current lookahead character plus line and column for trace output.</summary> */
        private string GetTraceInputSymbol()
        {
            // unchecked: the lookahead value may not fit in a char (e.g. at end of input)
            return unchecked( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
        }
    }
}
436