• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * [The "BSD licence"]
 * Copyright (c) 2005-2008 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32 
namespace Antlr.Runtime {
    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

    /** <summary>
     *  A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object. A Lexer object
     *  uses simplified Match() and error recovery mechanisms in the interest
     *  of speed.
     *  </summary>
     */
    public abstract class Lexer : BaseRecognizer, ITokenSource {
        /** <summary>Where is the lexer drawing characters from?</summary> */
        protected ICharStream input;

        /** <summary>
         *  Create a lexer without an input stream. The stream must be supplied
         *  through <see cref="CharStream"/> before any token is requested.
         *  </summary>
         */
        public Lexer() {
        }

        /** <summary>Create a lexer that draws characters from <paramref name="input"/>.</summary> */
        public Lexer(ICharStream input) {
            this.input = input;
        }

        /** <summary>
         *  Create a lexer that draws characters from <paramref name="input"/> and
         *  hands <paramref name="state"/> to the base recognizer.
         *  </summary>
         */
        public Lexer(ICharStream input, RecognizerSharedState state)
            : base(state) {
            this.input = input;
        }

        #region Properties
        /** <summary>
         *  Get the text matched so far for the current token, or the text
         *  override if one has been set. Setting this property replaces the
         *  complete text of the token; it wipes any previous changes to the text.
         *  </summary>
         */
        public string Text {
            get {
                if (state.text != null) {
                    return state.text;
                }
                return input.Substring(state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex);
            }
            set {
                state.text = value;
            }
        }

        /** <summary>Get or set the current line, as tracked by the input stream.</summary> */
        public int Line {
            get {
                return input.Line;
            }
            set {
                input.Line = value;
            }
        }

        /** <summary>Get or set the current position within the line, as tracked by the input stream.</summary> */
        public int CharPositionInLine {
            get {
                return input.CharPositionInLine;
            }
            set {
                input.CharPositionInLine = value;
            }
        }
        #endregion

        /** <summary>
         *  Reset the base recognizer, rewind the input (when there is one) and
         *  clear the per-token fields of the shared state (when there is one).
         *  </summary>
         */
        public override void Reset() {
            base.Reset(); // reset all recognizer state variables
            // wack Lexer state variables
            if (input != null) {
                input.Seek(0); // rewind the input
            }
            if (state == null) {
                return; // no shared state work to do
            }
            state.token = null;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
            state.text = null;
        }

        /** <summary>
         *  Return a token from this source; i.e., match a token on the char stream.
         *  </summary>
         *
         *  <remarks>
         *  Loops until a token is produced: rules that end with the token set to
         *  Tokens.Skip and rules that fail with a recognition error both cause
         *  another attempt. At end of input an EOF token is returned.
         *  </remarks>
         */
        public virtual IToken NextToken() {
            for (; ; ) {
                // Start every attempt with clean per-token state anchored at the
                // current input position.
                state.token = null;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
                state.text = null;

                if (input.LA(1) == CharStreamConstants.EndOfFile) {
                    IToken eof = new CommonToken(input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index);
                    eof.Line = Line;
                    eof.CharPositionInLine = CharPositionInLine;
                    return eof;
                }

                try {
                    mTokens();
                    if (state.token == null) {
                        Emit();
                    } else if (state.token == Tokens.Skip) {
                        continue;
                    }
                    return state.token;
                } catch (MismatchedRangeException mre) {
                    ReportError(mre);
                    // MatchRange() has already called Recover()
                } catch (MismatchedTokenException mte) {
                    ReportError(mte);
                    // Match() has already called Recover()
                } catch (RecognitionException re) {
                    ReportError(re);
                    // Throw out the current char and try again. Without this, an
                    // error raised before any character was consumed would be
                    // hit again at the same input position on every iteration.
                    Recover(re);
                }
            }
        }

        /** <summary>
         *  Instruct the lexer to skip creating a token for current lexer rule
         *  and look for another token.  NextToken() knows to keep looking when
         *  a lexer rule finishes with token set to Tokens.Skip.  Recall that
         *  if token==null at end of any token rule, it creates one for you
         *  and emits it.
         *  </summary>
         */
        public virtual void Skip() {
            state.token = Tokens.Skip;
        }

        /** <summary>This is the lexer entry point that sets instance var 'token'</summary> */
        public abstract void mTokens();

        /** <summary>
         *  Get the char stream the lexer reads from. Setting the stream also
         *  resets the lexer.
         *  </summary>
         */
        public virtual ICharStream CharStream {
            get {
                return input;
            }
            set {
                // Detach the old stream first so that Reset() does not rewind it.
                input = null;
                Reset();
                input = value;
            }
        }

        /** <summary>
         *  The source name reported by the input stream, or null when no input
         *  stream has been assigned.
         *  </summary>
         */
        public override string SourceName {
            get {
                if (input == null) {
                    return null;
                }
                return input.SourceName;
            }
        }

        /** <summary>
         *  Currently does not support multiple emits per NextToken invocation
         *  for efficiency reasons.  Subclass and override this method and
         *  NextToken (to push tokens into a list and pull from that list rather
         *  than a single variable as this implementation does).
         *  </summary>
         */
        public virtual void Emit(IToken token) {
            state.token = token;
        }

        /** <summary>
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule.  The token object should point into the
         *  char buffer start..stop.  If there is a text override in 'text',
         *  use that to set the token's text.  Override this method to emit
         *  custom Token objects.
         *  </summary>
         *
         *  <remarks>
         *  If you are building trees, then you should also override
         *  Parser or TreeParser.getMissingSymbol().
         *  </remarks>
         */
        public virtual IToken Emit() {
            IToken t = new CommonToken(input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1);
            t.Line = state.tokenStartLine;
            t.Text = state.text;
            t.CharPositionInLine = state.tokenStartCharPositionInLine;
            Emit(t);
            return t;
        }

        /** <summary>
         *  Match the characters of <paramref name="s"/> one after another.
         *  While backtracking, a mismatch only sets state.failed; otherwise a
         *  MismatchedTokenException is thrown after Recover() has been called.
         *  </summary>
         */
        public virtual void Match(string s) {
            for (int i = 0; i < s.Length; i++) {
                if (input.LA(1) != s[i]) {
                    if (state.backtracking > 0) {
                        state.failed = true;
                        return;
                    }
                    MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
                    Recover(mte);
                    throw mte;
                }
                input.Consume();
                state.failed = false;
            }
        }

        /** <summary>Consume the next character, whatever it is.</summary> */
        public virtual void MatchAny() {
            input.Consume();
        }

        /** <summary>
         *  Match the single character <paramref name="c"/>.
         *  While backtracking, a mismatch only sets state.failed; otherwise a
         *  MismatchedTokenException is thrown after Recover() has been called.
         *  </summary>
         */
        public virtual void Match(int c) {
            if (input.LA(1) != c) {
                if (state.backtracking > 0) {
                    state.failed = true;
                    return;
                }
                MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
                Recover(mte);  // don't really recover; just consume in lexer
                throw mte;
            }
            input.Consume();
            state.failed = false;
        }

        /** <summary>
         *  Match one character in the inclusive range
         *  <paramref name="a"/>..<paramref name="b"/>.
         *  While backtracking, a mismatch only sets state.failed; otherwise a
         *  MismatchedRangeException is thrown after Recover() has been called.
         *  </summary>
         */
        public virtual void MatchRange(int a, int b) {
            int next = input.LA(1);
            if (next < a || next > b) {
                if (state.backtracking > 0) {
                    state.failed = true;
                    return;
                }
                MismatchedRangeException mre = new MismatchedRangeException(a, b, input);
                Recover(mre);
                throw mre;
            }
            input.Consume();
            state.failed = false;
        }

        /** <summary>What is the index of the current character of lookahead?</summary> */
        public virtual int CharIndex {
            get {
                return input.Index;
            }
        }

        /** <summary>Report a recognition error through DisplayRecognitionError.</summary> */
        public override void ReportError(RecognitionException e) {
            // TODO: not thought about recovery in lexer yet.
            //
            // // if we've already reported an error and have not matched a token
            // // yet successfully, don't report any errors.
            // if ( errorRecovery ) {
            //     //System.err.print("[SPURIOUS] ");
            //     return;
            // }
            // errorRecovery = true;

            DisplayRecognitionError(this.TokenNames, e);
        }

        /** <summary>
         *  Build the message for a recognition error, phrased in terms of
         *  characters. Exception types not handled here are passed to the base
         *  implementation.
         *  </summary>
         */
        public override string GetErrorMessage(RecognitionException e, string[] tokenNames) {
            if (e is MismatchedTokenException) {
                MismatchedTokenException mte = (MismatchedTokenException)e;
                return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting " + GetCharErrorDisplay(mte.Expecting);
            }
            if (e is NoViableAltException) {
                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                return "no viable alternative at character " + GetCharErrorDisplay(e.Character);
            }
            if (e is EarlyExitException) {
                // for development, can add "(decision="+eee.decisionNumber+")"
                return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay(e.Character);
            }
            if (e is MismatchedNotSetException) {
                MismatchedNotSetException mnse = (MismatchedNotSetException)e;
                return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " + mnse.Expecting;
            }
            if (e is MismatchedSetException) {
                MismatchedSetException mse = (MismatchedSetException)e;
                return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " + mse.Expecting;
            }
            if (e is MismatchedRangeException) {
                MismatchedRangeException mre = (MismatchedRangeException)e;
                return "mismatched character " + GetCharErrorDisplay(e.Character) + " expecting set " +
                       GetCharErrorDisplay(mre.A) + ".." + GetCharErrorDisplay(mre.B);
            }
            return base.GetErrorMessage(e, tokenNames);
        }

        /** <summary>
         *  Render a character for an error message: wrapped in single quotes,
         *  with end of file shown as &lt;EOF&gt; and newline, tab and carriage
         *  return shown as escape sequences.
         *  </summary>
         */
        public virtual string GetCharErrorDisplay(int c) {
            string s;
            switch (c) {
                case TokenTypes.EndOfFile:
                    s = "<EOF>";
                    break;
                case '\n':
                    s = "\\n";
                    break;
                case '\t':
                    s = "\\t";
                    break;
                case '\r':
                    s = "\\r";
                    break;
                default:
                    // Convert only here so the special values handled above are
                    // never pushed through the int-to-char cast.
                    s = ((char)c).ToString();
                    break;
            }
            return "'" + s + "'";
        }

        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out.  You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         */
        public virtual void Recover(RecognitionException re) {
            //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
            //re.printStackTrace();
            input.Consume();
        }

        /** <summary>
         *  Trace entry into a rule, describing the current lookahead character
         *  and position. Calls are compiled only when ANTLR_TRACE is defined.
         *  </summary>
         */
        [Conditional("ANTLR_TRACE")]
        public virtual void TraceIn(string ruleName, int ruleIndex) {
            string inputSymbol = ((char)input.LT(1)) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceIn(ruleName, ruleIndex, inputSymbol);
        }

        /** <summary>
         *  Trace exit from a rule, describing the current lookahead character
         *  and position. Calls are compiled only when ANTLR_TRACE is defined.
         *  </summary>
         */
        [Conditional("ANTLR_TRACE")]
        public virtual void TraceOut(string ruleName, int ruleIndex) {
            string inputSymbol = ((char)input.LT(1)) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceOut(ruleName, ruleIndex, inputSymbol);
        }
    }
}
356