• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// [The "BSD licence"]
2// Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions
7// are met:
8// 1. Redistributions of source code must retain the above copyright
9//    notice, this list of conditions and the following disclaimer.
10// 2. Redistributions in binary form must reproduce the above copyright
11//    notice, this list of conditions and the following disclaimer in the
12//    documentation and/or other materials provided with the distribution.
13// 3. The name of the author may not be used to endorse or promote products
14//    derived from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27#import <ANTLR/antlr.h>
28#import "Lexer.h"
29
30@implementation Lexer
31
32@synthesize input;
33@synthesize ruleNestingLevel;
34#pragma mark Initializer
35
/** Create a lexer that reads from anInput, using a newly allocated
 *  RecognizerSharedState.
 *
 *  @param anInput the character stream to tokenize; retained by the lexer.
 *  @return the initialized lexer, or nil when the superclass initializer fails.
 */
- (id) initWithCharStream:(id<CharStream>)anInput
{
    RecognizerSharedState *freshState = [[RecognizerSharedState alloc] init];
    self = [super initWithState:freshState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    // If the shared state already carries a token, point it at this stream.
    if ( state.token != nil ) {
        CommonToken *pending = (CommonToken *)state.token;
        [pending setInput:anInput];
    }
    ruleNestingLevel = 0;
    return self;
}
47
/** Create a lexer that reads from anInput and uses the caller-supplied
 *  shared state.
 *
 *  @param anInput the character stream to tokenize; retained by the lexer.
 *  @param aState  the recognizer state handed to the superclass initializer.
 *  @return the initialized lexer, or nil when the superclass initializer fails.
 */
- (id) initWithCharStream:(id<CharStream>)anInput State:(RecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    // If the shared state already carries a token, point it at this stream.
    if ( state.token != nil ) {
        CommonToken *pending = (CommonToken *)state.token;
        [pending setInput:anInput];
    }
    ruleNestingLevel = 0;
    return self;
}
59
/** Give up the lexer's retain on its character stream, then let the
 *  superclass tear down.
 */
- (void) dealloc
{
    // Messaging nil is a no-op, so no explicit nil test is needed.
    [input release];
    [super dealloc];
}
65
/** NSCopying support: allocate a new instance of the receiver's class with
 *  plain -init, then carry over the input stream (when there is one) and
 *  the rule nesting level.
 *
 *  Only those two values are copied here.
 *
 *  @param aZone the zone passed through to +allocWithZone:.
 *  @return a new lexer owned by the caller.
 */
- (id) copyWithZone:(NSZone *)aZone
{
    Lexer *duplicate = [[[self class] allocWithZone:aZone] init];

    if ( input != nil ) {
        duplicate.input = input;
    }
    duplicate.ruleNestingLevel = ruleNestingLevel;
    return duplicate;
}
77
/** Reset the recognizer and the lexer-specific state.
 *
 *  Calls the superclass reset, rewinds the input stream to position 0 when
 *  one is set, and, when a shared state object exists, clears the
 *  current token, type, channel, token start markers and text override.
 */
- (void) reset
{
    [super reset];      // reset all recognizer state variables

    // Rewind the input; a nil input simply ignores the message.
    [input seek:0];

    if ( state != nil ) {
        // Wipe the lexer state variables kept in the shared state.
        state.token = nil;
        state.type = CommonToken.INVALID_TOKEN_TYPE;
        state.channel = CommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = -1;
        state.tokenStartCharPositionInLine = -1;
        state.tokenStartLine = -1;
        state.text = nil;
    }
}
96
97// token stuff
98#pragma mark Tokens
99
/** Return the token currently held by the shared state (may be nil). */
- (id<Token>)getToken
{
    id<Token> current = [state getToken];
    return current;
}
104
/** Store aToken as the current token in the shared state.
 *
 *  Nothing happens when aToken is already the current token. Otherwise the
 *  token is retained here and then assigned to state.token.
 *
 *  NOTE(review): -emit:, -skip and -reset assign state.token without an
 *  explicit retain; if the state.token property retains on its own, the
 *  retain below is an extra one -- confirm against RecognizerSharedState.
 *
 *  @param aToken the token to install; may be nil.
 */
- (void) setToken: (id<Token>) aToken
{
    if ( aToken == state.token ) {
        return;
    }
    [aToken retain];
    state.token = aToken;
}
112
113
114// this method may be overridden in the generated lexer if we generate a filtering lexer.
/** Return the next token from the input stream.
 *
 *  Each pass clears the current token, records where the token starts
 *  (index, line, column), and then either returns an EOF token when the
 *  lookahead is CharStreamEOF or calls -mTokens to match one token. When
 *  -mTokens leaves no token behind, -emit builds one; when it leaves the
 *  shared skip token, the loop starts over. Recognition errors are reported
 *  and the loop tries again.
 *
 *  @return the matched token, or an EOF token at end of input.
 */
- (id<Token>) nextToken
{
    for (;;) {
        // Start every attempt from a clean per-token state.
        [self setToken:nil];
        state.channel = CommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = [input index];
        state.tokenStartCharPositionInLine = [input getCharPositionInLine];
        state.tokenStartLine = [input getLine];
        state.text = nil;

        if ( [input LA:1] == CharStreamEOF ) {
            // End of input: hand back a zero-width EOF token at the current position.
            CommonToken *eofToken = [CommonToken newToken:input
                                                     Type:TokenTypeEOF
                                                  Channel:CommonToken.DEFAULT_CHANNEL
                                                    Start:[input index]
                                                     Stop:[input index]];
            [eofToken setLine:[input getLine]];
            [eofToken setCharPositionInLine:[input getCharPositionInLine]];
            return eofToken;
        }

        @try {
            [self mTokens];
            if ( state.token == nil ) {
                // The rule did not emit anything itself; build the default token.
                [self emit];
            }
            else if ( state.token == [CommonToken skipToken] ) {
                // The rule asked for its match to be skipped; look for another token.
                continue;
            }
            return state.token;
        }
        // The match* methods of this class call -recover: before throwing the
        // two mismatch exceptions below, so they are only reported here.
        @catch (MismatchedRangeException *rangeError) {
            [self reportError:rangeError];
        }
        @catch (MismatchedTokenException *tokenError) {
            [self reportError:tokenError];
        }
        @catch (RecognitionException *otherError) {
            [self reportError:otherError];
            [self recover:otherError];
        }
    }
}
161
/** Abstract entry point for matching a token; the generated lexer source
 *  supplies the real implementation. This base version reports the selector
 *  as unrecognized.
 */
- (void) mTokens
{
    SEL thisSelector = _cmd;
    [self doesNotRecognizeSelector:thisSelector];
}
166
/** Mark the current match as skipped by installing the shared skip token;
 *  -nextToken compares against that token and loops for another match.
 */
- (void) skip
{
    id<Token> skipMarker = [CommonToken skipToken];
    state.token = skipMarker;
}
171
/** Return the character stream this lexer reads from (may be nil). */
- (id<CharStream>) input
{
    id<CharStream> currentStream = input;
    return currentStream;
}
176
/** Replace the character stream this lexer reads from and reset the lexer.
 *
 *  The lexer holds exactly one retain on its stream. The incoming stream is
 *  retained before the old one is released, so passing the stream that is
 *  already installed leaves its retain count unchanged (the previous code
 *  retained it again without a matching release in that case).
 *
 *  @param anInput the new character stream; may be nil.
 */
- (void) setInput:(id<CharStream>) anInput
{
    [anInput retain];   // take ownership first so that anInput == input is safe
    [input release];

    // Clear the ivar before -reset so -reset does not seek any stream; the new
    // stream is installed afterwards exactly as the caller positioned it.
    input = nil;
    [self reset];
    input = anInput;
}
187
/** Record aToken as the token produced by the current -nextToken call.
 *
 *  For efficiency only a single token variable is kept, so several emits
 *  during one -nextToken invocation are not supported.  To support them,
 *  subclass and override both this method and -nextToken so that tokens are
 *  pushed onto a list and pulled from that list instead.
 *
 *  @param aToken the token to hand back from -nextToken.
 */
- (void) emit:(id<Token>)aToken
{
    id<Token> emitted = aToken;
    state.token = emitted;
}
197
/** The standard method called to automatically emit a token at the
 *  outermost lexical rule.
 *
 *  Builds a CommonToken that points into the char buffer from the recorded
 *  token start index up to the character before the current input index,
 *  with the type and channel held in the shared state.  The token text is
 *  taken from -text, which yields the text override when one is set.
 *  Override this method to emit custom Token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 */
- (void) emit
{
    id<Token> builtToken = [CommonToken newToken:input
                                            Type:state.type
                                         Channel:state.channel
                                           Start:state.tokenStartCharIndex
                                            Stop:input.index-1];
    builtToken.text = [self text];
    // Position information was captured when -nextToken started this token.
    [builtToken setLine:state.tokenStartLine];
    [builtToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    // NOTE(review): this retain has no matching release in this method --
    // confirm the ownership convention of +newToken:... and state.token.
    [builtToken retain];
    [self emit:builtToken];
}
221
222// matching
223#pragma mark Matching
/** Match aString against the input, one character at a time, consuming each
 *  character that matches.
 *
 *  On the first mismatch: while backtracking, state.failed is set to YES and
 *  the method returns; otherwise a MismatchedTokenException carrying the
 *  expected and the actual character is built, -recover: is called, and the
 *  exception is thrown.  After every matched character state.failed is set
 *  to NO.
 *
 *  @param aString the literal text expected at the current input position.
 */
- (void) matchString:(NSString *)aString
{
    // NSUInteger matches the type returned by -length and taken by
    // -characterAtIndex:, so no narrowing to 32 bits takes place.
    NSUInteger stringLength = [aString length];
    for ( NSUInteger i = 0; i < stringLength; i++ ) {
        unichar expected = [aString characterAtIndex:i];
        unichar actual = [input LA:1];
        if ( actual != expected ) {
            if ([state getBacktracking] > 0) {
                state.failed = YES;
                return;
            }
            MismatchedTokenException *mte = [MismatchedTokenException newExceptionChar:expected Stream:input];
            mte.c = actual;
            [self recover:mte];
            @throw mte;
        }
        [input consume];
        state.failed = NO;
    }
}
246
/** Match any single character by consuming one character of input. */
- (void) matchAny
{
    id<CharStream> stream = input;
    [stream consume];
}
251
/** Match the single character aChar at the current input position.
 *
 *  On a match the character is consumed and state.failed is set to NO.
 *  On a mismatch: while backtracking, state.failed is set to YES and the
 *  method returns; otherwise a MismatchedTokenException carrying the actual
 *  character is built, -recover: is called, and the exception is thrown.
 *
 *  @param aChar the character expected next in the input.
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    unichar lookahead = [input LA:1];

    if ( lookahead == aChar ) {
        [input consume];
        state.failed = NO;
        return;
    }

    if ( [state getBacktracking] > 0 ) {
        state.failed = YES;
        return;
    }

    MismatchedTokenException *mismatch = [MismatchedTokenException newExceptionChar:aChar Stream:input];
    mismatch.c = lookahead;
    [self recover:mismatch];
    @throw mismatch;
}
271
/** Match one character that lies in the inclusive range fromChar..toChar.
 *
 *  On a match the character is consumed and state.failed is set to NO.
 *  On a mismatch: while backtracking, state.failed is set to YES and the
 *  method returns; otherwise a MismatchedRangeException describing the
 *  expected range and the actual character is built, -recover: is called,
 *  and the exception is thrown.
 *
 *  @param fromChar the lowest acceptable character.
 *  @param toChar   the highest acceptable character.
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA < fromChar || charLA > toChar ) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        // NSRange is {location, length}, not {first, last}.  The error message
        // code prints location .. location+length-1, so the length must be the
        // number of characters in the range, not the upper bound itself.
        NSUInteger rangeLength = (toChar >= fromChar) ? (NSUInteger)(toChar - fromChar) + 1 : 0;
        MismatchedRangeException  *mre = [MismatchedRangeException
                    newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                               stream:input];
        mre.c = charLA;
        [self recover:mre];
        @throw mre;
    }
    [input consume];
    state.failed = NO;
}
290
291    // info
292#pragma mark Informational
293
/** Return the line number reported by the input stream. */
- (NSUInteger) line
{
    return [input getLine];
}
298
/** Return the character position within the line reported by the input stream. */
- (NSUInteger) charPositionInLine
{
    return [input getCharPositionInLine];
}
303
/** Return the current index into the input stream.
 *
 *  This used to return a constant 0, unlike -line and -charPositionInLine,
 *  which forward to the input stream; it now forwards as well.  With no
 *  input set the result is still 0, because messaging nil yields zero.
 */
- (NSInteger) index
{
    return [input index];
}
308
/** Return the text of the token being matched.
 *
 *  When a text override has been stored in the shared state, that override
 *  is returned.  Otherwise the text is the slice of the input from the
 *  recorded token start index up to (not including) the current input index.
 */
- (NSString *) text
{
    NSString *overrideText = state.text;
    if ( overrideText != nil ) {
        return overrideText;
    }
    NSRange matchedRange = NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex);
    return [input substringWithRange:matchedRange];
}
316
/** Store a text override in the shared state; -text returns it in place of
 *  the matched input slice while it is non-nil.
 *
 *  @param theText the replacement text, or nil for no override.
 */
- (void) setText:(NSString *) theText
{
    NSString *overrideText = theText;
    state.text = overrideText;
}
321
322    // error handling
/** Report a recognition error by passing it, together with the token names,
 *  to -displayRecognitionError:Exception:.
 *
 *  TODO: recovery in the lexer has not been thought through yet.  The idea
 *  sketched in the original source is an errorRecovery flag: once an error
 *  has been reported and no token has been matched successfully since,
 *  further (spurious) errors would be suppressed.
 *
 *  @param e the recognition exception to report.
 */
- (void) reportError:(RecognitionException *)e
{
    id tokenNames = [self getTokenNames];
    [self displayRecognitionError:tokenNames Exception:e];
}
338
/** Build a human-readable message for a lexer recognition error.
 *
 *  The exception class selects the message.  MismatchedNotSetException is
 *  tested before MismatchedSetException, and anything not handled here is
 *  delegated to the superclass.
 *
 *  @param e          the recognition exception to describe.
 *  @param tokenNames not used by this implementation; the superclass
 *                    fallback is given [self getTokenNames] instead.
 *  @return the message text.
 */
- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
    NSString *msg = nil;
    if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
        MismatchedTokenException *mte = (MismatchedTokenException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
            [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expectingChar]];
    }
    else if ( [e isKindOfClass:[NoViableAltException class]] ) {
        NoViableAltException *nvae = (NoViableAltException *)e;
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        // The numbers are cast to long and printed with %ld so the specifier
        // is correct whatever integer width the properties are declared with.
        msg = [NSString stringWithFormat:@"no viable alternative decision:%ld state:%ld at character \"%@\"",
               (long)nvae.decisionNumber, (long)nvae.stateNumber, [self getCharErrorDisplay:(nvae.c)]];
    }
    else if ( [e isKindOfClass:[EarlyExitException class]] ) {
        EarlyExitException *eee = (EarlyExitException *)e;
        // for development, can add "(decision="+eee.decisionNumber+")"
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
               [self getCharErrorDisplay:(eee.c)]];
    }
    else if ( [e isKindOfClass:[MismatchedNotSetException class]] ) {
        MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
        MismatchedSetException *mse = (MismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[MismatchedRangeException class]] ) {
        MismatchedRangeException *mre = (MismatchedRangeException *)e;
        // The expected range is shown as first..last, i.e. location .. location+length-1.
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
               [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
               [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    }
    else {
        msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    }
    return msg;
}
385
/** Return a printable representation of the character code c for use in
 *  error messages.
 *
 *  0 is shown as "char=<nil>", TokenTypeEOF and 65535 as "<EOF>", and
 *  newline, tab and carriage return as their backslash escapes.  Every other
 *  value is shown as the character itself.
 *
 *  @param c the character code to display.
 *  @return the display string.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    NSString *s;
    switch ( c ) {
        case 0:
            s = @"char=<nil>";
            break;
        case TokenTypeEOF :
        case 65535:
            s = @"<EOF>";
            break;
        case '\n' :
            s = @"\\n";
            break;
        case '\t' :
            s = @"\\t";
            break;
        case '\r' :
            s = @"\\r";
            break;
        default:
            // %C formats a 16-bit unichar.  The earlier (char) cast with %c
            // cut the value down to 8 bits and mangled anything above 0x7F;
            // values up to 0x7F print exactly as before.
            s = [NSString stringWithFormat:@"%C", (unichar)c];
            break;
    }
    return s;
}
412
/** Lexers can normally match any char in their vocabulary after matching
 *  a token, so do the easy thing: kill one character and hope it all
 *  works out.  You can instead use the rule invocation stack to do
 *  sophisticated error recovery if you are in a fragment rule.
 *
 *  @param re the exception being recovered from; not inspected here.
 */
- (void)recover:(RecognitionException *)re
{
    // Drop the character at the current position; nothing else is done.
    id<CharStream> stream = input;
    [stream consume];
}
424
/** Trace entry into a rule: format the lookahead symbol plus the current
 *  line and column of the input and pass that description to the superclass.
 *
 *  @param ruleName  name of the rule being entered.
 *  @param ruleIndex index of the rule being entered.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // Line and column are cast to long and printed with %ld, so the format
    // specifier matches the argument width on 32-bit and 64-bit builds alike.
    // NOTE(review): [input LT:1] is still passed to %c as is -- confirm its
    // return type is an int-sized character code.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%ld:%ld\n",
                             [input LT:1],
                             (long)[input getLine],
                             (long)[input getCharPositionInLine]];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}
430
/** Trace exit from a rule: format the lookahead symbol plus the current
 *  line and column of the input and pass that description to the superclass.
 *
 *  @param ruleName  name of the rule being left.
 *  @param ruleIndex index of the rule being left.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // Line and column are cast to long and printed with %ld, so the format
    // specifier matches the argument width on 32-bit and 64-bit builds alike.
    // NOTE(review): [input LT:1] is still passed to %c as is -- confirm its
    // return type is an int-sized character code.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%ld:%ld\n",
                             [input LT:1],
                             (long)[input getLine],
                             (long)[input getCharPositionInLine]];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}
436
437@end
438