// [The "BSD licence"]
// Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#import <ANTLR/antlr.h>
#import "ANTLRLexer.h"

@implementation ANTLRLexer

@synthesize input;
@synthesize ruleNestingLevel;

#pragma mark Initializer

/** Create a lexer that reads from anInput, using a newly allocated
 *  ANTLRRecognizerSharedState. The stream is retained. If the state already
 *  carries a token, that token is pointed at the stream.
 *
 *  NOTE(review): the state allocated here is never released by this method;
 *  whether that leaks depends on how -initWithState: takes ownership -- confirm.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput
{
    self = [super initWithState:[[ANTLRRecognizerSharedState alloc] init]];
    if ( self != nil ) {
        input = [anInput retain];
        if (state.token != nil)
            [((ANTLRCommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/** Create a lexer that reads from anInput and uses the caller-supplied
 *  state aState. The stream is retained. If the state already carries a
 *  token, that token is pointed at the stream.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self != nil ) {
        input = [anInput retain];
        if (state.token != nil)
            [((ANTLRCommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/** Give up ownership of the input stream. */
- (void) dealloc
{
    [input release];    // messaging nil is a no-op, no guard needed
    [super dealloc];
}

/** Return a new lexer of the receiver's class that shares the receiver's
 *  input stream and has the same ruleNestingLevel.
 *  Assigning copy.input goes through -setInput:, which also resets the copy.
 */
- (id) copyWithZone:(NSZone *)aZone
{
    ANTLRLexer *copy;

    copy = [[[self class] allocWithZone:aZone] init];
    //    copy = [super copyWithZone:aZone]; // allocation occurs here
    if ( input != nil )
        copy.input = input;
    copy.ruleNestingLevel = ruleNestingLevel;
    return copy;
}

/** Reset the recognizer (via super), rewind the input stream to index 0 if
 *  there is one, and clear the per-token fields of the shared state.
 */
- (void) reset
{
    [super reset]; // reset all recognizer state variables
    // wack Lexer state variables
    if ( input != nil ) {
        [input seek:0]; // rewind the input
    }
    if ( state == nil ) {
        return; // no shared state work to do
    }
    state.token = nil;
    state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
    state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
    state.tokenStartCharIndex = -1;
    state.tokenStartCharPositionInLine = -1;
    state.tokenStartLine = -1;
    state.text = nil;
}

// token stuff
#pragma mark Tokens

/** Return the token currently held by the shared state. */
- (id<ANTLRToken>)getToken
{
    return [state getToken];
}

/** Store aToken in the shared state unless it is already the current token.
 *
 *  NOTE(review): aToken is retained here before being assigned to
 *  state.token; if that property also retains, this over-retains -- confirm
 *  against ANTLRRecognizerSharedState.
 */
- (void) setToken: (id<ANTLRToken>) aToken
{
    if (state.token != aToken) {
        [aToken retain];
        state.token = aToken;
    }
}

/** Return the next token from the input stream.
 *
 *  Each iteration clears the per-token state and records where the token
 *  starts. At end of input an EOF token is built and returned. Otherwise
 *  -mTokens is invoked; if it left no token behind, one is emitted
 *  automatically, and if it set the skip token the loop starts over.
 *  Recognition errors are reported and the loop continues with the next
 *  character.
 *
 *  This method may be overridden in the generated lexer if we generate a
 *  filtering lexer.
 */
- (id<ANTLRToken>) nextToken
{
    while (YES) {
        [self setToken:nil];
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.charPositionInLine;
        state.tokenStartLine = input.line;
        state.text = nil;

        // [self setText:[self text]];
        if ([input LA:1] == ANTLRCharStreamEOF) {
            ANTLRCommonToken *eof = [ANTLRCommonToken newToken:input
                                                          Type:ANTLRTokenTypeEOF
                                                       Channel:ANTLRCommonToken.DEFAULT_CHANNEL
                                                         Start:input.index
                                                          Stop:input.index];
            [eof setLine:input.line];
            [eof setCharPositionInLine:input.charPositionInLine];
            return eof;
        }
        @try {
            [self mTokens];
            // SEL aMethod = @selector(mTokens);
            // [[self class] instancesRespondToSelector:aMethod];
            if ( state.token == nil)
                [self emit];
            else if ( state.token == [ANTLRCommonToken skipToken] ) {
                continue;
            }
            return state.token;
        }
        @catch (ANTLRNoViableAltException *nva) {
            [self reportError:nva];
            [self recover:nva];     // throw away the offending character
        }
        @catch (ANTLRRecognitionException *e) {
            // The match... methods in this class call -recover: themselves
            // before throwing, so only report here.
            [self reportError:e];
        }
    }
}

/** Abstract entry point for matching a token; defined in generated source. */
- (void) mTokens
{   // abstract, defined in generated source as a starting point for matching
    [self doesNotRecognizeSelector:_cmd];
}

/** Mark the current token as one to skip: -nextToken sees the shared skip
 *  token and looks for another token instead of returning it.
 */
- (void) skip
{
    state.token = [ANTLRCommonToken skipToken];
}

/** Return the character stream this lexer reads from. */
- (id<ANTLRCharStream>) input
{
    return input;
}

/** Replace the character stream and reset the lexer.
 *
 *  The ivar is nil while -reset runs, so -reset does not seek either the old
 *  or the new stream. The new stream is retained before the old one is
 *  released, and nothing is retained when the same stream is set again
 *  (the previous code added one extra retain in that case, leaking it).
 */
- (void) setInput:(id<ANTLRCharStream>) anInput
{
    if ( anInput != input ) {
        [anInput retain];
        [input release];
    }
    input = nil;
    [self reset];
    input = anInput;
}

/** Currently does not support multiple emits per nextToken invocation
 *  for efficiency reasons.  Subclass and override this method and
 *  nextToken (to push tokens into a list and pull from that list rather
 *  than a single variable as this implementation does).
 */
- (void) emit:(id<ANTLRToken>)aToken
{
    state.token = aToken;
}

/** The standard method called to automatically emit a token at the
 *  outermost lexical rule.  The token object should point into the
 *  char buffer start..stop.  If there is a text override in 'text',
 *  use that to set the token's text.  Override this method to emit
 *  custom Token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 *
 *  NOTE(review): the token is retained again after +newToken:...; whether
 *  that is required or an over-retain depends on +newToken:'s ownership
 *  convention -- confirm.
 */
- (void) emit
{
    id<ANTLRToken> aToken = [ANTLRCommonToken newToken:input
                                                  Type:state.type
                                               Channel:state.channel
                                                 Start:state.tokenStartCharIndex
                                                  Stop:input.index-1];
    [aToken setLine:state.tokenStartLine];
    aToken.text = [self text];
    [aToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    [aToken retain];
    [self emit:aToken];
    // [aToken release];
}

// matching
#pragma mark Matching

/** Match aString character by character against the input, consuming each
 *  matched character.
 *
 *  On the first mismatch: while backtracking, set state.failed and return;
 *  otherwise build an ANTLRMismatchedTokenException, call -recover: and
 *  throw it.
 */
- (void) matchString:(NSString *)aString
{
    unichar c;
    NSUInteger i = 0;                           // NSUInteger: -length is not an unsigned int
    NSUInteger stringLength = [aString length];
    while ( i < stringLength ) {
        c = [input LA:1];
        if ( c != [aString characterAtIndex:i] ) {
            if ([state getBacktracking] > 0) {
                state.failed = YES;
                return;
            }
            ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:[aString characterAtIndex:i] Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        i++;
        [input consume];
        state.failed = NO;
    }
}

/** Consume one character unconditionally. */
- (void) matchAny
{
    [input consume];
}

/** Match the single character aChar and consume it.
 *
 *  On mismatch: while backtracking, set state.failed and return; otherwise
 *  build an ANTLRMismatchedTokenException, call -recover: and throw it.
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    unichar charLA;
    charLA = [input LA:1];
    if ( charLA != aChar) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
        mte.c = charLA;
        [self recover:mte];
        @throw mte;
    }
    [input consume];
    state.failed = NO;
}

/** Match one character in the inclusive range fromChar..toChar and consume it.
 *
 *  On mismatch: while backtracking, set state.failed and return; otherwise
 *  build an ANTLRMismatchedRangeException, call -recover: and throw it.
 *  The exception's range is a real NSRange (location = fromChar,
 *  length = number of characters in the range), which is how
 *  -getErrorMessage:TokenNames: reads it back.
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA < fromChar || charLA > toChar ) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        // NSMakeRange takes (location, length), not (from, to).
        NSUInteger rangeLength = (toChar >= fromChar) ? (NSUInteger)(toChar - fromChar) + 1 : 0;
        ANTLRMismatchedRangeException  *mre = [ANTLRMismatchedRangeException
                                                newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                                                      stream:input];
        mre.c = charLA;
        [self recover:mre];
        @throw mre;
    }
    [input consume];
    state.failed = NO;
}

// info
#pragma mark Informational

/** Current line as reported by the input stream. */
- (NSUInteger) line
{
    return input.line;
}

/** Current position within the line as reported by the input stream. */
- (NSUInteger) charPositionInLine
{
    return input.charPositionInLine;
}

/** Always returns 0.
 *
 *  NOTE(review): -line and -charPositionInLine delegate to the input stream
 *  while this does not; it looks like it might be meant to return
 *  input.index -- confirm against callers before changing.
 */
- (NSInteger) index
{
    return 0;
}

/** Return the text of the current token: the override stored in state.text
 *  if there is one, otherwise the input characters from the token start up
 *  to (not including) the current input index.
 */
- (NSString *) text
{
    if (state.text != nil) {
        return state.text;
    }
    return [input substringWithRange:NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex)];
}

/** Override the text of the token being matched. */
- (void) setText:(NSString *) theText
{
    state.text = theText;
}

// error handling

/** Report a recognition error by displaying it with the lexer's token names. */
- (void) reportError:(ANTLRRecognitionException *)e
{
    /** TODO: not thought about recovery in lexer yet.
     *
     // if we've already reported an error and have not matched a token
     // yet successfully, don't report any errors.
     if ( errorRecovery ) {
     //System.err.print("[SPURIOUS] ");
     return;
     }
     errorRecovery = true;
     */

    [self displayRecognitionError:[self getTokenNames] Exception:e];
}

/** Build a human-readable message for the recognition exception e.
 *
 *  The lexer-specific exception classes are handled here with character
 *  oriented messages; anything else is delegated to super. The tokenNames
 *  argument is not used by this implementation: the fallback passes
 *  [self getTokenNames] to super instead.
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
/*    NSString *msg = [NSString stringWithFormat:@"Gotta fix getErrorMessage in ANTLRLexer.m--%@\n",
                                                 e.name];
 */
    NSString *msg = nil;
    if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
               [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expecting]];
    }
    else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        ANTLRNoViableAltException *nvae = (ANTLRNoViableAltException *)e;
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        msg = [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
               [self getCharErrorDisplay:(nvae.c)]];
    }
    else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
        // for development, can add "(decision="+eee.decisionNumber+")"
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
               [self getCharErrorDisplay:(eee.c)]];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
        ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
        ANTLRMismatchedRangeException *mre = (ANTLRMismatchedRangeException *)e;
        // range is (location, length): last character is location+length-1
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
               [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
               [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    }
    else {
        msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    }
    return msg;
}

/** Return a printable form of character c for error messages:
 *  "<EOF>" for ANTLRTokenTypeEOF, backslash escapes for newline, tab and
 *  carriage return, otherwise the character itself.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    NSString *s;
    switch ( c ) {
        case ANTLRTokenTypeEOF :
            s = @"<EOF>";
            break;
        case '\n' :
            s = @"\\n";
            break;
        case '\t' :
            s = @"\\t";
            break;
        case '\r' :
            s = @"\\r";
            break;
        default:
            // %C formats a 16-bit unichar; %c with (char) garbled anything
            // outside ASCII.
            s = [NSString stringWithFormat:@"%C", (unichar)c];
            break;
    }
    return s;
}

/** Lexers can normally match any char in its vocabulary after matching
 *  a token, so do the easy thing and just kill a character and hope
 *  it all works out.  You can instead use the rule invocation stack
 *  to do sophisticated error recovery if you are in a fragment rule.
 */
- (void)recover:(ANTLRRecognitionException *)re
{
    //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    //re.printStackTrace();
    [input consume];
}

/** Trace entry into a rule, tagging the trace with the next input symbol
 *  and the current line:column.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // line/column are cast to unsigned long so the specifier matches on
    // both 32- and 64-bit builds.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n", [input LT:1],
                             (unsigned long)input.line, (unsigned long)input.charPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}

/** Trace exit from a rule, tagging the trace with the next input symbol
 *  and the current line:column.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // line/column are cast to unsigned long so the specifier matches on
    // both 32- and 64-bit builds.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n", [input LT:1],
                             (unsigned long)input.line, (unsigned long)input.charPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}

@end