// [The "BSD licence"]
// Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#import <ANTLR/antlr.h>
#import "Lexer.h"

/** Base class for generated lexers.
 *
 *  Pulls characters from a CharStream (`input`) and records the token under
 *  construction in the shared recognizer `state`. Generated subclasses supply
 *  -mTokens; this class supplies the token loop, the character matching
 *  primitives and lexer-specific error reporting.
 *
 *  The file uses manual retain/release.
 */
@implementation Lexer

@synthesize input;
@synthesize ruleNestingLevel;
#pragma mark Initializer

/** Creates a lexer over anInput using a newly allocated RecognizerSharedState.
 *
 *  @param anInput The character stream to tokenize; retained by the lexer.
 *  @return The initialized lexer, or nil if the superclass initializer failed.
 */
- (id) initWithCharStream:(id<CharStream>)anInput
{
    self = [super initWithState:[[RecognizerSharedState alloc] init]];
    if ( self != nil ) {
        input = [anInput retain];
        // If the state already carries a token, point it at the new stream.
        if (state.token != nil)
            [((CommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/** Creates a lexer over anInput that uses the caller-supplied shared state.
 *
 *  @param anInput The character stream to tokenize; retained by the lexer.
 *  @param aState  The recognizer state handed to the superclass initializer.
 *  @return The initialized lexer, or nil if the superclass initializer failed.
 */
- (id) initWithCharStream:(id<CharStream>)anInput State:(RecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self != nil ) {
        input = [anInput retain];
        // If the state already carries a token, point it at the new stream.
        if (state.token != nil)
            [((CommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/** Releases the retained input stream. */
- (void) dealloc
{
    [input release];    // messaging nil is a no-op, no guard required
    [super dealloc];
}

/** Returns a new lexer of the receiver's class that shares the receiver's
 *  input stream and rule nesting level.
 *
 *  @param aZone The zone passed to +allocWithZone:.
 */
- (id) copyWithZone:(NSZone *)aZone
{
    Lexer *copy;

    copy = [[[self class] allocWithZone:aZone] init];
    //    copy = [super copyWithZone:aZone]; // allocation occurs here
    if ( input != nil )
        copy.input = input;
    copy.ruleNestingLevel = ruleNestingLevel;
    return copy;
}

/** Resets the recognizer, rewinds the input (when there is one) and clears
 *  the per-token fields of the shared state.
 */
- (void) reset
{
    [super reset]; // reset all recognizer state variables
    // wack Lexer state variables
    if ( input != nil ) {
        [input seek:0]; // rewind the input
    }
    if ( state == nil ) {
        return; // no shared state work to do
    }
    state.token = nil;
    state.type = CommonToken.INVALID_TOKEN_TYPE;
    state.channel = CommonToken.DEFAULT_CHANNEL;
    state.tokenStartCharIndex = -1;
    state.tokenStartCharPositionInLine = -1;
    state.tokenStartLine = -1;
    state.text = nil;
}

// token stuff
#pragma mark Tokens

/** Returns the token currently held by the shared state. */
- (id<Token>)getToken
{
    return [state getToken];
}

/** Stores aToken in the shared state when it differs from the current one.
 *
 *  NOTE(review): aToken is retained here before being assigned to
 *  state.token; whether that assignment retains as well is not visible in
 *  this file -- confirm the ownership is balanced.
 */
- (void) setToken: (id<Token>) aToken
{
    if (state.token != aToken) {
        [aToken retain];
        state.token = aToken;
    }
}


// this method may be overridden in the generated lexer if we generate a filtering lexer.
/** Returns the next token from the input.
 *
 *  Each iteration clears the current token, records where the token starts
 *  and then either returns an EOF token (when the lookahead is CharStreamEOF)
 *  or runs -mTokens. If -mTokens leaves no token behind, one is emitted
 *  automatically; if it leaves the skip token, the loop starts over.
 *  Recognition exceptions are reported and the loop continues.
 */
- (id<Token>) nextToken
{
    while (YES) {
        [self setToken:nil];
        state.channel = CommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.getCharPositionInLine;
        state.tokenStartLine = input.getLine;
        state.text = nil;

        // [self setText:[self text]];
        if ([input LA:1] == CharStreamEOF) {
            CommonToken *eof = [CommonToken newToken:input
                                                Type:TokenTypeEOF
                                             Channel:CommonToken.DEFAULT_CHANNEL
                                               Start:input.index
                                                Stop:input.index];
            [eof setLine:input.getLine];
            [eof setCharPositionInLine:input.getCharPositionInLine];
            return eof;
        }
        @try {
            [self mTokens];
            // SEL aMethod = @selector(mTokens);
            // [[self class] instancesRespondToSelector:aMethod];
            if ( state.token == nil)
                [self emit];
            else if ( state.token == [CommonToken skipToken] ) {
                continue;
            }
            return state.token;
        }
        // The -match... methods in this class call -recover: themselves
        // before throwing, so the two mismatch handlers only report.
        @catch (MismatchedRangeException *re) {
            [self reportError:re];
            // [self recover:re];
        }
        @catch (MismatchedTokenException *re) {
            [self reportError:re];
            // [self recover:re];
        }
        @catch (RecognitionException *re) {
            [self reportError:re];
            [self recover:re];
        }
    }
}

/** Abstract entry point for token matching; generated lexers override it. */
- (void) mTokens
{   // abstract, defined in generated source as a starting point for matching
    [self doesNotRecognizeSelector:_cmd];
}

/** Marks the current token as skipped so -nextToken looks for another one. */
- (void) skip
{
    state.token = [CommonToken skipToken];
}

/** Returns the character stream the lexer reads from. */
- (id<CharStream>) input
{
    return input;
}

/** Replaces the character stream and resets the lexer.
 *
 *  -reset runs while `input` is nil, so the incoming stream is not rewound.
 *  The new stream is retained before the old one is released, which keeps
 *  the retain count balanced when anInput is the stream already installed.
 *
 *  @param anInput The new character stream; may be nil.
 */
- (void) setInput:(id<CharStream>) anInput
{
    id<CharStream> previousInput = input;

    [anInput retain];
    input = nil;
    [self reset];
    input = anInput;
    [previousInput release];
}

/** Currently does not support multiple emits per nextToken invocation
 *  for efficiency reasons.  Subclass and override this method and
 *  nextToken (to push tokens into a list and pull from that list rather
 *  than a single variable as this implementation does).
 */
- (void) emit:(id<Token>)aToken
{
    state.token = aToken;
}

/** The standard method called to automatically emit a token at the
 *  outermost lexical rule.  The token object should point into the
 *  char buffer start..stop.  If there is a text override in 'text',
 *  use that to set the token's text.  Override this method to emit
 *  custom Token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 *
 *  NOTE(review): the token is retained once more after creation and never
 *  released here; confirm against the ownership returned by
 *  +[CommonToken newToken:Type:Channel:Start:Stop:].
 */
- (void) emit
{
    id<Token> aToken = [CommonToken newToken:input
                                        Type:state.type
                                     Channel:state.channel
                                       Start:state.tokenStartCharIndex
                                        Stop:input.index-1];
    aToken.text = [self text];
    [aToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    [aToken setLine:state.tokenStartLine];
    [aToken retain];
    [self emit:aToken];
    // [aToken release];
}

// matching
#pragma mark Matching

/** Matches aString character by character against the input.
 *
 *  On a mismatch while backtracking, sets state.failed and returns.
 *  Otherwise consumes one character via -recover: and throws a
 *  MismatchedTokenException describing the expected and actual character.
 *
 *  @param aString The literal text expected next in the input.
 */
- (void) matchString:(NSString *)aString
{
    NSUInteger stringLength = [aString length];

    for ( NSUInteger i = 0; i < stringLength; i++ ) {
        unichar expected = [aString characterAtIndex:i];
        unichar c = [input LA:1];
        if ( c != expected ) {
            if ([state getBacktracking] > 0) {
                state.failed = YES;
                return;
            }
            MismatchedTokenException *mte = [MismatchedTokenException newExceptionChar:expected Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        [input consume];
        state.failed = NO;
    }
}

/** Consumes one character unconditionally. */
- (void) matchAny
{
    [input consume];
}

/** Matches a single character.
 *
 *  On a mismatch while backtracking, sets state.failed and returns.
 *  Otherwise consumes one character via -recover: and throws a
 *  MismatchedTokenException.
 *
 *  @param aChar The character expected next in the input.
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    unichar charLA;
    charLA = [input LA:1];
    if ( charLA != aChar) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        MismatchedTokenException *mte = [MismatchedTokenException newExceptionChar:aChar Stream:input];
        mte.c = charLA;
        [self recover:mte];
        @throw mte;
    }
    [input consume];
    state.failed = NO;
}

/** Matches one character in the inclusive range fromChar..toChar.
 *
 *  On a mismatch while backtracking, sets state.failed and returns.
 *  Otherwise consumes one character via -recover: and throws a
 *  MismatchedRangeException. The exception's NSRange is built as
 *  (location = fromChar, length = number of characters in the range), which
 *  is the layout -getErrorMessage:TokenNames: decodes when it prints
 *  location .. location+length-1.
 *
 *  @param fromChar Lowest acceptable character.
 *  @param toChar   Highest acceptable character.
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA < fromChar || charLA > toChar ) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        // NSMakeRange takes (location, length), not (from, to); an inverted
        // range is reported as empty rather than underflowing.
        NSUInteger rangeLength = ( toChar >= fromChar ) ? (NSUInteger)(toChar - fromChar) + 1 : 0;
        MismatchedRangeException *mre = [MismatchedRangeException
                newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                      stream:input];
        mre.c = charLA;
        [self recover:mre];
        @throw mre;
    }
    [input consume];
    state.failed = NO;
}

// info
#pragma mark Informational

/** Returns the input stream's current line. */
- (NSUInteger) line
{
    return input.getLine;
}

/** Returns the input stream's current position within the line. */
- (NSUInteger) charPositionInLine
{
    return input.getCharPositionInLine;
}

/** Always returns 0 in this implementation.
 *
 *  NOTE(review): presumably a placeholder -- confirm whether callers expect
 *  the input stream's index here.
 */
- (NSInteger) index
{
    return 0;
}

/** Returns the text override stored in the state if there is one, otherwise
 *  the input characters from the token start up to the current index.
 */
- (NSString *) text
{
    if (state.text != nil) {
        return state.text;
    }
    return [input substringWithRange:NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex)];
}

/** Overrides the text of the token being built. */
- (void) setText:(NSString *) theText
{
    state.text = theText;
}

// error handling
/** Reports a recognition error through -displayRecognitionError:Exception:. */
- (void) reportError:(RecognitionException *)e
{
    /** TODO: not thought about recovery in lexer yet.
     *
    // if we've already reported an error and have not matched a token
    // yet successfully, don't report any errors.
    if ( errorRecovery ) {
        //System.err.print("[SPURIOUS] ");
        return;
    }
    errorRecovery = true;
    */

    [self displayRecognitionError:[self getTokenNames] Exception:e];
}

/** Builds the message for a recognition exception, phrased in terms of
 *  characters. Exception classes not handled here are delegated to the
 *  superclass.
 *
 *  @param e          The exception to describe.
 *  @param tokenNames Not consulted by this implementation; the superclass
 *                    fallback is given [self getTokenNames].
 *  @return The error message.
 */
- (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
/*    NSString *msg = [NSString stringWithFormat:@"Gotta fix getErrorMessage in Lexer.m--%@\n",
            e.name];
 */
    NSString *msg = nil;
    if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
        MismatchedTokenException *mte = (MismatchedTokenException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
            [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expectingChar]];
    }
    else if ( [e isKindOfClass:[NoViableAltException class]] ) {
        NoViableAltException *nvae = (NoViableAltException *)e;
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        // Explicit (long) casts keep the arguments in step with %ld whatever
        // integer width the exception's properties use.
        msg = [NSString stringWithFormat:@"no viable alternative decision:%ld state:%ld at character \"%@\"",
            (long)nvae.decisionNumber, (long)nvae.stateNumber, [self getCharErrorDisplay:(nvae.c)]];
    }
    else if ( [e isKindOfClass:[EarlyExitException class]] ) {
        EarlyExitException *eee = (EarlyExitException *)e;
        // for development, can add "(decision="+eee.decisionNumber+")"
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
            [self getCharErrorDisplay:(eee.c)]];
    }
    else if ( [e isKindOfClass:[MismatchedNotSetException class]] ) {
        MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
            [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
        MismatchedSetException *mse = (MismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
            [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[MismatchedRangeException class]] ) {
        MismatchedRangeException *mre = (MismatchedRangeException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
            [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
            [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    }
    else {
        msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    }
    return msg;
}

/** Returns a printable form of a character code for error messages.
 *
 *  0 and the EOF codes get symbolic names; newline, tab and carriage return
 *  are shown as escape sequences; everything else is rendered as the UTF-16
 *  code unit itself.
 *
 *  @param c The character code to display.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    NSString *s;
    switch ( c ) {
        case 0:
            s = @"char=<nil>";
            break;
        case TokenTypeEOF :
        case 65535:
            s = @"<EOF>";
            break;
        case '\n' :
            s = @"\\n";
            break;
        case '\t' :
            s = @"\\t";
            break;
        case '\r' :
            s = @"\\r";
            break;
        default:
            // %C formats a unichar; %c with a (char) cast would truncate
            // every character outside the 8-bit range.
            s = [NSString stringWithFormat:@"%C", (unichar)c];
            break;
    }
    return s;
}

/** Lexers can normally match any char in it's vocabulary after matching
 *  a token, so do the easy thing and just kill a character and hope
 *  it all works out.  You can instead use the rule invocation stack
 *  to do sophisticated error recovery if you are in a fragment rule.
 */
- (void)recover:(RecognitionException *)re
{
    //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    //re.printStackTrace();
    [input consume];
}

/** Traces rule entry, passing the lookahead character and the current
 *  line:column to the superclass tracer.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // Casts pin each variadic argument to the width its specifier expects.
    NSString *inputSymbol = [NSString stringWithFormat:@"%C line=%lu:%lu\n",
                                (unichar)[input LT:1],
                                (unsigned long)input.getLine,
                                (unsigned long)input.getCharPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}

/** Traces rule exit, passing the lookahead character and the current
 *  line:column to the superclass tracer.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // Casts pin each variadic argument to the width its specifier expects.
    NSString *inputSymbol = [NSString stringWithFormat:@"%C line=%lu:%lu\n",
                                (unichar)[input LT:1],
                                (unsigned long)input.getLine,
                                (unsigned long)input.getCharPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}

@end