/** \file
 *
 * Base implementation of an ANTLR 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are parser specific
 * (usually the default implementation of the base recognizer).
 */
10
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
15 //
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
20 // are met:
21 // 1. Redistributions of source code must retain the above copyright
22 // notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 // notice, this list of conditions and the following disclaimer in the
25 // documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40 #include <antlr3lexer.h>
41
42 static void mTokens (pANTLR3_LEXER lexer);
43 static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input);
44 static void pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input);
45 static void popCharStream (pANTLR3_LEXER lexer);
46
47 static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token);
48 static pANTLR3_COMMON_TOKEN emit (pANTLR3_LEXER lexer);
49 static ANTLR3_BOOLEAN matchs (pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
50 static ANTLR3_BOOLEAN matchc (pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
51 static ANTLR3_BOOLEAN matchRange (pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
52 static void matchAny (pANTLR3_LEXER lexer);
53 static void recover (pANTLR3_LEXER lexer);
54 static ANTLR3_UINT32 getLine (pANTLR3_LEXER lexer);
55 static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer);
56 static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_LEXER lexer);
57 static pANTLR3_STRING getText (pANTLR3_LEXER lexer);
58 static pANTLR3_COMMON_TOKEN nextToken (pANTLR3_TOKEN_SOURCE toksource);
59
60 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
61 static void reportError (pANTLR3_BASE_RECOGNIZER rec);
62 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
63 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
64 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
65
66 static void reset (pANTLR3_BASE_RECOGNIZER rec);
67
68 static void freeLexer (pANTLR3_LEXER lexer);
69
70
71 ANTLR3_API pANTLR3_LEXER
antlr3LexerNew(ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)72 antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
73 {
74 pANTLR3_LEXER lexer;
75 pANTLR3_COMMON_TOKEN specialT;
76
77 /* Allocate memory
78 */
79 lexer = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
80
81 if (lexer == NULL)
82 {
83 return NULL;
84 }
85
86 /* Now we need to create the base recognizer
87 */
88 lexer->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
89
90 if (lexer->rec == NULL)
91 {
92 lexer->free(lexer);
93 return NULL;
94 }
95 lexer->rec->super = lexer;
96
97 lexer->rec->displayRecognitionError = displayRecognitionError;
98 lexer->rec->reportError = reportError;
99 lexer->rec->reset = reset;
100 lexer->rec->getCurrentInputSymbol = getCurrentInputSymbol;
101 lexer->rec->getMissingSymbol = getMissingSymbol;
102
103 /* Now install the token source interface
104 */
105 if (lexer->rec->state->tokSource == NULL)
106 {
107 lexer->rec->state->tokSource = (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));
108
109 if (lexer->rec->state->tokSource == NULL)
110 {
111 lexer->rec->free(lexer->rec);
112 lexer->free(lexer);
113
114 return NULL;
115 }
116 lexer->rec->state->tokSource->super = lexer;
117
118 /* Install the default nextToken() method, which may be overridden
119 * by generated code, or by anything else in fact.
120 */
121 lexer->rec->state->tokSource->nextToken = nextToken;
122 lexer->rec->state->tokSource->strFactory = NULL;
123
124 lexer->rec->state->tokFactory = NULL;
125 }
126
127 /* Install the lexer API
128 */
129 lexer->setCharStream = setCharStream;
130 lexer->mTokens = (void (*)(void *))(mTokens);
131 lexer->setCharStream = setCharStream;
132 lexer->pushCharStream = pushCharStream;
133 lexer->popCharStream = popCharStream;
134 lexer->emit = emit;
135 lexer->emitNew = emitNew;
136 lexer->matchs = matchs;
137 lexer->matchc = matchc;
138 lexer->matchRange = matchRange;
139 lexer->matchAny = matchAny;
140 lexer->recover = recover;
141 lexer->getLine = getLine;
142 lexer->getCharIndex = getCharIndex;
143 lexer->getCharPositionInLine = getCharPositionInLine;
144 lexer->getText = getText;
145 lexer->free = freeLexer;
146
147 /* Initialise the eof token
148 */
149 specialT = &(lexer->rec->state->tokSource->eofToken);
150 antlr3SetTokenAPI (specialT);
151 specialT->setType (specialT, ANTLR3_TOKEN_EOF);
152 specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it
153 specialT->strFactory = NULL;
154 specialT->textState = ANTLR3_TEXT_NONE;
155 specialT->custom = NULL;
156 specialT->user1 = 0;
157 specialT->user2 = 0;
158 specialT->user3 = 0;
159
160 // Initialize the skip token.
161 //
162 specialT = &(lexer->rec->state->tokSource->skipToken);
163 antlr3SetTokenAPI (specialT);
164 specialT->setType (specialT, ANTLR3_TOKEN_INVALID);
165 specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it
166 specialT->strFactory = NULL;
167 specialT->custom = NULL;
168 specialT->user1 = 0;
169 specialT->user2 = 0;
170 specialT->user3 = 0;
171 return lexer;
172 }
173
174 static void
reset(pANTLR3_BASE_RECOGNIZER rec)175 reset (pANTLR3_BASE_RECOGNIZER rec)
176 {
177 pANTLR3_LEXER lexer;
178
179 lexer = (pANTLR3_LEXER)rec->super;
180
181 lexer->rec->state->token = NULL;
182 lexer->rec->state->type = ANTLR3_TOKEN_INVALID;
183 lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
184 lexer->rec->state->tokenStartCharIndex = -1;
185 lexer->rec->state->tokenStartCharPositionInLine = -1;
186 lexer->rec->state->tokenStartLine = -1;
187
188 lexer->rec->state->text = NULL;
189
190 // OK - that's all hunky dory, but we may well have had
191 // a token factory that needs a reset. Do that here
192 //
193 if (lexer->rec->state->tokFactory != NULL)
194 {
195 lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory);
196 }
197 }
198
199 ///
200 /// \brief
201 /// Returns the next available token from the current input stream.
202 ///
203 /// \param toksource
204 /// Points to the implementation of a token source. The lexer is
205 /// addressed by the super structure pointer.
206 ///
207 /// \returns
208 /// The next token in the current input stream or the EOF token
209 /// if there are no more tokens.
210 ///
211 /// \remarks
212 /// Write remarks for nextToken here.
213 ///
214 /// \see nextToken
215 ///
216 ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
nextTokenStr(pANTLR3_TOKEN_SOURCE toksource)217 nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
218 {
219 pANTLR3_LEXER lexer;
220 pANTLR3_RECOGNIZER_SHARED_STATE state;
221 pANTLR3_INPUT_STREAM input;
222 pANTLR3_INT_STREAM istream;
223
224 lexer = (pANTLR3_LEXER)(toksource->super);
225 state = lexer->rec->state;
226 input = lexer->input;
227 istream = input->istream;
228
229 /// Loop until we get a non skipped token or EOF
230 ///
231 for (;;)
232 {
233 // Get rid of any previous token (token factory takes care of
234 // any de-allocation when this token is finally used up.
235 //
236 state->token = NULL;
237 state->error = ANTLR3_FALSE; // Start out without an exception
238 state->failed = ANTLR3_FALSE;
239
240 // Now call the matching rules and see if we can generate a new token
241 //
242 for (;;)
243 {
244 // Record the start of the token in our input stream.
245 //
246 state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
247 state->tokenStartCharIndex = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
248 state->tokenStartCharPositionInLine = input->charPositionInLine;
249 state->tokenStartLine = input->line;
250 state->text = NULL;
251 state->custom = NULL;
252 state->user1 = 0;
253 state->user2 = 0;
254 state->user3 = 0;
255
256 if (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
257 {
258 // Reached the end of the current stream, nothing more to do if this is
259 // the last in the stack.
260 //
261 pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken);
262
263 teof->setStartIndex (teof, lexer->getCharIndex(lexer));
264 teof->setStopIndex (teof, lexer->getCharIndex(lexer));
265 teof->setLine (teof, lexer->getLine(lexer));
266 teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it
267 return teof;
268 }
269
270 state->token = NULL;
271 state->error = ANTLR3_FALSE; // Start out without an exception
272 state->failed = ANTLR3_FALSE;
273
274 // Call the generated lexer, see if it can get a new token together.
275 //
276 lexer->mTokens(lexer->ctx);
277
278 if (state->error == ANTLR3_TRUE)
279 {
280 // Recognition exception, report it and try to recover.
281 //
282 state->failed = ANTLR3_TRUE;
283 lexer->rec->reportError(lexer->rec);
284 lexer->recover(lexer);
285 }
286 else
287 {
288 if (state->token == NULL)
289 {
290 // Emit the real token, which adds it in to the token stream basically
291 //
292 emit(lexer);
293 }
294 else if (state->token == &(toksource->skipToken))
295 {
296 // A real token could have been generated, but "Computer say's naaaaah" and it
297 // it is just something we need to skip altogether.
298 //
299 continue;
300 }
301
302 // Good token, not skipped, not EOF token
303 //
304 return state->token;
305 }
306 }
307 }
308 }
309
310 /**
311 * \brief
312 * Default implementation of the nextToken() call for a lexer.
313 *
314 * \param toksource
315 * Points to the implementation of a token source. The lexer is
316 * addressed by the super structure pointer.
317 *
318 * \returns
319 * The next token in the current input stream or the EOF token
320 * if there are no more tokens in any input stream in the stack.
321 *
322 * Write detailed description for nextToken here.
323 *
324 * \remarks
325 * Write remarks for nextToken here.
326 *
327 * \see nextTokenStr
328 */
329 static pANTLR3_COMMON_TOKEN
nextToken(pANTLR3_TOKEN_SOURCE toksource)330 nextToken (pANTLR3_TOKEN_SOURCE toksource)
331 {
332 pANTLR3_COMMON_TOKEN tok;
333
334 // Find the next token in the current stream
335 //
336 tok = nextTokenStr(toksource);
337
338 // If we got to the EOF token then switch to the previous
339 // input stream if there were any and just return the
340 // EOF if there are none. We must check the next token
341 // in any outstanding input stream we pop into the active
342 // role to see if it was sitting at EOF after PUSHing the
343 // stream we just consumed, otherwise we will return EOF
344 // on the reinstalled input stream, when in actual fact
345 // there might be more input streams to POP before the
346 // real EOF of the whole logical input stream. Hence we
347 // use a while loop here until we find something in the stream
348 // that isn't EOF or we reach the actual end of the last input
349 // stream on the stack.
350 //
351 while ((tok != NULL) && (tok->type == ANTLR3_TOKEN_EOF))
352 {
353 pANTLR3_LEXER lexer;
354
355 lexer = (pANTLR3_LEXER)(toksource->super);
356
357 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
358 {
359 // We have another input stream in the stack so we
360 // need to revert to it, then resume the loop to check
361 // it wasn't sitting at EOF itself.
362 //
363 lexer->popCharStream(lexer);
364 tok = nextTokenStr(toksource);
365 }
366 else
367 {
368 // There were no more streams on the input stack
369 // so this EOF is the 'real' logical EOF for
370 // the input stream. So we just exit the loop and
371 // return the EOF we have found.
372 //
373 break;
374 }
375
376 }
377
378 // return whatever token we have, which may be EOF
379 //
380 return tok;
381 }
382
383 ANTLR3_API pANTLR3_LEXER
antlr3LexerNewStream(ANTLR3_UINT32 sizeHint,pANTLR3_INPUT_STREAM input,pANTLR3_RECOGNIZER_SHARED_STATE state)384 antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
385 {
386 pANTLR3_LEXER lexer;
387
388 // Create a basic lexer first
389 //
390 lexer = antlr3LexerNew(sizeHint, state);
391
392 if (lexer != NULL)
393 {
394 // Install the input stream and reset the lexer
395 //
396 setCharStream(lexer, input);
397 }
398
399 return lexer;
400 }
401
/**
 * Placeholder mTokens() installed by antlr3LexerNew(); it only reports on
 * stderr that no lexer rules have been added.
 *
 * \param lexer
 * Only tested against NULL, which keeps the parameter referenced without
 * needing a pragma.
 */
static void
mTokens(pANTLR3_LEXER lexer)
{
    if (lexer == NULL)
    {
        return;
    }

    ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
}
409
410 static void
reportError(pANTLR3_BASE_RECOGNIZER rec)411 reportError (pANTLR3_BASE_RECOGNIZER rec)
412 {
413 // Indicate this recognizer had an error while processing.
414 //
415 rec->state->errorCount++;
416
417 rec->displayRecognitionError(rec, rec->state->tokenNames);
418 }
419
420 #ifdef ANTLR3_WINDOWS
421 #pragma warning( disable : 4100 )
422 #endif
423
424 /** Default lexer error handler (works for 8 bit streams only!!!)
425 */
426 static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)427 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
428 {
429 pANTLR3_LEXER lexer;
430 pANTLR3_EXCEPTION ex;
431 pANTLR3_STRING ftext;
432
433 lexer = (pANTLR3_LEXER)(recognizer->super);
434 ex = lexer->rec->state->exception;
435
436 // See if there is a 'filename' we can use
437 //
438 if (ex->name == NULL)
439 {
440 ANTLR3_FPRINTF(stderr, "-unknown source-(");
441 }
442 else
443 {
444 ftext = ex->streamName->to8(ex->streamName);
445 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
446 }
447
448 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
449 ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
450 ex->type,
451 (pANTLR3_UINT8) (ex->message),
452 ex->charPositionInLine+1
453 );
454 {
455 ANTLR3_INT32 width;
456
457 width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
458
459 if (width >= 1)
460 {
461 if (isprint(ex->c))
462 {
463 ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
464 }
465 else
466 {
467 ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
468 }
469 ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
470 }
471 else
472 {
473 ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
474 ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
475 (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
476 (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
477 );
478 width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
479
480 if (width >= 1)
481 {
482 ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
483 }
484 else
485 {
486 ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
487 }
488 }
489 }
490 }
491
/**
 * \brief
 * Install a character stream as the lexer's current input.
 *
 * \param lexer
 * Lexer to install the stream into.
 *
 * \param input
 * Stream that becomes lexer->input. Its string factory and file name are
 * copied into the token source.
 *
 * Besides recording the stream, this makes sure a token factory bound to
 * the stream exists, installs the recognition exception constructor and
 * clears the current token, text override and token start index.
 */
static void
setCharStream(pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input)
{
    /* Install the input interface
     */
    lexer->input = input;

    /* An existing token factory is never destroyed here, because tokens it
     * produced may still be in use; it is only pointed at the new stream so
     * that tokens can find their original text in the correct place. A
     * factory is created only if there is none yet.
     * TODO: Later a dup() method for a token could let it extract itself
     * out of the factory.
     */
    if (lexer->rec->state->tokFactory != NULL)
    {
        lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
    }
    else
    {
        lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input);
    }

    /* Propagate the string factory so that we preserve the encoding form from
     * the input stream. Only done while the token source has none.
     */
    if (lexer->rec->state->tokSource->strFactory == NULL)
    {
        lexer->rec->state->tokSource->strFactory = input->strFactory;

        // The pre-made EOF token picks up the same factory if it has none
        //
        if (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
        {
            lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
        }
    }

    /* This is a lexer, install the appropriate exception creator
     */
    lexer->rec->exConstruct = antlr3RecognitionExceptionNew;

    /* Set the current token to nothing
     */
    lexer->rec->state->token               = NULL;
    lexer->rec->state->text                = NULL;
    lexer->rec->state->tokenStartCharIndex = -1;

    /* Copy the name of the char stream to the token source
     */
    lexer->rec->state->tokSource->fileName = input->fileName;
}
548
549 /*!
550 * \brief
551 * Change to a new input stream, remembering the old one.
552 *
553 * \param lexer
554 * Pointer to the lexer instance to switch input streams for.
555 *
556 * \param input
557 * New input stream to install as the current one.
558 *
559 * Switches the current character input stream to
560 * a new one, saving the old one, which we will revert to at the end of this
561 * new one.
562 */
563 static void
pushCharStream(pANTLR3_LEXER lexer,pANTLR3_INPUT_STREAM input)564 pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input)
565 {
566 // Do we need a new input stream stack?
567 //
568 if (lexer->rec->state->streams == NULL)
569 {
570 // This is the first call to stack a new
571 // stream and so we must create the stack first.
572 //
573 lexer->rec->state->streams = antlr3StackNew(0);
574
575 if (lexer->rec->state->streams == NULL)
576 {
577 // Could not do this, we just fail to push it.
578 // TODO: Consider if this is what we want to do, but then
579 // any programmer can override this method to do something else.
580 return;
581 }
582 }
583
584 // We have a stack, so we can save the current input stream
585 // into it.
586 //
587 lexer->input->istream->mark(lexer->input->istream);
588 lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
589
590 // And now we can install this new one
591 //
592 lexer->setCharStream(lexer, input);
593 }
594
595 /*!
596 * \brief
597 * Stops using the current input stream and reverts to any prior
598 * input stream on the stack.
599 *
600 * \param lexer
601 * Description of parameter lexer.
602 *
603 * Pointer to a function that abandons the current input stream, whether it
604 * is empty or not and reverts to the previous stacked input stream.
605 *
606 * \remark
607 * The function fails silently if there are no prior input streams.
608 */
609 static void
popCharStream(pANTLR3_LEXER lexer)610 popCharStream (pANTLR3_LEXER lexer)
611 {
612 pANTLR3_INPUT_STREAM input;
613
614 // If we do not have a stream stack or we are already at the
615 // stack bottom, then do nothing.
616 //
617 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
618 {
619 // We just leave the current stream to its fate, we do not close
620 // it or anything as we do not know what the programmer intended
621 // for it. This method can always be overridden of course.
622 // So just find out what was currently saved on the stack and use
623 // that now, then pop it from the stack.
624 //
625 input = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
626 lexer->rec->state->streams->pop(lexer->rec->state->streams);
627
628 // Now install the stream as the current one.
629 //
630 lexer->setCharStream(lexer, input);
631 lexer->input->istream->rewindLast(lexer->input->istream);
632 }
633 return;
634 }
635
/**
 * Make a caller supplied token the lexer's current token.
 *
 * \param lexer  Lexer whose state receives the token.
 * \param token  Token stored in state->token as it is.
 */
static void
emitNew(pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token)
{
    lexer->rec->state->token = token;
}
640
641 static pANTLR3_COMMON_TOKEN
emit(pANTLR3_LEXER lexer)642 emit (pANTLR3_LEXER lexer)
643 {
644 pANTLR3_COMMON_TOKEN token;
645
646 /* We could check pointers to token factories and so on, but
647 * we are in code that we want to run as fast as possible
648 * so we are not checking any errors. So make sure you have installed an input stream before
649 * trying to emit a new token.
650 */
651 token = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
652 if (token == NULL) { return NULL; }
653
654 /* Install the supplied information, and some other bits we already know
655 * get added automatically, such as the input stream it is associated with
656 * (though it can all be overridden of course)
657 */
658 token->type = lexer->rec->state->type;
659 token->channel = lexer->rec->state->channel;
660 token->start = lexer->rec->state->tokenStartCharIndex;
661 token->stop = lexer->getCharIndex(lexer) - 1;
662 token->line = lexer->rec->state->tokenStartLine;
663 token->charPosition = lexer->rec->state->tokenStartCharPositionInLine;
664
665 if (lexer->rec->state->text != NULL)
666 {
667 token->textState = ANTLR3_TEXT_STRING;
668 token->tokText.text = lexer->rec->state->text;
669 }
670 else
671 {
672 token->textState = ANTLR3_TEXT_NONE;
673 }
674 token->lineStart = lexer->input->currentLine;
675 token->user1 = lexer->rec->state->user1;
676 token->user2 = lexer->rec->state->user2;
677 token->user3 = lexer->rec->state->user3;
678 token->custom = lexer->rec->state->custom;
679
680 lexer->rec->state->token = token;
681
682 return token;
683 }
684
685 /**
686 * Free the resources allocated by a lexer
687 */
688 static void
freeLexer(pANTLR3_LEXER lexer)689 freeLexer (pANTLR3_LEXER lexer)
690 {
691 // This may have ben a delegate or delegator lexer, in which case the
692 // state may already have been freed (and set to NULL therefore)
693 // so we ignore the state if we don't have it.
694 //
695 if (lexer->rec->state != NULL)
696 {
697 if (lexer->rec->state->streams != NULL)
698 {
699 lexer->rec->state->streams->free(lexer->rec->state->streams);
700 }
701 if (lexer->rec->state->tokFactory != NULL)
702 {
703 lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
704 lexer->rec->state->tokFactory = NULL;
705 }
706 if (lexer->rec->state->tokSource != NULL)
707 {
708 ANTLR3_FREE(lexer->rec->state->tokSource);
709 lexer->rec->state->tokSource = NULL;
710 }
711 }
712 if (lexer->rec != NULL)
713 {
714 lexer->rec->free(lexer->rec);
715 lexer->rec = NULL;
716 }
717 ANTLR3_FREE(lexer);
718 }
719
720 /** Implementation of matchs for the lexer, overrides any
721 * base implementation in the base recognizer.
722 *
723 * \remark
724 * Note that the generated code lays down arrays of ints for constant
725 * strings so that they are int UTF32 form!
726 */
727 static ANTLR3_BOOLEAN
matchs(pANTLR3_LEXER lexer,ANTLR3_UCHAR * string)728 matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
729 {
730 while (*string != ANTLR3_STRING_TERMINATOR)
731 {
732 if (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
733 {
734 if (lexer->rec->state->backtracking > 0)
735 {
736 lexer->rec->state->failed = ANTLR3_TRUE;
737 return ANTLR3_FALSE;
738 }
739
740 lexer->rec->exConstruct(lexer->rec);
741 lexer->rec->state->failed = ANTLR3_TRUE;
742
743 /* TODO: Implement exception creation more fully perhaps
744 */
745 lexer->recover(lexer);
746 return ANTLR3_FALSE;
747 }
748
749 /* Matched correctly, do consume it
750 */
751 lexer->input->istream->consume(lexer->input->istream);
752 string++;
753
754 /* Reset any failed indicator
755 */
756 lexer->rec->state->failed = ANTLR3_FALSE;
757 }
758
759
760 return ANTLR3_TRUE;
761 }
762
763 /** Implementation of matchc for the lexer, overrides any
764 * base implementation in the base recognizer.
765 *
766 * \remark
767 * Note that the generated code lays down arrays of ints for constant
768 * strings so that they are int UTF32 form!
769 */
770 static ANTLR3_BOOLEAN
matchc(pANTLR3_LEXER lexer,ANTLR3_UCHAR c)771 matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
772 {
773 if (lexer->input->istream->_LA(lexer->input->istream, 1) == c)
774 {
775 /* Matched correctly, do consume it
776 */
777 lexer->input->istream->consume(lexer->input->istream);
778
779 /* Reset any failed indicator
780 */
781 lexer->rec->state->failed = ANTLR3_FALSE;
782
783 return ANTLR3_TRUE;
784 }
785
786 /* Failed to match, exception and recovery time.
787 */
788 if (lexer->rec->state->backtracking > 0)
789 {
790 lexer->rec->state->failed = ANTLR3_TRUE;
791 return ANTLR3_FALSE;
792 }
793
794 lexer->rec->exConstruct(lexer->rec);
795
796 /* TODO: Implement exception creation more fully perhaps
797 */
798 lexer->recover(lexer);
799
800 return ANTLR3_FALSE;
801 }
802
803 /** Implementation of match range for the lexer, overrides any
804 * base implementation in the base recognizer.
805 *
806 * \remark
807 * Note that the generated code lays down arrays of ints for constant
808 * strings so that they are int UTF32 form!
809 */
810 static ANTLR3_BOOLEAN
matchRange(pANTLR3_LEXER lexer,ANTLR3_UCHAR low,ANTLR3_UCHAR high)811 matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
812 {
813 ANTLR3_UCHAR c;
814
815 /* What is in the stream at the moment?
816 */
817 c = lexer->input->istream->_LA(lexer->input->istream, 1);
818 if ( c >= low && c <= high)
819 {
820 /* Matched correctly, consume it
821 */
822 lexer->input->istream->consume(lexer->input->istream);
823
824 /* Reset any failed indicator
825 */
826 lexer->rec->state->failed = ANTLR3_FALSE;
827
828 return ANTLR3_TRUE;
829 }
830
831 /* Failed to match, execption and recovery time.
832 */
833
834 if (lexer->rec->state->backtracking > 0)
835 {
836 lexer->rec->state->failed = ANTLR3_TRUE;
837 return ANTLR3_FALSE;
838 }
839
840 lexer->rec->exConstruct(lexer->rec);
841
842 /* TODO: Implement exception creation more fully
843 */
844 lexer->recover(lexer);
845
846 return ANTLR3_FALSE;
847 }
848
849 static void
matchAny(pANTLR3_LEXER lexer)850 matchAny (pANTLR3_LEXER lexer)
851 {
852 lexer->input->istream->consume(lexer->input->istream);
853 }
854
855 static void
recover(pANTLR3_LEXER lexer)856 recover (pANTLR3_LEXER lexer)
857 {
858 lexer->input->istream->consume(lexer->input->istream);
859 }
860
861 static ANTLR3_UINT32
getLine(pANTLR3_LEXER lexer)862 getLine (pANTLR3_LEXER lexer)
863 {
864 return lexer->input->getLine(lexer->input);
865 }
866
867 static ANTLR3_UINT32
getCharPositionInLine(pANTLR3_LEXER lexer)868 getCharPositionInLine (pANTLR3_LEXER lexer)
869 {
870 return lexer->input->charPositionInLine;
871 }
872
getCharIndex(pANTLR3_LEXER lexer)873 static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer)
874 {
875 return lexer->input->istream->index(lexer->input->istream);
876 }
877
878 static pANTLR3_STRING
getText(pANTLR3_LEXER lexer)879 getText (pANTLR3_LEXER lexer)
880 {
881 if (lexer->rec->state->text)
882 {
883 return lexer->rec->state->text;
884
885 }
886 return lexer->input->substr(
887 lexer->input,
888 lexer->rec->state->tokenStartCharIndex,
889 lexer->getCharIndex(lexer) - lexer->input->charByteSize
890 );
891
892 }
893
894 static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)895 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
896 {
897 return NULL;
898 }
899
900 static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)901 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
902 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
903 {
904 return NULL;
905 }
906