1 /** \file
2 * Contains the base functions that all recognizers require.
3 * Any function can be overridden by a lexer/parser/tree parser or by the
4 * ANTLR3 programmer.
5 *
6 * \addtogroup pANTLR3_BASE_RECOGNIZER
7 * @{
8 */
9 #include <antlr3baserecognizer.h>
10
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
15 //
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
20 // are met:
21 // 1. Redistributions of source code must retain the above copyright
22 // notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 // notice, this list of conditions and the following disclaimer in the
25 // documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40 #ifdef ANTLR3_WINDOWS
41 #pragma warning( disable : 4100 )
42 #endif
43
44 /* Interface functions -standard implementations cover parser and treeparser
45 * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46 * most of these functions.
47 */
48 static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer);
49 static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer);
50 static void endResync (pANTLR3_BASE_RECOGNIZER recognizer);
51 static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52 static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
53
54 static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55 static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer);
56 static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57 static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58 static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59 static void reportError (pANTLR3_BASE_RECOGNIZER recognizer);
60 static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer);
61 static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63 static void recover (pANTLR3_BASE_RECOGNIZER recognizer);
64 static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65 static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66 static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67 static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68 static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69 static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer);
70 static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71 static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72 static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73 static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74 static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75 static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76 static void reset (pANTLR3_BASE_RECOGNIZER recognizer);
77 static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer);
78 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
80 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81 static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer);
82
83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
antlr3BaseRecognizerNew(ANTLR3_UINT32 type,ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
85 {
86 pANTLR3_BASE_RECOGNIZER recognizer;
87
88 // Allocate memory for the structure
89 //
90 recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
91
92 if (recognizer == NULL)
93 {
94 // Allocation failed
95 //
96 return NULL;
97 }
98
99
100 // If we have been supplied with a pre-existing recognizer state
101 // then we just install it, otherwise we must create one from scratch
102 //
103 if (state == NULL)
104 {
105 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
106
107 if (recognizer->state == NULL)
108 {
109 ANTLR3_FREE(recognizer);
110 return NULL;
111 }
112
113 // Initialize any new recognizer state
114 //
115 recognizer->state->errorRecovery = ANTLR3_FALSE;
116 recognizer->state->lastErrorIndex = -1;
117 recognizer->state->failed = ANTLR3_FALSE;
118 recognizer->state->errorCount = 0;
119 recognizer->state->backtracking = 0;
120 recognizer->state->following = NULL;
121 recognizer->state->ruleMemo = NULL;
122 recognizer->state->tokenNames = NULL;
123 recognizer->state->sizeHint = sizeHint;
124 recognizer->state->tokSource = NULL;
125 recognizer->state->tokFactory = NULL;
126
127 // Rather than check to see if we must initialize
128 // the stack every time we are asked for an new rewrite stream
129 // we just always create an empty stack and then just
130 // free it when the base recognizer is freed.
131 //
132 recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size.
133
134 if (recognizer->state->rStreams == NULL)
135 {
136 // Out of memory
137 //
138 ANTLR3_FREE(recognizer->state);
139 ANTLR3_FREE(recognizer);
140 return NULL;
141 }
142 }
143 else
144 {
145 // Install the one we were given, and do not reset it here
146 // as it will either already have been initialized or will
147 // be in a state that needs to be preserved.
148 //
149 recognizer->state = state;
150 }
151
152 // Install the BR API
153 //
154 recognizer->alreadyParsedRule = alreadyParsedRule;
155 recognizer->beginResync = beginResync;
156 recognizer->combineFollows = combineFollows;
157 recognizer->beginBacktrack = beginBacktrack;
158 recognizer->endBacktrack = endBacktrack;
159 recognizer->computeCSRuleFollow = computeCSRuleFollow;
160 recognizer->computeErrorRecoverySet = computeErrorRecoverySet;
161 recognizer->consumeUntil = consumeUntil;
162 recognizer->consumeUntilSet = consumeUntilSet;
163 recognizer->displayRecognitionError = displayRecognitionError;
164 recognizer->endResync = endResync;
165 recognizer->exConstruct = antlr3MTExceptionNew;
166 recognizer->getRuleInvocationStack = getRuleInvocationStack;
167 recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168 recognizer->getRuleMemoization = getRuleMemoization;
169 recognizer->match = match;
170 recognizer->matchAny = matchAny;
171 recognizer->memoize = memoize;
172 recognizer->mismatch = mismatch;
173 recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken;
174 recognizer->mismatchIsMissingToken = mismatchIsMissingToken;
175 recognizer->recover = recover;
176 recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177 recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet;
178 recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken;
179 recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors;
180 recognizer->reportError = reportError;
181 recognizer->reset = reset;
182 recognizer->synpred = synpred;
183 recognizer->toStrings = toStrings;
184 recognizer->getCurrentInputSymbol = getCurrentInputSymbol;
185 recognizer->getMissingSymbol = getMissingSymbol;
186 recognizer->debugger = NULL;
187
188 recognizer->free = freeBR;
189
190 /* Initialize variables
191 */
192 recognizer->type = type;
193
194
195 return recognizer;
196 }
197 static void
freeBR(pANTLR3_BASE_RECOGNIZER recognizer)198 freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
199 {
200 pANTLR3_EXCEPTION thisE;
201
202 // Did we have a state allocated?
203 //
204 if (recognizer->state != NULL)
205 {
206 // Free any rule memoization we set up
207 //
208 if (recognizer->state->ruleMemo != NULL)
209 {
210 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
211 recognizer->state->ruleMemo = NULL;
212 }
213
214 // Free any exception space we have left around
215 //
216 thisE = recognizer->state->exception;
217 if (thisE != NULL)
218 {
219 thisE->freeEx(thisE);
220 }
221
222 // Free any rewrite streams we have allocated
223 //
224 if (recognizer->state->rStreams != NULL)
225 {
226 recognizer->state->rStreams->free(recognizer->state->rStreams);
227 }
228
229 // Free up any token factory we created (error recovery for instance)
230 //
231 if (recognizer->state->tokFactory != NULL)
232 {
233 recognizer->state->tokFactory->close(recognizer->state->tokFactory);
234 }
235 // Free the shared state memory
236 //
237 ANTLR3_FREE(recognizer->state);
238 }
239
240 // Free the actual recognizer space
241 //
242 ANTLR3_FREE(recognizer);
243 }
244
245 /**
246 * Creates a new Mismatched Token Exception and inserts in the recognizer
247 * exception stack.
248 *
249 * \param recognizer
250 * Context pointer for this recognizer
251 *
252 */
253 ANTLR3_API void
antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
255 {
256 /* Create a basic recognition exception structure
257 */
258 antlr3RecognitionExceptionNew(recognizer);
259
260 /* Now update it to indicate this is a Mismatched token exception
261 */
262 recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME;
263 recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
264
265 return;
266 }
267
268 ANTLR3_API void
antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
270 {
271 pANTLR3_EXCEPTION ex;
272 pANTLR3_LEXER lexer;
273 pANTLR3_PARSER parser;
274 pANTLR3_TREE_PARSER tparser;
275
276 pANTLR3_INPUT_STREAM ins;
277 pANTLR3_INT_STREAM is;
278 pANTLR3_COMMON_TOKEN_STREAM cts;
279 pANTLR3_TREE_NODE_STREAM tns;
280
281 ins = NULL;
282 cts = NULL;
283 tns = NULL;
284 is = NULL;
285 lexer = NULL;
286 parser = NULL;
287 tparser = NULL;
288
289 switch (recognizer->type)
290 {
291 case ANTLR3_TYPE_LEXER:
292
293 lexer = (pANTLR3_LEXER) (recognizer->super);
294 ins = lexer->input;
295 is = ins->istream;
296
297 break;
298
299 case ANTLR3_TYPE_PARSER:
300
301 parser = (pANTLR3_PARSER) (recognizer->super);
302 cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
303 is = parser->tstream->istream;
304
305 break;
306
307 case ANTLR3_TYPE_TREE_PARSER:
308
309 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
310 tns = tparser->ctnstream->tnstream;
311 is = tns->istream;
312
313 break;
314
315 default:
316
317 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
318 return;
319
320 break;
321 }
322
323 /* Create a basic exception structure
324 */
325 ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
326 (void *)ANTLR3_RECOGNITION_EX_NAME,
327 NULL,
328 ANTLR3_FALSE);
329
330 /* Rest of information depends on the base type of the
331 * input stream.
332 */
333 switch (is->type & ANTLR3_INPUT_MASK)
334 {
335 case ANTLR3_CHARSTREAM:
336
337 ex->c = is->_LA (is, 1); /* Current input character */
338 ex->line = ins->getLine (ins); /* Line number comes from stream */
339 ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */
340 ex->index = is->index (is);
341 ex->streamName = ins->fileName;
342 ex->message = "Unexpected character";
343 break;
344
345 case ANTLR3_TOKENSTREAM:
346
347 ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */
348 ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine ((pANTLR3_COMMON_TOKEN)(ex->token));
349 ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine ((pANTLR3_COMMON_TOKEN)(ex->token));
350 ex->index = cts->tstream->istream->index (cts->tstream->istream);
351 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
352 {
353 ex->streamName = NULL;
354 }
355 else
356 {
357 ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
358 }
359 ex->message = "Unexpected token";
360 break;
361
362 case ANTLR3_COMMONTREENODE:
363
364 ex->token = tns->_LT (tns, 1); /* Current input tree node */
365 ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine ((pANTLR3_BASE_TREE)(ex->token));
366 ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine ((pANTLR3_BASE_TREE)(ex->token));
367 ex->index = tns->istream->index (tns->istream);
368
369 // Are you ready for this? Deep breath now...
370 //
371 {
372 pANTLR3_COMMON_TREE tnode;
373
374 tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
375
376 if (tnode->token == NULL)
377 {
378 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
379 }
380 else
381 {
382 if (tnode->token->input == NULL)
383 {
384 ex->streamName = NULL;
385 }
386 else
387 {
388 ex->streamName = tnode->token->input->fileName;
389 }
390 }
391 ex->message = "Unexpected node";
392 }
393 break;
394 }
395
396 ex->input = is;
397 ex->nextException = recognizer->state->exception; /* So we don't leak the memory */
398 recognizer->state->exception = ex;
399 recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */
400
401 return;
402 }
403
404
405 /// Match current input symbol against ttype. Upon error, do one token
406 /// insertion or deletion if possible.
407 /// To turn off single token insertion or deletion error
408 /// recovery, override mismatchRecover() and have it call
409 /// plain mismatch(), which does not recover. Then any error
410 /// in a rule will cause an exception and immediate exit from
411 /// rule. Rule would recover by resynchronizing to the set of
412 /// symbols that can follow rule ref.
413 ///
414 static void *
match(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)415 match( pANTLR3_BASE_RECOGNIZER recognizer,
416 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
417 {
418 pANTLR3_PARSER parser;
419 pANTLR3_TREE_PARSER tparser;
420 pANTLR3_INT_STREAM is;
421 void * matchedSymbol;
422
423 switch (recognizer->type)
424 {
425 case ANTLR3_TYPE_PARSER:
426
427 parser = (pANTLR3_PARSER) (recognizer->super);
428 tparser = NULL;
429 is = parser->tstream->istream;
430
431 break;
432
433 case ANTLR3_TYPE_TREE_PARSER:
434
435 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
436 parser = NULL;
437 is = tparser->ctnstream->tnstream->istream;
438
439 break;
440
441 default:
442
443 ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
444 return ANTLR3_FALSE;
445
446 break;
447 }
448
449 // Pick up the current input token/node for assignment to labels
450 //
451 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
452
453 if (is->_LA(is, 1) == ttype)
454 {
455 // The token was the one we were told to expect
456 //
457 is->consume(is); // Consume that token from the stream
458 recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were)
459 recognizer->state->failed = ANTLR3_FALSE; // The match was a success
460 return matchedSymbol; // We are done
461 }
462
463 // We did not find the expected token type, if we are backtracking then
464 // we just set the failed flag and return.
465 //
466 if (recognizer->state->backtracking > 0)
467 {
468 // Backtracking is going on
469 //
470 recognizer->state->failed = ANTLR3_TRUE;
471 return matchedSymbol;
472 }
473
474 // We did not find the expected token and there is no backtracking
475 // going on, so we mismatch, which creates an exception in the recognizer exception
476 // stack.
477 //
478 matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
479 return matchedSymbol;
480 }
481
482 /// Consumes the next token, whatever it is, and resets the recognizer state
483 /// so that it is not in error.
484 ///
485 /// \param recognizer
486 /// Recognizer context pointer
487 ///
488 static void
matchAny(pANTLR3_BASE_RECOGNIZER recognizer)489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
490 {
491 pANTLR3_PARSER parser;
492 pANTLR3_TREE_PARSER tparser;
493 pANTLR3_INT_STREAM is;
494
495 switch (recognizer->type)
496 {
497 case ANTLR3_TYPE_PARSER:
498
499 parser = (pANTLR3_PARSER) (recognizer->super);
500 tparser = NULL;
501 is = parser->tstream->istream;
502
503 break;
504
505 case ANTLR3_TYPE_TREE_PARSER:
506
507 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
508 parser = NULL;
509 is = tparser->ctnstream->tnstream->istream;
510
511 break;
512
513 default:
514
515 ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
516 return;
517
518 break;
519 }
520 recognizer->state->errorRecovery = ANTLR3_FALSE;
521 recognizer->state->failed = ANTLR3_FALSE;
522 is->consume(is);
523
524 return;
525 }
526 ///
527 ///
528 static ANTLR3_BOOLEAN
mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,ANTLR3_UINT32 ttype)529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
530 {
531 ANTLR3_UINT32 nextt;
532
533 nextt = is->_LA(is, 2);
534
535 if (nextt == ttype)
536 {
537 if (recognizer->state->exception != NULL)
538 {
539 recognizer->state->exception->expecting = nextt;
540 }
541 return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted
542 }
543 else
544 {
545 return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted
546 }
547 }
548
549 ///
550 ///
551 static ANTLR3_BOOLEAN
mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,pANTLR3_BITSET_LIST follow)552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
553 {
554 ANTLR3_BOOLEAN retcode;
555 pANTLR3_BITSET followClone;
556 pANTLR3_BITSET viableTokensFollowingThisRule;
557
558 if (follow == NULL)
559 {
560 // There is no information about the tokens that can follow the last one
561 // hence we must say that the current one we found is not a member of the
562 // follow set and does not indicate a missing token. We will just consume this
563 // single token and see if the parser works it out from there.
564 //
565 return ANTLR3_FALSE;
566 }
567
568 followClone = NULL;
569 viableTokensFollowingThisRule = NULL;
570
571 // The C bitset maps are laid down at compile time by the
572 // C code generation. Hence we cannot remove things from them
573 // and so on. So, in order to remove EOR (if we need to) then
574 // we clone the static bitset.
575 //
576 followClone = antlr3BitsetLoad(follow);
577 if (followClone == NULL)
578 {
579 return ANTLR3_FALSE;
580 }
581
582 // Compute what can follow this grammar reference
583 //
584 if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
585 {
586 // EOR can follow, but if we are not the start symbol, we
587 // need to remove it.
588 //
589 //if (recognizer->state->following->vector->count >= 0) ml: always true
590 {
591 followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
592 }
593
594 // Now compute the visiable tokens that can follow this rule, according to context
595 // and make them part of the follow set.
596 //
597 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
598 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
599 }
600
601 /// if current token is consistent with what could come after set
602 /// then we know we're missing a token; error recovery is free to
603 /// "insert" the missing token
604 ///
605 /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
606 /// in follow set to indicate that the fall of the start symbol is
607 /// in the set (EOF can follow).
608 ///
609 if ( followClone->isMember(followClone, is->_LA(is, 1))
610 || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
611 )
612 {
613 retcode = ANTLR3_TRUE;
614 }
615 else
616 {
617 retcode = ANTLR3_FALSE;
618 }
619
620 if (viableTokensFollowingThisRule != NULL)
621 {
622 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
623 }
624 if (followClone != NULL)
625 {
626 followClone->free(followClone);
627 }
628
629 return retcode;
630
631 }
632
633 /// Factor out what to do upon token mismatch so tree parsers can behave
634 /// differently. Override and call mismatchRecover(input, ttype, follow)
635 /// to get single token insertion and deletion. Use this to turn off
636 /// single token insertion and deletion. Override mismatchRecover
637 /// to call this instead.
638 ///
639 /// \remark mismatch only works for parsers and must be overridden for anything else.
640 ///
641 static void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
643 {
644 pANTLR3_PARSER parser;
645 pANTLR3_TREE_PARSER tparser;
646 pANTLR3_INT_STREAM is;
647
648 // Install a mismatched token exception in the exception stack
649 //
650 antlr3MTExceptionNew(recognizer);
651 recognizer->state->exception->expecting = ttype;
652
653 switch (recognizer->type)
654 {
655 case ANTLR3_TYPE_PARSER:
656
657 parser = (pANTLR3_PARSER) (recognizer->super);
658 tparser = NULL;
659 is = parser->tstream->istream;
660
661 break;
662
663 default:
664
665 ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
666 return;
667
668 break;
669 }
670
671 if (mismatchIsUnwantedToken(recognizer, is, ttype))
672 {
673 // Create a basic recognition exception structure
674 //
675 antlr3RecognitionExceptionNew(recognizer);
676
677 // Now update it to indicate this is an unwanted token exception
678 //
679 recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
680 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
681
682 return;
683 }
684
685 if (mismatchIsMissingToken(recognizer, is, follow))
686 {
687 // Create a basic recognition exception structure
688 //
689 antlr3RecognitionExceptionNew(recognizer);
690
691 // Now update it to indicate this is an unwanted token exception
692 //
693 recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
694 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
695
696 return;
697 }
698
699 // Just a mismatched token is all we can dtermine
700 //
701 antlr3MTExceptionNew(recognizer);
702
703 return;
704 }
705 /// Report a recognition problem.
706 ///
707 /// This method sets errorRecovery to indicate the parser is recovering
708 /// not parsing. Once in recovery mode, no errors are generated.
709 /// To get out of recovery mode, the parser must successfully match
710 /// a token (after a resync). So it will go:
711 ///
712 /// 1. error occurs
713 /// 2. enter recovery mode, report error
714 /// 3. consume until token found in resynch set
715 /// 4. try to resume parsing
716 /// 5. next match() will reset errorRecovery mode
717 ///
718 /// If you override, make sure to update errorCount if you care about that.
719 ///
720 static void
reportError(pANTLR3_BASE_RECOGNIZER recognizer)721 reportError (pANTLR3_BASE_RECOGNIZER recognizer)
722 {
723 // Invoke the debugger event if there is a debugger listening to us
724 //
725 if (recognizer->debugger != NULL)
726 {
727 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
728 }
729
730 if (recognizer->state->errorRecovery == ANTLR3_TRUE)
731 {
732 // Already in error recovery so don't display another error while doing so
733 //
734 return;
735 }
736
737 // Signal we are in error recovery now
738 //
739 recognizer->state->errorRecovery = ANTLR3_TRUE;
740
741 // Indicate this recognizer had an error while processing.
742 //
743 recognizer->state->errorCount++;
744
745 // Call the error display routine
746 //
747 recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
748 }
749
750 static void
beginBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level)751 beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
752 {
753 if (recognizer->debugger != NULL)
754 {
755 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
756 }
757 }
758
759 static void
endBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level,ANTLR3_BOOLEAN successful)760 endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
761 {
762 if (recognizer->debugger != NULL)
763 {
764 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
765 }
766 }
767 static void
beginResync(pANTLR3_BASE_RECOGNIZER recognizer)768 beginResync (pANTLR3_BASE_RECOGNIZER recognizer)
769 {
770 if (recognizer->debugger != NULL)
771 {
772 recognizer->debugger->beginResync(recognizer->debugger);
773 }
774 }
775
776 static void
endResync(pANTLR3_BASE_RECOGNIZER recognizer)777 endResync (pANTLR3_BASE_RECOGNIZER recognizer)
778 {
779 if (recognizer->debugger != NULL)
780 {
781 recognizer->debugger->endResync(recognizer->debugger);
782 }
783 }
784
785 /// Compute the error recovery set for the current rule.
786 /// Documentation below is from the Java implementation.
787 ///
788 /// During rule invocation, the parser pushes the set of tokens that can
789 /// follow that rule reference on the stack; this amounts to
790 /// computing FIRST of what follows the rule reference in the
791 /// enclosing rule. This local follow set only includes tokens
792 /// from within the rule; i.e., the FIRST computation done by
793 /// ANTLR stops at the end of a rule.
794 //
795 /// EXAMPLE
796 //
797 /// When you find a "no viable alt exception", the input is not
798 /// consistent with any of the alternatives for rule r. The best
799 /// thing to do is to consume tokens until you see something that
800 /// can legally follow a call to r *or* any rule that called r.
801 /// You don't want the exact set of viable next tokens because the
802 /// input might just be missing a token--you might consume the
803 /// rest of the input looking for one of the missing tokens.
804 ///
805 /// Consider grammar:
806 ///
807 /// a : '[' b ']'
808 /// | '(' b ')'
809 /// ;
810 /// b : c '^' INT ;
811 /// c : ID
812 /// | INT
813 /// ;
814 ///
815 /// At each rule invocation, the set of tokens that could follow
816 /// that rule is pushed on a stack. Here are the various "local"
817 /// follow sets:
818 ///
819 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
820 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
821 /// FOLLOW(c_in_b) = FIRST('^') = '^'
822 ///
823 /// Upon erroneous input "[]", the call chain is
824 ///
825 /// a -> b -> c
826 ///
827 /// and, hence, the follow context stack is:
828 ///
829 /// depth local follow set after call to rule
830 /// 0 <EOF> a (from main())
831 /// 1 ']' b
832 /// 2 '^' c
833 ///
834 /// Notice that ')' is not included, because b would have to have
835 /// been called from a different context in rule a for ')' to be
836 /// included.
837 ///
838 /// For error recovery, we cannot consider FOLLOW(c)
839 /// (context-sensitive or otherwise). We need the combined set of
840 /// all context-sensitive FOLLOW sets--the set of all tokens that
841 /// could follow any reference in the call chain. We need to
842 /// resync to one of those tokens. Note that FOLLOW(c)='^' and if
843 /// we resync'd to that token, we'd consume until EOF. We need to
844 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
845 /// In this case, for input "[]", LA(1) is in this set so we would
846 /// not consume anything and after printing an error rule c would
847 /// return normally. It would not find the required '^' though.
848 /// At this point, it gets a mismatched token error and throws an
849 /// exception (since LA(1) is not in the viable following token
850 /// set). The rule exception handler tries to recover, but finds
851 /// the same recovery set and doesn't consume anything. Rule b
852 /// exits normally returning to rule a. Now it finds the ']' (and
853 /// with the successful match exits errorRecovery mode).
854 ///
855 /// So, you can see that the parser walks up call chain looking
856 /// for the token that was a member of the recovery set.
857 ///
858 /// Errors are not generated in errorRecovery mode.
859 ///
860 /// ANTLR's error recovery mechanism is based upon original ideas:
861 ///
862 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
863 ///
864 /// and
865 ///
866 /// "A note on error recovery in recursive descent parsers":
867 /// http://portal.acm.org/citation.cfm?id=947902.947905
868 ///
869 /// Later, Josef Grosch had some good ideas:
870 ///
871 /// "Efficient and Comfortable Error Recovery in Recursive Descent
872 /// Parsers":
873 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
874 ///
875 /// Like Grosch I implemented local FOLLOW sets that are combined
876 /// at run-time upon error to avoid overhead during parsing.
877 ///
878 static pANTLR3_BITSET
computeErrorRecoverySet(pANTLR3_BASE_RECOGNIZER recognizer)879 computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer)
880 {
881 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
882 }
883
884 /// Compute the context-sensitive FOLLOW set for current rule.
885 /// Documentation below is from the Java runtime.
886 ///
887 /// This is the set of token types that can follow a specific rule
888 /// reference given a specific call chain. You get the set of
889 /// viable tokens that can possibly come next (look ahead depth 1)
890 /// given the current call chain. Contrast this with the
891 /// definition of plain FOLLOW for rule r:
892 ///
893 /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
894 ///
895 /// where x in T* and alpha, beta in V*; T is set of terminals and
896 /// V is the set of terminals and non terminals. In other words,
897 /// FOLLOW(r) is the set of all tokens that can possibly follow
898 /// references to r in///any* sentential form (context). At
899 /// runtime, however, we know precisely which context applies as
900 /// we have the call chain. We may compute the exact (rather
901 /// than covering superset) set of following tokens.
902 ///
903 /// For example, consider grammar:
904 ///
905 /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
906 /// | "return" expr '.'
907 /// ;
908 /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
909 /// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
910 /// | '(' expr ')'
911 /// ;
912 ///
913 /// The FOLLOW sets are all inclusive whereas context-sensitive
914 /// FOLLOW sets are precisely what could follow a rule reference.
915 /// For input input "i=(3);", here is the derivation:
916 ///
917 /// stat => ID '=' expr ';'
918 /// => ID '=' atom ('+' atom)* ';'
919 /// => ID '=' '(' expr ')' ('+' atom)* ';'
920 /// => ID '=' '(' atom ')' ('+' atom)* ';'
921 /// => ID '=' '(' INT ')' ('+' atom)* ';'
922 /// => ID '=' '(' INT ')' ';'
923 ///
924 /// At the "3" token, you'd have a call chain of
925 ///
926 /// stat -> expr -> atom -> expr -> atom
927 ///
928 /// What can follow that specific nested ref to atom? Exactly ')'
929 /// as you can see by looking at the derivation of this specific
930 /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
931 ///
932 /// You want the exact viable token set when recovering from a
933 /// token mismatch. Upon token mismatch, if LA(1) is member of
934 /// the viable next token set, then you know there is most likely
935 /// a missing token in the input stream. "Insert" one by just not
936 /// throwing an exception.
937 ///
938 static pANTLR3_BITSET
computeCSRuleFollow(pANTLR3_BASE_RECOGNIZER recognizer)939 computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer)
940 {
941 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
942 }
943
944 /// Compute the current followset for the input stream.
945 ///
946 static pANTLR3_BITSET
combineFollows(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_BOOLEAN exact)947 combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
948 {
949 pANTLR3_BITSET followSet;
950 pANTLR3_BITSET localFollowSet;
951 ANTLR3_UINT32 top;
952 ANTLR3_UINT32 i;
953
954 top = recognizer->state->following->size(recognizer->state->following);
955
956 followSet = antlr3BitsetNew(0);
957 localFollowSet = NULL;
958
959 for (i = top; i>0; i--)
960 {
961 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
962
963 if (localFollowSet != NULL)
964 {
965 followSet->borInPlace(followSet, localFollowSet);
966
967 if (exact == ANTLR3_TRUE)
968 {
969 if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
970 {
971 // Only leave EOR in the set if at top (start rule); this lets us know
972 // if we have to include the follow(start rule); I.E., EOF
973 //
974 if (i>1)
975 {
976 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
977 }
978 }
979 else
980 {
981 break; // Cannot see End Of Rule from here, just drop out
982 }
983 }
984 localFollowSet->free(localFollowSet);
985 localFollowSet = NULL;
986 }
987 }
988
989 if (localFollowSet != NULL)
990 {
991 localFollowSet->free(localFollowSet);
992 }
993 return followSet;
994 }
995
996 /// Standard/Example error display method.
997 /// No generic error message display funciton coudl possibly do everything correctly
998 /// for all possible parsers. Hence you are provided with this example routine, which
999 /// you should override in your parser/tree parser to do as you will.
1000 ///
1001 /// Here we depart somewhat from the Java runtime as that has now split up a lot
1002 /// of the error display routines into spearate units. However, ther is little advantage
1003 /// to this in the C version as you will probably implement all such routines as a
1004 /// separate translation unit, rather than install them all as pointers to functions
1005 /// in the base recognizer.
1006 ///
1007 static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)1008 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1009 {
1010 pANTLR3_PARSER parser;
1011 pANTLR3_TREE_PARSER tparser;
1012 pANTLR3_INT_STREAM is;
1013 pANTLR3_STRING ttext;
1014 pANTLR3_STRING ftext;
1015 pANTLR3_EXCEPTION ex;
1016 pANTLR3_COMMON_TOKEN theToken;
1017 pANTLR3_BASE_TREE theBaseTree;
1018 pANTLR3_COMMON_TREE theCommonTree;
1019
1020 // Retrieve some info for easy reading.
1021 //
1022 ex = recognizer->state->exception;
1023 ttext = NULL;
1024
1025 // See if there is a 'filename' we can use
1026 //
1027 if (ex->streamName == NULL)
1028 {
1029 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1030 {
1031 ANTLR3_FPRINTF(stderr, "-end of input-(");
1032 }
1033 else
1034 {
1035 ANTLR3_FPRINTF(stderr, "-unknown source-(");
1036 }
1037 }
1038 else
1039 {
1040 ftext = ex->streamName->to8(ex->streamName);
1041 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1042 }
1043
1044 // Next comes the line number
1045 //
1046
1047 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1048 ANTLR3_FPRINTF(stderr, " : error %d : %s",
1049 recognizer->state->exception->type,
1050 (pANTLR3_UINT8) (recognizer->state->exception->message));
1051
1052
1053 // How we determine the next piece is dependent on which thing raised the
1054 // error.
1055 //
1056 switch (recognizer->type)
1057 {
1058 case ANTLR3_TYPE_PARSER:
1059
1060 // Prepare the knowledge we know we have
1061 //
1062 parser = (pANTLR3_PARSER) (recognizer->super);
1063 tparser = NULL;
1064 is = parser->tstream->istream;
1065 theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1066 ttext = theToken->toString(theToken);
1067
1068 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1069 if (theToken != NULL)
1070 {
1071 if (theToken->type == ANTLR3_TOKEN_EOF)
1072 {
1073 ANTLR3_FPRINTF(stderr, ", at <EOF>");
1074 }
1075 else
1076 {
1077 // Guard against null text in a token
1078 //
1079 ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1080 }
1081 }
1082 break;
1083
1084 case ANTLR3_TYPE_TREE_PARSER:
1085
1086 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1087 parser = NULL;
1088 is = tparser->ctnstream->tnstream->istream;
1089 theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1090 ttext = theBaseTree->toStringTree(theBaseTree);
1091
1092 if (theBaseTree != NULL)
1093 {
1094 theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
1095
1096 if (theCommonTree != NULL)
1097 {
1098 theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
1099 }
1100 ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1101 ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1102 }
1103 break;
1104
1105 default:
1106
1107 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1108 return;
1109 break;
1110 }
1111
1112 // Although this function should generally be provided by the implementation, this one
1113 // should be as helpful as possible for grammar developers and serve as an example
1114 // of what you can do with each exception type. In general, when you make up your
1115 // 'real' handler, you should debug the routine with all possible errors you expect
1116 // which will then let you be as specific as possible about all circumstances.
1117 //
1118 // Note that in the general case, errors thrown by tree parsers indicate a problem
1119 // with the output of the parser or with the tree grammar itself. The job of the parser
1120 // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1121 // at that stage should really be semantic errors that your own code determines and handles
1122 // in whatever way is appropriate.
1123 //
1124 switch (ex->type)
1125 {
1126 case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1127
1128 // Indicates that the recognizer was fed a token which seesm to be
1129 // spurious input. We can detect this when the token that follows
1130 // this unwanted token would normally be part of the syntactically
1131 // correct stream. Then we can see that the token we are looking at
1132 // is just something that should not be there and throw this exception.
1133 //
1134 if (tokenNames == NULL)
1135 {
1136 ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1137 }
1138 else
1139 {
1140 if (ex->expecting == ANTLR3_TOKEN_EOF)
1141 {
1142 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1143 }
1144 else
1145 {
1146 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1147 }
1148 }
1149 break;
1150
1151 case ANTLR3_MISSING_TOKEN_EXCEPTION:
1152
1153 // Indicates that the recognizer detected that the token we just
1154 // hit would be valid syntactically if preceeded by a particular
1155 // token. Perhaps a missing ';' at line end or a missing ',' in an
1156 // expression list, and such like.
1157 //
1158 if (tokenNames == NULL)
1159 {
1160 ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1161 }
1162 else
1163 {
1164 if (ex->expecting == ANTLR3_TOKEN_EOF)
1165 {
1166 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1167 }
1168 else
1169 {
1170 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1171 }
1172 }
1173 break;
1174
1175 case ANTLR3_RECOGNITION_EXCEPTION:
1176
1177 // Indicates that the recognizer received a token
1178 // in the input that was not predicted. This is the basic exception type
1179 // from which all others are derived. So we assume it was a syntax error.
1180 // You may get this if there are not more tokens and more are needed
1181 // to complete a parse for instance.
1182 //
1183 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1184 break;
1185
1186 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1187
1188 // We were expecting to see one thing and got another. This is the
1189 // most common error if we coudl not detect a missing or unwanted token.
1190 // Here you can spend your efforts to
1191 // derive more useful error messages based on the expected
1192 // token set and the last token and so on. The error following
1193 // bitmaps do a good job of reducing the set that we were looking
1194 // for down to something small. Knowing what you are parsing may be
1195 // able to allow you to be even more specific about an error.
1196 //
1197 if (tokenNames == NULL)
1198 {
1199 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1200 }
1201 else
1202 {
1203 if (ex->expecting == ANTLR3_TOKEN_EOF)
1204 {
1205 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1206 }
1207 else
1208 {
1209 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1210 }
1211 }
1212 break;
1213
1214 case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1215
1216 // We could not pick any alt decision from the input given
1217 // so god knows what happened - however when you examine your grammar,
1218 // you should. It means that at the point where the current token occurred
1219 // that the DFA indicates nowhere to go from here.
1220 //
1221 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1222
1223 break;
1224
1225 case ANTLR3_MISMATCHED_SET_EXCEPTION:
1226
1227 {
1228 ANTLR3_UINT32 count;
1229 ANTLR3_UINT32 bit;
1230 ANTLR3_UINT32 size;
1231 ANTLR3_UINT32 numbits;
1232 pANTLR3_BITSET errBits;
1233
1234 // This means we were able to deal with one of a set of
1235 // possible tokens at this point, but we did not see any
1236 // member of that set.
1237 //
1238 ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");
1239
1240 // What tokens could we have accepted at this point in the
1241 // parse?
1242 //
1243 count = 0;
1244 errBits = antlr3BitsetLoad (ex->expectingSet);
1245 numbits = errBits->numBits (errBits);
1246 size = errBits->size (errBits);
1247
1248 if (size > 0)
1249 {
1250 // However many tokens we could have dealt with here, it is usually
1251 // not useful to print ALL of the set here. I arbitrarily chose 8
1252 // here, but you should do whatever makes sense for you of course.
1253 // No token number 0, so look for bit 1 and on.
1254 //
1255 for (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1256 {
1257 // TODO: This doesn;t look right - should be asking if the bit is set!!
1258 //
1259 if (tokenNames[bit])
1260 {
1261 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1262 count++;
1263 }
1264 }
1265 ANTLR3_FPRINTF(stderr, "\n");
1266 }
1267 else
1268 {
1269 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1270 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1271 }
1272 }
1273 break;
1274
1275 case ANTLR3_EARLY_EXIT_EXCEPTION:
1276
1277 // We entered a loop requiring a number of token sequences
1278 // but found a token that ended that sequence earlier than
1279 // we should have done.
1280 //
1281 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1282 break;
1283
1284 default:
1285
1286 // We don't handle any other exceptions here, but you can
1287 // if you wish. If we get an exception that hits this point
1288 // then we are just going to report what we know about the
1289 // token.
1290 //
1291 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1292 break;
1293 }
1294
1295 // Here you have the token that was in error which if this is
1296 // the standard implementation will tell you the line and offset
1297 // and also record the address of the start of the line in the
1298 // input stream. You could therefore print the source line and so on.
1299 // Generally though, I would expect that your lexer/parser will keep
1300 // its own map of lines and source pointers or whatever as there
1301 // are a lot of specific things you need to know about the input
1302 // to do something like that.
1303 // Here is where you do it though :-).
1304 //
1305 }
1306
1307 /// Return how many syntax errors were detected by this recognizer
1308 ///
1309 static ANTLR3_UINT32
getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)1310 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1311 {
1312 return recognizer->state->errorCount;
1313 }
1314
1315 /// Recover from an error found on the input stream. Mostly this is
1316 /// NoViableAlt exceptions, but could be a mismatched token that
1317 /// the match() routine could not recover from.
1318 ///
1319 static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)1320 recover (pANTLR3_BASE_RECOGNIZER recognizer)
1321 {
1322 // Used to compute the follow set of tokens
1323 //
1324 pANTLR3_BITSET followSet;
1325 pANTLR3_PARSER parser;
1326 pANTLR3_TREE_PARSER tparser;
1327 pANTLR3_INT_STREAM is;
1328
1329 switch (recognizer->type)
1330 {
1331 case ANTLR3_TYPE_PARSER:
1332
1333 parser = (pANTLR3_PARSER) (recognizer->super);
1334 tparser = NULL;
1335 is = parser->tstream->istream;
1336
1337 break;
1338
1339 case ANTLR3_TYPE_TREE_PARSER:
1340
1341 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1342 parser = NULL;
1343 is = tparser->ctnstream->tnstream->istream;
1344
1345 break;
1346
1347 default:
1348
1349 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1350 return;
1351
1352 break;
1353 }
1354
1355 // Are we about to repeat the same error?
1356 //
1357 if (recognizer->state->lastErrorIndex == is->index(is))
1358 {
1359 // The last error was at the same token index point. This must be a case
1360 // where LT(1) is in the recovery token set so nothing is
1361 // consumed. Consume a single token so at least to prevent
1362 // an infinite loop; this is a failsafe.
1363 //
1364 is->consume(is);
1365 }
1366
1367 // Record error index position
1368 //
1369 recognizer->state->lastErrorIndex = is->index(is);
1370
1371 // Work out the follows set for error recovery
1372 //
1373 followSet = recognizer->computeErrorRecoverySet(recognizer);
1374
1375 // Call resync hook (for debuggers and so on)
1376 //
1377 recognizer->beginResync(recognizer);
1378
1379 // Consume tokens until we have resynced to something in the follows set
1380 //
1381 recognizer->consumeUntilSet(recognizer, followSet);
1382
1383 // End resync hook
1384 //
1385 recognizer->endResync(recognizer);
1386
1387 // Destroy the temporary bitset we produced.
1388 //
1389 followSet->free(followSet);
1390
1391 // Reset the inError flag so we don't re-report the exception
1392 //
1393 recognizer->state->error = ANTLR3_FALSE;
1394 recognizer->state->failed = ANTLR3_FALSE;
1395 }
1396
1397
1398 /// Attempt to recover from a single missing or extra token.
1399 ///
1400 /// EXTRA TOKEN
1401 ///
1402 /// LA(1) is not what we are looking for. If LA(2) has the right token,
1403 /// however, then assume LA(1) is some extra spurious token. Delete it
1404 /// and LA(2) as if we were doing a normal match(), which advances the
1405 /// input.
1406 ///
1407 /// MISSING TOKEN
1408 ///
1409 /// If current token is consistent with what could come after
1410 /// ttype then it is ok to "insert" the missing token, else throw
1411 /// exception For example, Input "i=(3;" is clearly missing the
1412 /// ')'. When the parser returns from the nested call to expr, it
1413 /// will have call chain:
1414 ///
1415 /// stat -> expr -> atom
1416 ///
1417 /// and it will be trying to match the ')' at this point in the
1418 /// derivation:
1419 ///
1420 /// => ID '=' '(' INT ')' ('+' atom)* ';'
1421 /// ^
1422 /// match() will see that ';' doesn't match ')' and report a
1423 /// mismatched token error. To recover, it sees that LA(1)==';'
1424 /// is in the set of tokens that can follow the ')' token
1425 /// reference in rule atom. It can assume that you forgot the ')'.
1426 ///
1427 /// The exception that was passed in, in the java implementation is
1428 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1429 /// error flag and rules cascade back when this is set.
1430 ///
1431 static void *
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)1432 recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1433 {
1434 pANTLR3_PARSER parser;
1435 pANTLR3_TREE_PARSER tparser;
1436 pANTLR3_INT_STREAM is;
1437 void * matchedSymbol;
1438
1439
1440
1441 switch (recognizer->type)
1442 {
1443 case ANTLR3_TYPE_PARSER:
1444
1445 parser = (pANTLR3_PARSER) (recognizer->super);
1446 tparser = NULL;
1447 is = parser->tstream->istream;
1448
1449 break;
1450
1451 case ANTLR3_TYPE_TREE_PARSER:
1452
1453 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1454 parser = NULL;
1455 is = tparser->ctnstream->tnstream->istream;
1456
1457 break;
1458
1459 default:
1460
1461 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1462 return NULL;
1463
1464 break;
1465 }
1466
1467 // Create an exception if we need one
1468 //
1469 if (recognizer->state->exception == NULL)
1470 {
1471 antlr3RecognitionExceptionNew(recognizer);
1472 }
1473
1474 // If the next token after the one we are looking at in the input stream
1475 // is what we are looking for then we remove the one we have discovered
1476 // from the stream by consuming it, then consume this next one along too as
1477 // if nothing had happened.
1478 //
1479 if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1480 {
1481 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1482 recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1483
1484 // Call resync hook (for debuggers and so on)
1485 //
1486 if (recognizer->debugger != NULL)
1487 {
1488 recognizer->debugger->beginResync(recognizer->debugger);
1489 }
1490
1491 // "delete" the extra token
1492 //
1493 recognizer->beginResync(recognizer);
1494 is->consume(is);
1495 recognizer->endResync(recognizer);
1496 // End resync hook
1497 //
1498 if (recognizer->debugger != NULL)
1499 {
1500 recognizer->debugger->endResync(recognizer->debugger);
1501 }
1502
1503 // Print out the error after we consume so that ANTLRWorks sees the
1504 // token in the exception.
1505 //
1506 recognizer->reportError(recognizer);
1507
1508 // Return the token we are actually matching
1509 //
1510 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1511
1512 // Consume the token that the rule actually expected to get as if everything
1513 // was hunky dory.
1514 //
1515 is->consume(is);
1516
1517 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1518
1519 return matchedSymbol;
1520 }
1521
1522 // Single token deletion (Unwanted above) did not work
1523 // so we see if we can insert a token instead by calculating which
1524 // token would be missing
1525 //
1526 if (mismatchIsMissingToken(recognizer, is, follow))
1527 {
1528 // We can fake the missing token and proceed
1529 //
1530 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1532 recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533 recognizer->state->exception->token = matchedSymbol;
1534 recognizer->state->exception->expecting = ttype;
1535
1536 // Print out the error after we insert so that ANTLRWorks sees the
1537 // token in the exception.
1538 //
1539 recognizer->reportError(recognizer);
1540
1541 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1542
1543 return matchedSymbol;
1544 }
1545
1546
1547 // Neither deleting nor inserting tokens allows recovery
1548 // must just report the exception.
1549 //
1550 recognizer->state->error = ANTLR3_TRUE;
1551 return NULL;
1552 }
1553
1554 static void *
recoverFromMismatchedSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST follow)1555 recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1556 {
1557 pANTLR3_PARSER parser;
1558 pANTLR3_TREE_PARSER tparser;
1559 pANTLR3_INT_STREAM is;
1560 pANTLR3_COMMON_TOKEN matchedSymbol;
1561
1562 switch (recognizer->type)
1563 {
1564 case ANTLR3_TYPE_PARSER:
1565
1566 parser = (pANTLR3_PARSER) (recognizer->super);
1567 tparser = NULL;
1568 is = parser->tstream->istream;
1569
1570 break;
1571
1572 case ANTLR3_TYPE_TREE_PARSER:
1573
1574 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1575 parser = NULL;
1576 is = tparser->ctnstream->tnstream->istream;
1577
1578 break;
1579
1580 default:
1581
1582 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1583 return NULL;
1584
1585 break;
1586 }
1587
1588 if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1589 {
1590 // We can fake the missing token and proceed
1591 //
1592 matchedSymbol = (pANTLR3_COMMON_TOKEN)recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1594 recognizer->state->exception->token = matchedSymbol;
1595
1596 // Print out the error after we insert so that ANTLRWorks sees the
1597 // token in the exception.
1598 //
1599 recognizer->reportError(recognizer);
1600
1601 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1602
1603 return matchedSymbol;
1604 }
1605
1606 // TODO - Single token deletion like in recoverFromMismatchedToken()
1607 //
1608 recognizer->state->error = ANTLR3_TRUE;
1609 recognizer->state->failed = ANTLR3_TRUE;
1610 return NULL;
1611 }
1612
1613 /// This code is factored out from mismatched token and mismatched set
1614 /// recovery. It handles "single token insertion" error recovery for
1615 /// both. No tokens are consumed to recover from insertions. Return
1616 /// true if recovery was possible else return false.
1617 ///
1618 static ANTLR3_BOOLEAN
recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST followBits)1619 recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1620 {
1621 pANTLR3_BITSET viableToksFollowingRule;
1622 pANTLR3_BITSET follow;
1623 pANTLR3_PARSER parser;
1624 pANTLR3_TREE_PARSER tparser;
1625 pANTLR3_INT_STREAM is;
1626
1627 switch (recognizer->type)
1628 {
1629 case ANTLR3_TYPE_PARSER:
1630
1631 parser = (pANTLR3_PARSER) (recognizer->super);
1632 tparser = NULL;
1633 is = parser->tstream->istream;
1634
1635 break;
1636
1637 case ANTLR3_TYPE_TREE_PARSER:
1638
1639 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1640 parser = NULL;
1641 is = tparser->ctnstream->tnstream->istream;
1642
1643 break;
1644
1645 default:
1646
1647 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648 return ANTLR3_FALSE;
1649
1650 break;
1651 }
1652
1653 follow = antlr3BitsetLoad(followBits);
1654
1655 if (follow == NULL)
1656 {
1657 /* The follow set is NULL, which means we don't know what can come
1658 * next, so we "hit and hope" by just signifying that we cannot
1659 * recover, which will just cause the next token to be consumed,
1660 * which might dig us out.
1661 */
1662 return ANTLR3_FALSE;
1663 }
1664
1665 /* We have a bitmap for the follow set, hence we can compute
1666 * what can follow this grammar element reference.
1667 */
1668 if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1669 {
1670 /* First we need to know which of the available tokens are viable
1671 * to follow this reference.
1672 */
1673 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1674
1675 /* Remove the EOR token, which we do not wish to compute with
1676 */
1677 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678 viableToksFollowingRule->free(viableToksFollowingRule);
1679 /* We now have the computed set of what can follow the current token
1680 */
1681 }
1682
1683 /* We can now see if the current token works with the set of tokens
1684 * that could follow the current grammar reference. If it looks like it
1685 * is consistent, then we can "insert" that token by not throwing
1686 * an exception and assuming that we saw it.
1687 */
1688 if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1689 {
1690 /* report the error, but don't cause any rules to abort and stuff
1691 */
1692 recognizer->reportError(recognizer);
1693 if (follow != NULL)
1694 {
1695 follow->free(follow);
1696 }
1697 recognizer->state->error = ANTLR3_FALSE;
1698 recognizer->state->failed = ANTLR3_FALSE;
1699 return ANTLR3_TRUE; /* Success in recovery */
1700 }
1701
1702 if (follow != NULL)
1703 {
1704 follow->free(follow);
1705 }
1706
1707 /* We could not find anything viable to do, so this is going to
1708 * cause an exception.
1709 */
1710 return ANTLR3_FALSE;
1711 }
1712
1713 /// Eat tokens from the input stream until we get one of JUST the right type
1714 ///
1715 static void
consumeUntil(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 tokenType)1716 consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1717 {
1718 ANTLR3_UINT32 ttype;
1719 pANTLR3_PARSER parser;
1720 pANTLR3_TREE_PARSER tparser;
1721 pANTLR3_INT_STREAM is;
1722
1723 switch (recognizer->type)
1724 {
1725 case ANTLR3_TYPE_PARSER:
1726
1727 parser = (pANTLR3_PARSER) (recognizer->super);
1728 tparser = NULL;
1729 is = parser->tstream->istream;
1730
1731 break;
1732
1733 case ANTLR3_TYPE_TREE_PARSER:
1734
1735 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1736 parser = NULL;
1737 is = tparser->ctnstream->tnstream->istream;
1738
1739 break;
1740
1741 default:
1742
1743 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1744 return;
1745
1746 break;
1747 }
1748
1749 // What do have at the moment?
1750 //
1751 ttype = is->_LA(is, 1);
1752
1753 // Start eating tokens until we get to the one we want.
1754 //
1755 while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1756 {
1757 is->consume(is);
1758 ttype = is->_LA(is, 1);
1759 }
1760 }
1761
1762 /// Eat tokens from the input stream until we find one that
1763 /// belongs to the supplied set.
1764 ///
1765 static void
consumeUntilSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET set)1766 consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1767 {
1768 ANTLR3_UINT32 ttype;
1769 pANTLR3_PARSER parser;
1770 pANTLR3_TREE_PARSER tparser;
1771 pANTLR3_INT_STREAM is;
1772
1773 switch (recognizer->type)
1774 {
1775 case ANTLR3_TYPE_PARSER:
1776
1777 parser = (pANTLR3_PARSER) (recognizer->super);
1778 tparser = NULL;
1779 is = parser->tstream->istream;
1780
1781 break;
1782
1783 case ANTLR3_TYPE_TREE_PARSER:
1784
1785 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1786 parser = NULL;
1787 is = tparser->ctnstream->tnstream->istream;
1788
1789 break;
1790
1791 default:
1792
1793 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1794 return;
1795
1796 break;
1797 }
1798
1799 // What do have at the moment?
1800 //
1801 ttype = is->_LA(is, 1);
1802
1803 // Start eating tokens until we get to one we want.
1804 //
1805 while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1806 {
1807 is->consume(is);
1808 ttype = is->_LA(is, 1);
1809 }
1810 }
1811
1812 /** Return the rule invocation stack (how we got here in the parse.
1813 * In the java version Ter just asks the JVM for all the information
1814 * but in C we don't get this information, so I am going to do nothing
1815 * right now.
1816 */
1817 static pANTLR3_STACK
getRuleInvocationStack(pANTLR3_BASE_RECOGNIZER recognizer)1818 getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer)
1819 {
1820 return NULL;
1821 }
1822
1823 static pANTLR3_STACK
getRuleInvocationStackNamed(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 name)1824 getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1825 {
1826 return NULL;
1827 }
1828
1829 /** Convenience method for template rewrites - NYI.
1830 */
1831 static pANTLR3_HASH_TABLE
toStrings(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_HASH_TABLE tokens)1832 toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1833 {
1834 return NULL;
1835 }
1836
1837 static void ANTLR3_CDECL
freeIntTrie(void * trie)1838 freeIntTrie (void * trie)
1839 {
1840 ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1841 }
1842
1843
1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1845 * start index before. If the rule has not parsed input starting from the supplied start index,
1846 * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1847 * then it will return the point where it last stopped parsing after that start point.
1848 *
1849 * \remark
1850 * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851 * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852 * version of the table.
1853 */
1854 static ANTLR3_MARKER
getRuleMemoization(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_INTKEY ruleIndex,ANTLR3_MARKER ruleParseStart)1855 getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1856 {
1857 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1858 */
1859 pANTLR3_INT_TRIE ruleList;
1860 ANTLR3_MARKER stopIndex;
1861 pANTLR3_TRIE_ENTRY entry;
1862
1863 /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864 * as we will need it eventually if we are being asked for the memo here.
1865 */
1866 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1867
1868 if (entry == NULL)
1869 {
1870 /* Did not find it, so create a new one for it, with a bit depth based on the
1871 * size of the input stream. We need the bit depth to incorporate the number if
1872 * bits required to represent the largest possible stop index in the input, which is the
1873 * last character. An int stream is free to return the largest 64 bit offset if it has
1874 * no idea of the size, but you should remember that this will cause the leftmost
1875 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1876 */
1877 ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */
1878
1879 if (ruleList != NULL)
1880 {
1881 recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1882 }
1883
1884 /* We cannot have a stopIndex in a trie we have just created of course
1885 */
1886 return MEMO_RULE_UNKNOWN;
1887 }
1888
1889 ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr);
1890
1891 /* See if there is a stop index associated with the supplied start index.
1892 */
1893 stopIndex = 0;
1894
1895 entry = ruleList->get(ruleList, ruleParseStart);
1896 if (entry != NULL)
1897 {
1898 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1899 }
1900
1901 if (stopIndex == 0)
1902 {
1903 return MEMO_RULE_UNKNOWN;
1904 }
1905
1906 return stopIndex;
1907 }
1908
1909 /** Has this rule already parsed input at the current index in the
1910 * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1911 * if we have not.
1912 *
1913 * This method has a side-effect: if we have seen this input for
1914 * this rule and successfully parsed before, then seek ahead to
1915 * 1 past the stop token matched for this rule last time.
1916 */
1917 static ANTLR3_BOOLEAN
alreadyParsedRule(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex)1918 alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1919 {
1920 ANTLR3_MARKER stopIndex;
1921 pANTLR3_LEXER lexer;
1922 pANTLR3_PARSER parser;
1923 pANTLR3_TREE_PARSER tparser;
1924 pANTLR3_INT_STREAM is;
1925
1926 switch (recognizer->type)
1927 {
1928 case ANTLR3_TYPE_PARSER:
1929
1930 parser = (pANTLR3_PARSER) (recognizer->super);
1931 tparser = NULL;
1932 lexer = NULL;
1933 is = parser->tstream->istream;
1934
1935 break;
1936
1937 case ANTLR3_TYPE_TREE_PARSER:
1938
1939 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1940 parser = NULL;
1941 lexer = NULL;
1942 is = tparser->ctnstream->tnstream->istream;
1943
1944 break;
1945
1946 case ANTLR3_TYPE_LEXER:
1947
1948 lexer = (pANTLR3_LEXER) (recognizer->super);
1949 parser = NULL;
1950 tparser = NULL;
1951 is = lexer->input->istream;
1952 break;
1953
1954 default:
1955
1956 ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957 return ANTLR3_FALSE;
1958
1959 break;
1960 }
1961
1962 /* See if we have a memo marker for this.
1963 */
1964 stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1965
1966 if (stopIndex == MEMO_RULE_UNKNOWN)
1967 {
1968 return ANTLR3_FALSE;
1969 }
1970
1971 if (stopIndex == MEMO_RULE_FAILED)
1972 {
1973 recognizer->state->failed = ANTLR3_TRUE;
1974 }
1975 else
1976 {
1977 is->seek(is, stopIndex+1);
1978 }
1979
1980 /* If here then the rule was executed for this input already
1981 */
1982 return ANTLR3_TRUE;
1983 }
1984
1985 /** Record whether or not this rule parsed the input at this position
1986 * successfully.
1987 */
1988 static void
memoize(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex,ANTLR3_MARKER ruleParseStart)1989 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1990 {
1991 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1992 */
1993 pANTLR3_INT_TRIE ruleList;
1994 pANTLR3_TRIE_ENTRY entry;
1995 ANTLR3_MARKER stopIndex;
1996 pANTLR3_LEXER lexer;
1997 pANTLR3_PARSER parser;
1998 pANTLR3_TREE_PARSER tparser;
1999 pANTLR3_INT_STREAM is;
2000
2001 switch (recognizer->type)
2002 {
2003 case ANTLR3_TYPE_PARSER:
2004
2005 parser = (pANTLR3_PARSER) (recognizer->super);
2006 tparser = NULL;
2007 is = parser->tstream->istream;
2008
2009 break;
2010
2011 case ANTLR3_TYPE_TREE_PARSER:
2012
2013 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2014 parser = NULL;
2015 is = tparser->ctnstream->tnstream->istream;
2016
2017 break;
2018
2019 case ANTLR3_TYPE_LEXER:
2020
2021 lexer = (pANTLR3_LEXER) (recognizer->super);
2022 parser = NULL;
2023 tparser = NULL;
2024 is = lexer->input->istream;
2025 break;
2026
2027 default:
2028
2029 ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2030 return;
2031
2032 break;
2033 }
2034
2035 stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2036
2037 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2038
2039 if (entry != NULL)
2040 {
2041 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2042
2043 /* If we don't already have this entry, append it. The memoize trie does not
2044 * accept duplicates so it won't add it if already there and we just ignore the
2045 * return code as we don't care if it is there already.
2046 */
2047 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2048 }
2049 }
2050 /** A syntactic predicate. Returns true/false depending on whether
2051 * the specified grammar fragment matches the current input stream.
2052 * This resets the failed instance var afterwards.
2053 */
2054 static ANTLR3_BOOLEAN
synpred(pANTLR3_BASE_RECOGNIZER recognizer,void * ctx,void (* predicate)(void * ctx))2055 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2056 {
2057 ANTLR3_MARKER start;
2058 pANTLR3_PARSER parser;
2059 pANTLR3_TREE_PARSER tparser;
2060 pANTLR3_INT_STREAM is;
2061
2062 switch (recognizer->type)
2063 {
2064 case ANTLR3_TYPE_PARSER:
2065
2066 parser = (pANTLR3_PARSER) (recognizer->super);
2067 tparser = NULL;
2068 is = parser->tstream->istream;
2069
2070 break;
2071
2072 case ANTLR3_TYPE_TREE_PARSER:
2073
2074 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2075 parser = NULL;
2076 is = tparser->ctnstream->tnstream->istream;
2077
2078 break;
2079
2080 default:
2081
2082 ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083 return ANTLR3_FALSE;
2084
2085 break;
2086 }
2087
2088 /* Begin backtracking so we can get back to where we started after trying out
2089 * the syntactic predicate.
2090 */
2091 start = is->mark(is);
2092 recognizer->state->backtracking++;
2093
2094 /* Try the syntactical predicate
2095 */
2096 predicate(ctx);
2097
2098 /* Reset
2099 */
2100 is->rewind(is, start);
2101 recognizer->state->backtracking--;
2102
2103 if (recognizer->state->failed == ANTLR3_TRUE)
2104 {
2105 /* Predicate failed
2106 */
2107 recognizer->state->failed = ANTLR3_FALSE;
2108 return ANTLR3_FALSE;
2109 }
2110 else
2111 {
2112 /* Predicate was successful
2113 */
2114 recognizer->state->failed = ANTLR3_FALSE;
2115 return ANTLR3_TRUE;
2116 }
2117 }
2118
2119 static void
reset(pANTLR3_BASE_RECOGNIZER recognizer)2120 reset(pANTLR3_BASE_RECOGNIZER recognizer)
2121 {
2122 if (recognizer->state->following != NULL)
2123 {
2124 recognizer->state->following->free(recognizer->state->following);
2125 }
2126
2127 // Reset the state flags
2128 //
2129 recognizer->state->errorRecovery = ANTLR3_FALSE;
2130 recognizer->state->lastErrorIndex = -1;
2131 recognizer->state->failed = ANTLR3_FALSE;
2132 recognizer->state->errorCount = 0;
2133 recognizer->state->backtracking = 0;
2134 recognizer->state->following = NULL;
2135
2136 if (recognizer->state != NULL)
2137 {
2138 if (recognizer->state->ruleMemo != NULL)
2139 {
2140 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141 recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */
2142 }
2143 }
2144
2145 // ml: 2013-11-05, added reset of old exceptions.
2146 pANTLR3_EXCEPTION thisE = recognizer->state->exception;
2147 if (thisE != NULL)
2148 {
2149 thisE->freeEx(thisE);
2150 recognizer->state->exception = NULL;
2151 }
2152
2153 // Install a new following set
2154 //
2155 recognizer->state->following = antlr3StackNew(8);
2156
2157 }
2158
2159 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2160 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2161 //
2162 static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)2163 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2164 {
2165 return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2166 }
2167
2168 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2169 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2170 //
2171 static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)2172 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
2173 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2174 {
2175 pANTLR3_TOKEN_STREAM ts;
2176 pANTLR3_COMMON_TOKEN_STREAM cts;
2177 pANTLR3_COMMON_TOKEN token;
2178 pANTLR3_COMMON_TOKEN current;
2179 pANTLR3_STRING text;
2180
2181 // Dereference the standard pointers
2182 //
2183 ts = (pANTLR3_TOKEN_STREAM)istream->super;
2184 cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2185
2186 // Work out what to use as the current symbol to make a line and offset etc
2187 // If we are at EOF, we use the token before EOF
2188 //
2189 current = ts->_LT(ts, 1);
2190 if (current->getType(current) == ANTLR3_TOKEN_EOF)
2191 {
2192 current = ts->_LT(ts, -1);
2193 }
2194
2195 // Create a new empty token
2196 //
2197 if (recognizer->state->tokFactory == NULL)
2198 {
2199 // We don't yet have a token factory for making tokens
2200 // we just need a fake one using the input stream of the current
2201 // token.
2202 //
2203 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2204 }
2205 token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2206 if (token == NULL) { return NULL; }
2207
2208 // Set some of the token properties based on the current token
2209 //
2210 token->setLine (token, current->getLine(current));
2211 token->setCharPositionInLine (token, current->getCharPositionInLine(current));
2212 token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2213 token->setType (token, expectedTokenType);
2214 token->user1 = current->user1;
2215 token->user2 = current->user2;
2216 token->user3 = current->user3;
2217 token->custom = current->custom;
2218 token->lineStart = current->lineStart;
2219
2220 // Create the token text that shows it has been inserted
2221 //
2222 token->setText8(token, (pANTLR3_UINT8)"<missing ");
2223 text = token->getText(token);
2224
2225 if (text != NULL)
2226 {
2227 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2228 text->append8(text, (const char *)">");
2229 }
2230
2231 // Finally return the pointer to our new token
2232 //
2233 return token;
2234 }
2235
2236
2237 #ifdef ANTLR3_WINDOWS
2238 #pragma warning( default : 4100 )
2239 #endif
2240
2241 /// @}
2242 ///
2243
2244