1 /* ANTLRParser.h 2 * 3 * Define the generic ANTLRParser superclass, which is subclassed to 4 * define an actual parser. 5 * 6 * Before entry into this file: ANTLRTokenType must be set. 7 * 8 * SOFTWARE RIGHTS 9 * 10 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool 11 * Set (PCCTS) -- PCCTS is in the public domain. An individual or 12 * company may do whatever they wish with source code distributed with 13 * PCCTS or the code generated by PCCTS, including the incorporation of 14 * PCCTS, or its output, into commerical software. 15 * 16 * We encourage users to develop software with PCCTS. However, we do ask 17 * that credit is given to us for developing PCCTS. By "credit", 18 * we mean that if you incorporate our source code into one of your 19 * programs (commercial product, research project, or otherwise) that you 20 * acknowledge this fact somewhere in the documentation, research report, 21 * etc... If you like PCCTS and have developed a nice tool with the 22 * output, please mention that you developed it using PCCTS. In 23 * addition, we ask that this header remain intact in our source code. 24 * As long as these guidelines are kept, we expect to continue enhancing 25 * this system and expect to make other tools available as they are 26 * completed. 27 * 28 * ANTLR 1.33 29 * Terence Parr 30 * Parr Research Corporation 31 * with Purdue University and AHPCRC, University of Minnesota 32 * 1989-2000 33 */ 34 35 #ifndef APARSER_H_GATE 36 #define APARSER_H_GATE 37 38 #include "pcctscfg.h" 39 40 #include "pccts_stdio.h" 41 #include "pccts_setjmp.h" 42 43 PCCTS_NAMESPACE_STD 44 45 #include ATOKEN_H 46 #include ATOKENBUFFER_H 47 48 #ifdef ZZCAN_GUESS 49 #ifndef ZZINF_LOOK 50 #define ZZINF_LOOK 51 #endif 52 #endif 53 54 55 #define NLA (token_type[lap&(LLk-1)])/* --> next LA */ 56 57 typedef unsigned char SetWordType; 58 59 /* Define external bit set stuff (for SetWordType) */ 60 #define EXT_WORDSIZE (sizeof(char)*8) 61 #define EXT_LOGWORDSIZE 3 62 63 /* s y n t a c t i c p r e d i c a t e s t u f f */ 64 65 #ifndef zzUSER_GUESS_HOOK 66 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv) 67 #endif 68 69 #ifndef zzUSER_GUESS_DONE_HOOK 70 #define zzUSER_GUESS_DONE_HOOK(seqFrozen) 71 #endif 72 73 /* MR14 Add zzUSER_GUESS_FAIL_HOOK and related code */ 74 75 #define zzUSER_GUESS_FAIL_HOOK_INTERNAL zzUSER_GUESS_FAIL_HOOK(SeqFrozen) 76 #ifndef zzUSER_GUESS_FAIL_HOOK 77 #define zzUSER_GUESS_FAIL_HOOK(zzGuessSeq) 78 #endif 79 80 81 typedef struct _zzjmp_buf { 82 jmp_buf state; 83 } zzjmp_buf; 84 85 /* these need to be macros not member functions */ 86 #define zzGUESS_BLOCK ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen; 87 #define zzNON_GUESS_MODE if ( !guessing ) 88 #define zzGUESS_FAIL guess_fail(); 89 90 /* Note: zzGUESS_DONE does not execute longjmp() */ 91 92 #define zzGUESS_DONE {zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) } 93 #define zzGUESS saveState(&zzst); \ 94 guessing = 1; \ 95 zzGuessSeqFrozen = ++zzGuessSeq; \ 96 _marker = inputTokens->mark(); \ 97 zzrv = setjmp(guess_start.state); \ 98 zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \ 99 if ( zzrv ) zzGUESS_DONE 100 101 #define zzTRACEdata const ANTLRChar *zzTracePrevRuleName = NULL; 102 103 #ifndef zzTRACEIN 104 #define zzTRACEIN(r) zzTracePrevRuleName=traceCurrentRuleName;tracein(r); 105 #endif 106 #ifndef zzTRACEOUT 107 #define zzTRACEOUT(r) traceout(r);traceCurrentRuleName=zzTracePrevRuleName; 108 #endif 109 110 /* a n t l r p a r s e r d e f */ 111 112 struct ANTLRParserState { 113 /* class variables */ 114 zzjmp_buf guess_start; 115 int guessing; 116 117 int inf_labase; 118 int inf_last; 119 120 int dirty; 121 122 int traceOptionValue; // MR10 123 int traceGuessOptionValue; // MR10 124 const ANTLRChar *traceCurrentRuleName; // MR10 125 int traceDepth; // MR10 126 127 }; 128 129 /* notes: 130 * 131 * multiple inheritance is a cool way to include what stuff is needed 132 * in this structure (like guess stuff). however, i'm not convinced that 133 * multiple inheritance works correctly on all platforms. not that 134 * much space is used--just include all possibly useful members. 135 * 136 * the class should also be a template with arguments for the lookahead 137 * depth and so on. that way, more than one parser can be defined (as 138 * each will probably have different lookahead requirements). however, 139 * am i sure that templates work? no, i'm not sure. 140 * 141 * no attributes are maintained and, hence, the 'asp' variable is not 142 * needed. $i can still be referenced, but it refers to the token 143 * associated with that rule element. question: where are the token's 144 * stored if not on the software stack? in local variables created 145 * and assigned to by antlr. 146 */ 147 class ANTLRParser { 148 protected: 149 /* class variables */ 150 static SetWordType bitmask[sizeof(SetWordType)*8]; 151 static char eMsgBuffer[500]; 152 153 protected: 154 int LLk; // number of lookahead symbols (old LL_K) 155 int demand_look; 156 ANTLRTokenType eofToken; // when do I stop during resynch()s 157 int bsetsize; // size of bitsets created by ANTLR in 158 // units of SetWordType 159 160 ANTLRTokenBuffer *inputTokens; //place to get input tokens 161 162 zzjmp_buf guess_start; // where to jump back to upon failure 163 int guessing; // if guessing (using (...)? predicate) 164 165 // infinite lookahead stuff 166 int can_use_inf_look; // set by subclass (generated by ANTLR) 167 int inf_lap; 168 int inf_labase; 169 int inf_last; 170 int *_inf_line; 171 172 const ANTLRChar **token_tbl; // pointer to table of token type strings MR20 const 173 174 int dirty; // used during demand lookahead 175 176 ANTLRTokenType *token_type; // fast reference cache of token.getType() 177 // ANTLRLightweightToken **token; // the token with all its attributes 178 int lap; 179 int labase; 180 #ifdef ZZDEFER_FETCH 181 int stillToFetch; // MR19 V.H. Simonis 182 #endif 183 184 private: 185 void fill_inf_look(); 186 187 protected: guess_fail()188 virtual void guess_fail() { // MR9 27-Sep-97 make virtual 189 traceGuessFail(); // MR10 190 longjmp(guess_start.state, 1); } // MR9 guess_done(ANTLRParserState * st)191 virtual void guess_done(ANTLRParserState *st) { // MR9 27-Sep-97 make virtual 192 restoreState(st); } // MR9 193 virtual int guess(ANTLRParserState *); // MR9 27-Sep-97 make virtual 194 void look(int); 195 int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *, 196 _ANTLRTokenPtr *, SetWordType **); 197 int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *, 198 _ANTLRTokenPtr *, SetWordType **, 199 SetWordType * tokclassErrset /* MR23 */); 200 int _match_wsig(ANTLRTokenType); 201 int _setmatch_wsig(SetWordType *); 202 virtual void consume(); 203 virtual void resynch(SetWordType *wd,SetWordType mask); // MR21 204 void prime_lookahead(); 205 virtual void tracein(const ANTLRChar *r); // MR10 206 virtual void traceout(const ANTLRChar *r); // MR10 MODWORD(unsigned x)207 static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);} // x % EXT_WORDSIZE // MR9 DIVWORD(unsigned x)208 static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;} // x / EXT_WORDSIZE // MR9 209 int set_deg(SetWordType *); 210 int set_el(ANTLRTokenType, SetWordType *); 211 virtual void edecode(SetWordType *); // MR1 212 virtual void FAIL(int k, ...); // MR1 213 int traceOptionValue; // MR10 214 int traceGuessOptionValue; // MR10 215 const ANTLRChar *traceCurrentRuleName; // MR10 216 int traceDepth; // MR10 217 void traceReset(); // MR10 218 virtual void traceGuessFail(); // MR10 219 virtual void traceGuessDone(const ANTLRParserState *); // MR10 220 int zzGuessSeq; // MR10 221 222 public: 223 ANTLRParser(ANTLRTokenBuffer *, 224 int k=1, 225 int use_inf_look=0, 226 int demand_look=0, 227 int bsetsize=1); 228 virtual ~ANTLRParser(); 229 230 virtual void init(); 231 LA(int i)232 ANTLRTokenType LA(int i) 233 { 234 // 235 // MR14 demand look will always be 0 for C++ mode 236 // 237 //// return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] : 238 //// token_type[(lap+(i)-1)&(LLk-1)]; 239 240 // MR19 V.H. Simonis Defer fetch feature 241 242 #ifdef ZZDEFER_FETCH 243 undeferFetch(); 244 #endif 245 return token_type[(lap+(i)-1)&(LLk-1)]; 246 } 247 _ANTLRTokenPtr LT(int i); 248 setEofToken(ANTLRTokenType t)249 void setEofToken(ANTLRTokenType t) { eofToken = t; } getEofToken()250 ANTLRTokenType getEofToken() const { return eofToken; } // MR14 251 noGarbageCollectTokens()252 void noGarbageCollectTokens() { inputTokens->noGarbageCollectTokens(); } garbageCollectTokens()253 void garbageCollectTokens() { inputTokens->garbageCollectTokens(); } 254 255 virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, 256 SetWordType *eset, ANTLRTokenType etok, int k); 257 virtual void saveState(ANTLRParserState *); // MR9 27-Sep-97 make virtual 258 virtual void restoreState(ANTLRParserState *); // MR9 27-Sep-97 make virtual 259 260 virtual void panic(const char *msg); // MR20 const 261 262 static char *eMsgd(char *,int); 263 static char *eMsg(char *,char *); 264 static char *eMsg2(char *,char *,char *); 265 266 virtual int printMessage(FILE* pFile, const char* pFormat, ...); // MR23 267 virtual int printMessageV(FILE* pFile, const char* pFormat, va_list arglist); // MR23 268 269 void consumeUntil(SetWordType *st); 270 void consumeUntilToken(int t); 271 272 virtual int _setmatch_wdfltsig(SetWordType *tokensWanted, 273 ANTLRTokenType tokenTypeOfSet, 274 SetWordType *whatFollows); 275 virtual int _match_wdfltsig(ANTLRTokenType tokenWanted, 276 SetWordType *whatFollows); 277 278 const ANTLRChar * parserTokenName(int tok); // MR1 279 280 int traceOptionValueDefault; // MR11 281 int traceOption(int delta); // MR11 282 int traceGuessOption(int delta); // MR11 283 284 // MR8 5-Aug-97 S.Bochnak@microtool.com.pl 285 // MR8 Move resynch static local variable 286 // MR8 to class instance 287 288 int syntaxErrCount; // MR12 getLexer()289 ANTLRTokenStream *getLexer() const { // MR12 290 return inputTokens ? inputTokens->getLexer() : 0; } // MR12 291 protected: // MR8 292 int resynchConsumed; // MR8 293 char *zzFAILtext; // workarea required by zzFAIL // MR9 294 void undeferFetch(); // MR19 V.H. Simonis 295 int isDeferFetchEnabled(); // MR19 V.H. Simonis 296 virtual void failedSemanticPredicate(const char* predicate); /* MR23 */ 297 }; 298 299 #define zzmatch(_t) \ 300 if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \ 301 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail; 302 303 #define zzmatch_wsig(_t,handler) \ 304 if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;} 305 306 #define zzsetmatch(_ts,_tokclassErrset) \ 307 if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \ 308 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet, _tokclassErrset) ) goto fail; 309 310 #define zzsetmatch_wsig(_ts, handler) \ 311 if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;} 312 313 /* For the dflt signal matchers, a FALSE indicates that an error occurred 314 * just like the other matchers, but in this case, the routine has already 315 * recovered--we do NOT want to consume another token. However, when 316 * the match was successful, we do want to consume hence _signal=0 so that 317 * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;" 318 * preamble. 319 */ 320 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \ 321 if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \ 322 _signal = MismatchedToken; 323 324 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \ 325 if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken; 326 327 328 // MR1 10-Apr-97 zzfailed_pred() macro does not backtrack in guess mode. 329 // MR1 Identification and correction due to J. Lilley 330 // 331 // MR23 Call virtual method to report error. 332 // MR23 Provide more control over failed predicate action 333 // without any need for user to worry about guessing internals. 334 335 #ifndef zzfailed_pred 336 #define zzfailed_pred(_p,_hasuseraction,_useraction) \ 337 if (guessing) { \ 338 zzGUESS_FAIL; \ 339 } else { \ 340 zzfailed_pred_action(_p,_hasuseraction,_useraction) \ 341 } 342 #endif 343 344 // MR23 Provide more control over failed predicate action 345 // without any need for user to worry about guessing internals. 346 // _hasuseraction == 0 => no user specified error action 347 // _hasuseraction == 1 => user specified error action 348 349 #ifndef zzfailed_pred_action 350 #define zzfailed_pred_action(_p,_hasuseraction,_useraction) \ 351 if (_hasuseraction) { _useraction } else { failedSemanticPredicate(_p); } 352 #endif 353 354 #define zzRULE \ 355 SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0; \ 356 _ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)""; \ 357 int zzErrk=1,zzpf=0; \ 358 zzTRACEdata \ 359 ANTLRChar *zzMissText=(ANTLRChar *)""; 360 361 #endif 362 363 /* S t a n d a r d E x c e p t i o n S i g n a l s */ 364 365 #define NoSignal 0 366 #define MismatchedToken 1 367 #define NoViableAlt 2 368 #define NoSemViableAlt 3 369 370 /* MR7 Allow more control over signalling */ 371 /* by adding "Unwind" and "SetSignal" */ 372 373 #define Unwind 4 374 #define setSignal(newValue) *_retsignal=_signal=(newValue) 375 #define suppressSignal *_retsignal=_signal=0 376 #define exportSignal *_retsignal=_signal 377