/* This is the parser for the dlg
 * This is a part of the Purdue Compiler Construction Tool Set
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 *
 * We encourage users to develop software with PCCTS. However, we do ask
 * that credit is given to us for developing PCCTS. By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc... If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS. In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * DLG 1.33
 * Will Cohen
 * With mods by Terence Parr; AHPCRC, University of Minnesota
 * 1989-1995
 */

#header	<<
#include <ctype.h>
#include "dlg.h"
>>

<<

/* MR20 G. Hobbelt
   Fix for Borland C++ 4.x & 5.x compiling with ALL warnings enabled
*/

#ifdef __TURBOC__
#pragma warn -aus  /* unused assignment of 'xxx' */
#endif

int	action_no = 0;	   /* keep track of actions output */
int	nfa_allocated = 0; /* keeps track of number of nfa nodes */
nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */
nfa_node nfa_model_node;   /* model to initialize new nodes */
set	used_chars;	   /* used to label trans. arcs */
set	used_classes;	   /* classes or chars used to label trans. arcs */
set	normal_chars;	   /* mask to get rid elements that aren't used
			      in set */
int	flag_paren = FALSE;
int	flag_brace = FALSE;
int	mode_counter = 0;  /* keep track of number of %%names */

>>

#lexaction <<
int	func_action;		/* should actions be turned into functions?*/
int	lex_mode_counter = 0;	/* keeps track of the number of %%names */
/* MR1									    */
/* MR1  11-Apr-97	Provide mechanism for inserting code into DLG class */
/* MR1				via <<%%lexmember...>>			    */
/* MR1									    */
int	lexMember = 0;		/* <<%%lexmember ...>>		   MR1 */
int	lexAction = 0;		/* <<%%lexaction ...>>		   MR1 */
int	parserClass = 0;	/* <<%%parserclass ...>>	   MR1 */
int	lexPrefix = 0;		/* <<%%lexprefix ...>>		   MR1 */
char	theClassName[100];	/* MR11 */
char	*pClassName=theClassName;	/* MR11 */
int	firstLexMember=1;	/* MR1 */

/* MR1
 * Route one character of action text to its destination:
 *   - while parserClass is set, append it to theClassName (kept
 *     NUL-terminated and never written past the end of the buffer);
 *   - while lexMember or lexPrefix is set, write it to class_stream
 *     (skipped when class_stream is NULL);
 *   - otherwise write it to OUT.
 */
#ifdef __USE_PROTOS
void xxputc(int c) {
#else
void xxputc(c)
  int	c;
{
#endif
  static int classNameTooLong = 0;	/* overflow already reported? */

  if (parserClass) {
    /* leave room for the terminating NUL */
    if (pClassName < theClassName + sizeof(theClassName) - 1) {
      *pClassName++=c;
      *pClassName=0;
    } else if (!classNameTooLong) {
      classNameTooLong = 1;
      error("parser class name too long", zzline);
    }
  } else if (lexMember || lexPrefix) {
    if (class_stream != NULL) fputc(c,class_stream);
  } else {
    fputc(c,OUT);
  }
}

/* MR1
 * Formatted counterpart of xxputc for a single string argument.
 * While lexMember, lexPrefix or parserClass is set the text goes to
 * class_stream (skipped when class_stream is NULL); otherwise to OUT.
 * format is handed straight to fprintf, so it must consume exactly one
 * string argument; every call in this file passes "%s".
 */
#ifdef __USE_PROTOS
void xxprintf(char *format,char *string) {
#else
void xxprintf(format,string)
  char	*format;
  char	*string;
{
#endif
  if (lexMember || lexPrefix || parserClass) {
    if (class_stream != NULL)
      fprintf(class_stream,format,string);
  } else {
    fprintf(OUT,format,string);
  }
}
>>

#token "[\r\t\ ]+"	<< zzskip(); >>			/* Ignore white */
#token "\n"		<< zzline++; zzskip(); DAWDLE; >>	/* Track Line # */
#token L_EOF		"\@"
#token PER_PER		"\%\%"
#token NAME_PER_PER	"\%\%[a-zA-Z_][a-zA-Z0-9_]*"
		<< p_mode_def(&zzlextext[2],lex_mode_counter++); >>

#token LEXMEMBER	"\<\<\%\%lexmember"			/* MR1 */
		<<lexMember=1;					/* MR1 */
		  if (firstLexMember != 0) {			/* MR1 */
		    firstLexMember=0;				/* MR1 */
		    p_class_def1();				/* MR1 */
		  };						/* MR1 */
		  zzmode(ACT);					/* MR1 */
		>>						/* MR1 */
#token LEXACTION	"\<\<\%\%lexaction"			/* MR1 */
		<<lexAction=1;zzmode(ACT);>>			/* MR1 */
#token PARSERCLASS	"\<\<\%\%parserclass"			/* MR1 */
		<<parserClass=1;				/* MR1 */
		  zzmode(ACT);					/* MR1 */
		>>						/* MR1 */
#token LEXPREFIX	"\<\<\%\%lexprefix"			/* MR1 */
		<<lexPrefix=1;zzmode(ACT);>>			/* MR1 */

#token ACTION		"\<\<"
		<< if (func_action)
			fprintf(OUT,"\n%s %sact%d()\n{ ",
					gen_cpp?"ANTLRTokenType":"static void",
					gen_cpp?ClassName("::"):"", ++action_no);
		   zzmode(ACT); zzskip();
		>>
#token GREAT_GREAT	"\>\>"
#token L_BRACE		"\{"
#token R_BRACE		"\}"
#token L_PAR		"\("
#token R_PAR		"\)"
#token L_BRACK		"\["
#token R_BRACK		"\]"
#token ZERO_MORE	"\*"
#token ONE_MORE		"\+"
#token OR		"\|"
#token RANGE		"\-"
#token NOT		"\~"
#token OCTAL_VALUE "\\0[0-7]*"
	<< {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>>
#token HEX_VALUE   "\\0[Xx][0-9a-fA-F]+"
	<< {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>>
#token DEC_VALUE   "\\[1-9][0-9]*"
	<< {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>>
#token TAB		"\\t"		<< zzlextext[0] = '\t';>>
#token NL		"\\n"		<< zzlextext[0] = '\n';>>
#token CR		"\\r"		<< zzlextext[0] = '\r';>>
#token BS		"\\b"		<< zzlextext[0] = '\b';>>

/* MR1									*/
/* MR1 10-Apr-97 MR1	Allow #token regular expressions to cross lines	*/
/* MR1									*/
#token CONTINUATION	"\\ \n"		<< zzline++; zzskip();>> /* MR1 */

/* NOTE: this takes ANYTHING after the \ */
#token LIT		"\\~[tnrb]"	<< zzlextext[0] = zzlextext[1];>>

/* NOTE: this takes ANYTHING that doesn't match the other tokens */
#token REGCHAR		"~[\\]"


/* Whole DLG description: optional leading special/plain actions, the
 * start-state sections, trailing actions, then end of input.
 */
grammar	:	<< p_head(); p_class_hdr(); func_action = FALSE;>>
		( {LEXACTION | LEXMEMBER | LEXPREFIX | PARSERCLASS } ACTION)*	/* MR1 */
		<<if ( gen_cpp ) p_includes();>>
		start_states
		<< func_action = FALSE; p_tables(); p_tail(); >>
		(ACTION)* "@"
		<< if (firstLexMember != 0) p_class_def1(); >>		/* MR1 */
	;

/* Either one anonymous %% section or one or more named %%name sections,
 * closed by a final %%.
 */
start_states	: ( PER_PER do_conversion
		  | NAME_PER_PER do_conversion (NAME_PER_PER do_conversion)*)
		    PER_PER
		;

/* Parse the rules of one lexical mode, then relabel the resulting NFA
 * and convert it to a DFA for that mode.
 */
do_conversion	: <<new_automaton_mode(); func_action = TRUE;>>
			rule_list
			<<
				dfa_class_nop[mode_counter] =
					relabel($1.l,comp_level);
				if (comp_level)
					p_shift_table(mode_counter);
				dfa_basep[mode_counter] = dfa_allocated+1;
				make_dfa_model_node(dfa_class_nop[mode_counter]);
				nfa_to_dfa($1.l);
				++mode_counter;
				func_action = FALSE;
#ifdef HASH_STAT
				fprint_hash_stats(stderr);
#endif
			>>
		;

/* Each additional rule is joined to the rules seen so far through a new
 * node whose two transitions lead to the old list and to the new rule.
 */
rule_list	: rule <<$$.l=$1.l; $$.r=$1.r;>>
			(rule
				<<{nfa_node *t1;
				   t1 = new_nfa_node();
				   (t1)->trans[0]=$$.l;
				   (t1)->trans[1]=$1.l;
				   /* all accept nodes "dead ends" */
				   $$.l=t1; $$.r=NULL;
				   }
				>>
			)*
		| /* empty */
			<<$$.l = new_nfa_node(); $$.r = NULL;
			   warning("no regular expressions", zzline);
			>>
		;

/* A regular expression followed by its action; the right-hand node of
 * the expression is marked as accepting with the current action number.
 */
rule	: reg_expr ACTION
/* MR23 */		<< if ($1.r != NULL) {
					$$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no;
				}
			>>
	| ACTION
		<<$$.l = NULL; $$.r = NULL;
		  error("no expression for action  ", zzline);
		>>
	;

/* Alternation: a fresh start node branches to both operands and both
 * operands are joined to a fresh end node.
 */
reg_expr	: and_expr <<$$.l=$1.l; $$.r=$1.r;>>
			(OR and_expr
				<<{nfa_node *t1, *t2;
				   t1 = new_nfa_node(); t2 = new_nfa_node();
				   (t1)->trans[0]=$$.l;
				   (t1)->trans[1]=$2.l;
/* MR23 */		   if ($$.r != NULL) ($$.r)->trans[1]=t2;
				   if ($2.r) {
					($2.r)->trans[1]=t2;     /* MR20 */
				   }
				   $$.l=t1; $$.r=t2;
				  }
				>>
			)*
		;

/* Concatenation: the end of what has been built so far is linked to the
 * start of the next operand.
 */
and_expr	: repeat_expr
			<<
				$$.l=$1.l; $$.r=$1.r;
			>>
			(repeat_expr
/* MR23 */			<< if ($$.r != NULL) {
					($$.r)->trans[1]=$1.l;
					$$.r=$1.r;
				   }
				>>
			)*
		;

/* An expression optionally followed by * or +.  Both add a back edge
 * from the end to the start; * additionally wraps the expression in new
 * start and end nodes with a direct start-to-end edge.
 */
repeat_expr	: expr <<$$.l=$1.l; $$.r=$1.r;>>
			{ ZERO_MORE
			<<{	nfa_node *t1,*t2;
/* MR23 */		if ($$.r != NULL) ($$.r)->trans[0] = $$.l;
				t1 = new_nfa_node(); t2 = new_nfa_node();
				t1->trans[0]=$$.l;
				t1->trans[1]=t2;
/* MR23 */		if ($$.r != NULL) ($$.r)->trans[1]=t2;
				$$.l=t1;$$.r=t2;
			  }
			>>
			| ONE_MORE
/* MR23 */		<<if ($$.r != NULL) ($$.r)->trans[0] = $$.l;>>
			}
		| ZERO_MORE
			<< error("no expression for *", zzline);>>
		| ONE_MORE
			<< error("no expression for +", zzline);>>
		;

/* Primary expressions.  A start and an end node are allocated up front;
 * each alternative then wires them to a character set, a negated set, a
 * parenthesized sub-expression, an optional (braced) sub-expression or a
 * single atom.
 */
expr	: << $$.l = new_nfa_node();
	     $$.r = new_nfa_node();
	  >>
	  L_BRACK atom_list R_BRACK
		<<
/* MR23 */	if ($$.l != NULL) {
			($$.l)->trans[0] = $$.r;
			($$.l)->label = set_dup($2.label);
			set_orin(&used_chars,($$.l)->label);
		}
		>>
	| NOT L_BRACK atom_list R_BRACK
		<<
/* MR23 */	if ($$.l != NULL) {
			($$.l)->trans[0] = $$.r;
			($$.l)->label = set_dif(normal_chars,$3.label);
			set_orin(&used_chars,($$.l)->label);
		}
		>>
	| L_PAR reg_expr R_PAR
		<<
/* MR23 */	if ($$.l != NULL) {
			($$.l)->trans[0] = $2.l;
			if ($2.r) {
				($2.r)->trans[1] = $$.r;    /* MR20 */
			}
		}
		>>
	| L_BRACE reg_expr R_BRACE
		<<
/* MR23 */	if ($$.l != NULL) {
			($$.l)->trans[0] = $2.l;
			($$.l)->trans[1] = $$.r;
			if ($2.r) {
				($2.r)->trans[1] = $$.r;    /* MR20 */
			}
		}
		>>
	| atom
		<<
/* MR23 */	if ($$.l != NULL) {
			($$.l)->trans[0] = $$.r;
			($$.l)->label = set_dup($1.label);
			set_orin(&used_chars,($$.l)->label);
		}
		>>
	;

/* Union of the labels of all near_atoms inside [ ]. */
atom_list	: << set_free($$.label); >>
			(near_atom <<set_orin(&($$.label),$1.label);>>)*
		;

/* A single character or a character range inside [ ]; when
 * case_insensitive is set the other-case letters are added as well.
 */
near_atom	: << register int i;
		     register int i_prime;
		  >>
		  anychar
			<<$$.letter=$1.letter; $$.label=set_of($1.letter);
			  i_prime = $1.letter + MIN_CHAR;
			  if (case_insensitive && islower(i_prime))
				set_orel(toupper(i_prime)-MIN_CHAR,
					&($$.label));
			  if (case_insensitive && isupper(i_prime))
				set_orel(tolower(i_prime)-MIN_CHAR,
					&($$.label));
			>>
			{ RANGE anychar
				<< if (case_insensitive){
					i_prime = $$.letter+MIN_CHAR;
					$$.letter = (islower(i_prime) ?
						toupper(i_prime) : i_prime)-MIN_CHAR;
					i_prime = $2.letter+MIN_CHAR;
					$2.letter = (islower(i_prime) ?
						toupper(i_prime) : i_prime)-MIN_CHAR;
				   }
				   /* check to see if range okay */
				   if ($$.letter > $2.letter
					&& $2.letter != 0xff){       /* MR16 */
					error("invalid range  ", zzline);
				   }
				   for (i=$$.letter; i<= (int)$2.letter; ++i){
					set_orel(i,&($$.label));
					i_prime = i+MIN_CHAR;
					if (case_insensitive && islower(i_prime))
						set_orel(toupper(i_prime)-MIN_CHAR,
							&($$.label));
					if (case_insensitive && isupper(i_prime))
						set_orel(tolower(i_prime)-MIN_CHAR,
							&($$.label));
				   }
				>>
			}
		;

/* A single character outside [ ]; same case handling as near_atom. */
atom	: << register int i_prime;>>
	  anychar
		<<$$.label = set_of($1.letter);
		  i_prime = $1.letter + MIN_CHAR;
		  if (case_insensitive && islower(i_prime))
			set_orel(toupper(i_prime)-MIN_CHAR,
				&($$.label));
		  if (case_insensitive && isupper(i_prime))
			set_orel(tolower(i_prime)-MIN_CHAR,
				&($$.label));
		>>
	;

anychar	: REGCHAR	<<$$.letter = $1.letter - MIN_CHAR;>>
	| OCTAL_VALUE	<<$$.letter = $1.letter - MIN_CHAR;>>
	| HEX_VALUE	<<$$.letter = $1.letter - MIN_CHAR;>>
	| DEC_VALUE	<<$$.letter = $1.letter - MIN_CHAR;>>
	| TAB		<<$$.letter = $1.letter - MIN_CHAR;>>
	| NL		<<$$.letter = $1.letter - MIN_CHAR;>>
	| CR		<<$$.letter = $1.letter - MIN_CHAR;>>
	| BS		<<$$.letter = $1.letter - MIN_CHAR;>>
	| LIT		<<$$.letter = $1.letter - MIN_CHAR;>>
	/* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/
	| L_EOF		<<$$.letter = 0;>>
	;

<</* empty action */>>

#lexclass ACT
#token "@"	<< error("unterminated action", zzline); zzmode(START); >>
#token ACTION "\>\>"
		<< if (func_action) fprintf(OUT,"}\n\n");
		   zzmode(START);
/* MR1									    */
/* MR1  11-Apr-97	Provide mechanism for inserting code into DLG class */
/* MR1				via <<%%lexmember ...>>			    */
/* MR1			This is a consequence of not saving actions	    */
/* MR1									    */
/* MR1 */	   parserClass=0;
/* MR1 */	   lexPrefix=0;
/* MR1 */	   lexAction=0;
/* MR1 */	   lexMember=0;
		>>
#token "\>"		<< xxputc(zzlextext[0]); zzskip(); >>		/* MR1 */
#token "\\\>"		<< xxputc('>'); zzskip(); >>			/* MR1 */
#token "\\"		<< xxputc('\\'); zzskip(); >>			/* MR1 */
#token "\n"		<< xxputc(zzlextext[0]); ++zzline; zzskip(); >>	/* MR1 */
#token "/\*"		<< zzmode(ACTION_COMMENTS);			/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "//"		<< zzmode(ACTION_CPP_COMMENTS);			/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "~[]"		<< xxputc(zzlextext[0]); zzskip(); >>		/* MR1 */
									/* MR1 */
#lexclass ACTION_COMMENTS						/* MR1 */
#token "\*/"		<< zzmode(ACT);					/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "[\n\r]"		<< zzline++; xxputc(zzlextext[0]); zzskip();>>	/* MR1 */
#token "~[]"		<< xxputc(zzlextext[0]); zzskip();>>		/* MR1 */
									/* MR1 */
#lexclass ACTION_CPP_COMMENTS						/* MR1 */
#token "[\n\r]"		<< zzmode(ACT); zzline++;			/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "~[]"		<< xxputc(zzlextext[0]); zzskip();>>		/* MR1 */

<<
/* Allocate a new nfa node, record it in nfa_array[] under the next node
 * number and return it.  The node starts as a copy of nfa_model_node
 * with node_no set to its index.  nfa_array is created on first use and
 * enlarged whenever it is full.  The process is terminated if memory
 * cannot be obtained, so the result is never NULL.
 * NOTE(review): the project may have its own fatal-exit helper; switch
 * to it if one exists.
 */
nfa_node *
#ifdef __USE_PROTOS
new_nfa_node(void)
#else
new_nfa_node()
#endif
{
	register nfa_node *t;
	static int nfa_size=0;	/* elements nfa_array[] can hold */

	++nfa_allocated;
	if (nfa_size<=nfa_allocated){
		/* need to redo array */
		nfa_node **grown;

		if (!nfa_array){
			/* need some to do initial allocation */
			nfa_size=nfa_allocated+NFA_MIN;
			grown=(nfa_node **) malloc(sizeof(nfa_node*)*
				nfa_size);
		}else{
			/* need more space */
			nfa_size=2*(nfa_allocated+1);
			grown=(nfa_node **) realloc(nfa_array,
				sizeof(nfa_node*)*nfa_size);
		}
		/* go through a temporary so the old array is not lost
		   before the result has been checked */
		if (grown == NULL){
			fprintf(stderr, "dlg: out of memory growing nfa_array\n");
			exit(1);
		}
		nfa_array = grown;
	}
	/* fill out entry in array */
	t = (nfa_node*) malloc(sizeof(nfa_node));
	if (t == NULL){
		fprintf(stderr, "dlg: out of memory allocating nfa node\n");
		exit(1);
	}
	nfa_array[nfa_allocated] = t;
	*t = nfa_model_node;
	t->node_no = nfa_allocated;
	return t;
}


/* initialize the model node used to fill in newly made nfa_nodes */
void
#ifdef __USE_PROTOS
make_nfa_model_node(void)
#else
make_nfa_model_node()
#endif
{
	nfa_model_node.node_no = -1; /* impossible value for real nfa node */
	nfa_model_node.nfa_set = 0;
	nfa_model_node.accept = 0;   /* error state default*/
	nfa_model_node.trans[0] = NULL;
	nfa_model_node.trans[1] = NULL;
	nfa_model_node.label = empty;
}
>>

<<
#if defined(DEBUG) || defined(_DEBUG)

/* print out the pointer value and the node_number */
void
#ifdef __USE_PROTOS
fprint_dfa_pair(FILE *f, nfa_node *p)
#else
fprint_dfa_pair(f, p)
FILE *f;
nfa_node *p;
#endif
{
	if (p){
		/* pointers are printed with %p; %x expects an unsigned int */
		fprintf(f, "%p (%d)", (void *)p, p->node_no);
	}else{
		fprintf(f, "(nil)");
	}
}

/* print out interesting information on a set */
void
#ifdef __USE_PROTOS
fprint_set(FILE *f, set s)
#else
fprint_set(f,s)
FILE *f;
set s;
#endif
{
	unsigned int *x;

	fprintf(f, "n = %d,", s.n);
	if (s.setword){
		fprintf(f, "setword = %p, ", (void *)s.setword);
		/* print out all the elements in the set */
		/* NOTE(review): who owns the list returned by set_pdq is
		   not visible here; confirm whether it must be freed */
		x = set_pdq(s);
		if (x != NULL){
			while (*x!=nil){
				fprintf(f, "%u ", *x);
				++x;
			}
		}
	}else{
		fprintf(f, "setword = (nil)");
	}
}

/* code to be able to dump out the nfas
	return 0 if okay dump
	return 1 if screwed up
 */
int
#ifdef __USE_PROTOS
dump_nfas(int first_node, int last_node)
#else
dump_nfas(first_node, last_node)
int first_node;
int last_node;
#endif
{
	register int i;
	nfa_node *t;

	for (i=first_node; i<=last_node; ++i){
		t = NFA(i);
		if (!t) break;
		fprintf(stderr, "nfa_node %d {\n", t->node_no);
		fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set);
		fprintf(stderr, "\taccept\t=\t%d\n", t->accept);
		fprintf(stderr, "\ttrans\t=\t(");
		fprint_dfa_pair(stderr, t->trans[0]);
		fprintf(stderr, ",");
		fprint_dfa_pair(stderr, t->trans[1]);
		fprintf(stderr, ")\n");
		fprintf(stderr, "\tlabel\t=\t{ ");
		fprint_set(stderr, t->label);
		fprintf(stderr, "\t}\n");
		fprintf(stderr, "}\n\n");
	}
	return 0;
}
#endif
>>

<<
/* DLG-specific syntax error message generator
 * (define USER_ZZSYN when compiling so don't get 2 definitions)
 *
 * Writes one diagnostic line to stderr: the ErrHdr prefix with the
 * input file name (or "stdin") and zzline, the offending text (or "EOF"
 * when tok is zzEOF_TOKEN), and, when etok or eset is given, what was
 * missing or expected, followed by the rule group if egroup is not
 * empty.
 */
void
#ifdef __USE_PROTOS
zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
#else
zzsyn(text, tok, egroup, eset, etok, k, bad_text)
char *text, *egroup, *bad_text;
int tok;
int etok;
int k;
SetWordType *eset;
#endif
{
	fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline);
	fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text);
	if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
	if ( k==1 ) fprintf(stderr, " missing");
	else
	{
		fprintf(stderr, "; \"%s\" not", bad_text);
		if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
	}
	if ( zzset_deg(eset)>0 ) zzedecode(eset);
	else fprintf(stderr, " %s", zztokens[etok]);
	if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup);
	fprintf(stderr, "\n");
}
>>