1 /*
2 * misc.c
3 *
4 * Manage tokens, regular expressions.
5 * Print methods for debugging
6 * Compute follow lists onto tail ends of rules.
7 *
8 * The following functions are visible:
9 *
10 * int addTname(char *); Add token name
11 * int addTexpr(char *); Add token expression
12 * int Tnum(char *); Get number of expr/token
13 * void Tklink(char *, char *); Link a name with an expression
14 * int hasAction(expr); Does expr already have action assigned?
15 * void setHasAction(expr); Indicate that expr now has an action
16 * Entry *newEntry(char *,int); Create new table entry with certain size
17 * void list_add(ListNode **list, char *e)
18 * void list_free(ListNode **list, int freeData); *** MR10 ***
19 * void list_apply(ListNode *list, void (*f)())
20 * void lexclass(char *m); switch to new/old lexical class
21 * void lexmode(int i); switch to old lexical class i
22 *
23 * SOFTWARE RIGHTS
24 *
25 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
26 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
27 * company may do whatever they wish with source code distributed with
28 * PCCTS or the code generated by PCCTS, including the incorporation of
29 * PCCTS, or its output, into commerical software.
30 *
31 * We encourage users to develop software with PCCTS. However, we do ask
32 * that credit is given to us for developing PCCTS. By "credit",
33 * we mean that if you incorporate our source code into one of your
34 * programs (commercial product, research project, or otherwise) that you
35 * acknowledge this fact somewhere in the documentation, research report,
36 * etc... If you like PCCTS and have developed a nice tool with the
37 * output, please mention that you developed it using PCCTS. In
38 * addition, we ask that this header remain intact in our source code.
39 * As long as these guidelines are kept, we expect to continue enhancing
40 * this system and expect to make other tools available as they are
41 * completed.
42 *
43 * ANTLR 1.33
44 * Terence Parr
45 * Parr Research Corporation
46 * with Purdue University and AHPCRC, University of Minnesota
47 * 1989-2001
48 */
49
50 #include <stdio.h>
51 #include "pcctscfg.h"
52 #include "set.h"
53 #include "syn.h"
54 #include "hash.h"
55 #include "generic.h"
56 #include "dlgdef.h"
57 #include <ctype.h>
58
59 static int tsize=TSChunk; /* size of token str arrays */
60
61 static void
62 #ifdef __USE_PROTOS
63 RemapForcedTokensInSyntaxDiagram(Node *);
64 #else
65 RemapForcedTokensInSyntaxDiagram();
66 #endif
67
68 /* T o k e n M a n i p u l a t i o n */
69
70 /*
71 * add token 't' to the TokenStr/Expr array. Make more room if necessary.
72 * 't' is either an expression or a token name.
73 *
74 * There is only one TokenStr array, but multiple ExprStr's. Therefore,
75 * for each lex class (element of lclass) we must extend the ExprStr array.
76 * ExprStr's and TokenStr are always all the same size.
77 *
78 * Also, there is a Texpr hash table for each automaton.
79 */
80 static void
81 #ifdef __USE_PROTOS
Ttrack(char * t)82 Ttrack( char *t )
83 #else
84 Ttrack( t )
85 char *t;
86 #endif
87 {
88 if ( TokenNum >= tsize ) /* terminal table overflow? */
89 {
90 char **p;
91 int i, more, j;
92
93 more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk));
94 tsize += more;
95 TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *));
96 require(TokenStr != NULL, "Ttrack: can't extend TokenStr");
97 for (i=0; i<NumLexClasses; i++)
98 {
99 lclass[i].exprs = (char **)
100 realloc((char *)lclass[i].exprs, tsize*sizeof(char *));
101 require(lclass[i].exprs != NULL, "Ttrack: can't extend ExprStr");
102 for (p= &lclass[i].exprs[tsize-more],j=1; j<=more; j++) *p++ = NULL;
103 }
104 for (p= &TokenStr[tsize-more],i=1; i<=more; i++) *p++ = NULL;
105 lexmode( CurrentLexClass ); /* reset ExprStr in case table moved */
106 }
107 /* note: we use the actual ExprStr/TokenStr array
108 * here as TokenInd doesn't exist yet
109 */
110 if ( *t == '"' ) ExprStr[TokenNum] = t;
111 else TokenStr[TokenNum] = t;
112 }
113
114 static Expr *
115 #ifdef __USE_PROTOS
newExpr(char * e)116 newExpr( char *e )
117 #else
118 newExpr( e )
119 char *e;
120 #endif
121 {
122 Expr *p = (Expr *) calloc(1, sizeof(Expr));
123 require(p!=NULL, "newExpr: cannot alloc Expr node");
124
125 p->expr = e;
126 p->lclass = CurrentLexClass;
127 return p;
128 }
129
130 /* switch to lexical class/mode m. This amounts to creating a new
131 * lex mode if one does not already exist and making ExprStr point
132 * to the correct char string array. We must also switch Texpr tables.
133 *
134 * BTW, we need multiple ExprStr arrays because more than one automaton
135 * may have the same label for a token, but with different expressions.
136 * We need to track an expr for each automaton. If we disallowed this
137 * feature, only one ExprStr would be required.
138 */
139 void
140 #ifdef __USE_PROTOS
lexclass(char * m)141 lexclass( char *m )
142 #else
143 lexclass( m )
144 char *m;
145 #endif
146 {
147 int i;
148 TermEntry *p;
149 static char EOFSTR[] = "\"@\"";
150
151 if ( hash_get(Tname, m) != NULL )
152 {
153 warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m));
154 }
155 /* does m already exist? */
156 i = LexClassIndex(m);
157 if ( i != -1 ) {lexmode(i); return;}
158 /* must make new one */
159 NumLexClasses++;
160 CurrentLexClass = NumLexClasses-1;
161 require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files");
162 lclass[CurrentLexClass].classnum = m;
163 lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *));
164 require(lclass[CurrentLexClass].exprs!=NULL,
165 "lexclass: cannot allocate ExprStr");
166 lclass[CurrentLexClass].htable = newHashTable();
167 ExprStr = lclass[CurrentLexClass].exprs;
168 Texpr = lclass[CurrentLexClass].htable;
169 /* define EOF for each automaton */
170 p = newTermEntry( EOFSTR );
171 p->token = EofToken; /* couldn't have remapped tokens yet, use EofToken */
172 hash_add(Texpr, EOFSTR, (Entry *)p);
173 list_add(&ExprOrder, (void *)newExpr(EOFSTR));
174 /* note: we use the actual ExprStr array
175 * here as TokenInd doesn't exist yet
176 */
177 ExprStr[EofToken] = EOFSTR;
178 }
179
180 void
181 #ifdef __USE_PROTOS
lexmode(int i)182 lexmode( int i )
183 #else
184 lexmode( i )
185 int i;
186 #endif
187 {
188 require(i<NumLexClasses, "lexmode: invalid mode");
189 ExprStr = lclass[i].exprs;
190 Texpr = lclass[i].htable;
191 CurrentLexClass = i;
192 }
193
194 /* return index into lclass array of lexical class. return -1 if nonexistent */
195 int
196 #ifdef __USE_PROTOS
LexClassIndex(char * cl)197 LexClassIndex( char *cl )
198 #else
199 LexClassIndex( cl )
200 char *cl;
201 #endif
202 {
203 int i;
204
205 for (i=0; i<NumLexClasses; i++)
206 {
207 if ( strcmp(lclass[i].classnum, cl) == 0 ) return i;
208 }
209 return -1;
210 }
211
212 int
213 #ifdef __USE_PROTOS
hasAction(char * expr)214 hasAction( char *expr )
215 #else
216 hasAction( expr )
217 char *expr;
218 #endif
219 {
220 TermEntry *p;
221 require(expr!=NULL, "hasAction: invalid expr");
222
223 p = (TermEntry *) hash_get(Texpr, expr);
224 require(p!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr));
225 return (p->action!=NULL);
226 }
227
228 void
229 #ifdef __USE_PROTOS
setHasAction(char * expr,char * action)230 setHasAction( char *expr, char *action )
231 #else
232 setHasAction( expr, action )
233 char *expr;
234 char *action;
235 #endif
236 {
237 TermEntry *p;
238 require(expr!=NULL, "setHasAction: invalid expr");
239
240 p = (TermEntry *) hash_get(Texpr, expr);
241 require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr));
242 p->action = action;
243 }
244
245 ForcedToken *
246 #ifdef __USE_PROTOS
newForcedToken(char * token,int tnum)247 newForcedToken(char *token, int tnum)
248 #else
249 newForcedToken(token, tnum)
250 char *token;
251 int tnum;
252 #endif
253 {
254 ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken));
255 require(ft!=NULL, "out of memory");
256 ft->token = token;
257 ft->tnum = tnum;
258 return ft;
259 }
260
261 /*
262 * Make a token indirection array that remaps token numbers and then walk
263 * the appropriate symbol tables and SynDiag to change token numbers
264 */
265 void
266 #ifdef __USE_PROTOS
RemapForcedTokens(void)267 RemapForcedTokens(void)
268 #else
269 RemapForcedTokens()
270 #endif
271 {
272 ListNode *p;
273 ForcedToken *q;
274 int max_token_number=0; /* MR9 23-Sep-97 Removed "unsigned" */
275 int i;
276
277 if ( ForcedTokens == NULL ) return;
278
279 /* find max token num */
280 for (p = ForcedTokens->next; p!=NULL; p=p->next)
281 {
282 q = (ForcedToken *) p->elem;
283 if ( q->tnum > max_token_number ) max_token_number = q->tnum;
284 }
285 fprintf(stderr, "max token number is %d\n", max_token_number);
286
287 /* make token indirection array */
288 TokenInd = (int *) calloc(max_token_number+1, sizeof(int));
289 LastTokenCounted = TokenNum;
290 TokenNum = max_token_number+1;
291 require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd");
292
293 /* fill token indirection array and change token id htable ; swap token indices */
294 for (i=1; i<TokenNum; i++) TokenInd[i] = i;
295 for (p = ForcedTokens->next; p!=NULL; p=p->next)
296 {
297 TermEntry *te;
298 int old_pos, t;
299
300 q = (ForcedToken *) p->elem;
301 fprintf(stderr, "%s forced to %d\n", q->token, q->tnum);
302 te = (TermEntry *) hash_get(Tname, q->token);
303 require(te!=NULL, "RemapForcedTokens: token not in hash table");
304 old_pos = te->token;
305 fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
306 fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
307 q = (ForcedToken *) p->elem;
308 t = TokenInd[old_pos];
309 TokenInd[old_pos] = q->tnum;
310 TokenInd[q->tnum] = t;
311 te->token = q->tnum; /* update token type id symbol table */
312 fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
313 fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
314
315 /* Change the token number in the sym tab entry for the exprs
316 * at the old position of the token id and the target position
317 */
318 /* update expr at target (if any) of forced token id */
319 if ( q->tnum < TokenNum ) /* is it a valid position? */
320 {
321 for (i=0; i<NumLexClasses; i++)
322 {
323 if ( lclass[i].exprs[q->tnum]!=NULL )
324 {
325 /* update the symbol table for this expr */
326 TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]);
327 require(e!=NULL, "RemapForcedTokens: expr not in hash table");
328 e->token = old_pos;
329 fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n",
330 lclass[i].exprs[q->tnum], q->tnum, i, old_pos);
331 }
332 }
333 }
334 /* update expr at old position (if any) of forced token id */
335 for (i=0; i<NumLexClasses; i++)
336 {
337 if ( lclass[i].exprs[old_pos]!=NULL )
338 {
339 /* update the symbol table for this expr */
340 TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]);
341 require(e!=NULL, "RemapForcedTokens: expr not in hash table");
342 e->token = q->tnum;
343 fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n",
344 lclass[i].exprs[old_pos], q->token, i, q->tnum);
345 }
346 }
347 }
348
349 /* Update SynDiag */
350 RemapForcedTokensInSyntaxDiagram((Node *)SynDiag);
351 }
352
353 static void
354 #ifdef __USE_PROTOS
RemapForcedTokensInSyntaxDiagram(Node * p)355 RemapForcedTokensInSyntaxDiagram(Node *p)
356 #else
357 RemapForcedTokensInSyntaxDiagram(p)
358 Node *p;
359 #endif
360 {
361 Junction *j = (Junction *) p;
362 RuleRefNode *r = (RuleRefNode *) p;
363 TokNode *t = (TokNode *)p;
364
365 if ( p==NULL ) return;
366 require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node");
367 switch ( p->ntype )
368 {
369 case nJunction :
370 if ( j->visited ) return;
371 if ( j->jtype == EndRule ) return;
372 j->visited = TRUE;
373 RemapForcedTokensInSyntaxDiagram( j->p1 );
374 RemapForcedTokensInSyntaxDiagram( j->p2 );
375 j->visited = FALSE;
376 return;
377 case nRuleRef :
378 RemapForcedTokensInSyntaxDiagram( r->next );
379 return;
380 case nToken :
381 if ( t->remapped ) return; /* we've been here before */
382 t->remapped = 1;
383 fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]);
384 t->token = TokenInd[t->token];
385 RemapForcedTokensInSyntaxDiagram( t->next );
386 return;
387 case nAction :
388 RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next );
389 return;
390 default :
391 fatal_internal("invalid node type");
392 }
393 }
394
395 /*
396 * Add a token name. Return the token number associated with it. If it already
397 * exists, then return the token number assigned to it.
398 *
399 * Track the order in which tokens are found so that the DLG output maintains
400 * that order. It also lets us map token numbers to strings.
401 */
402 int
403 #ifdef __USE_PROTOS
addTname(char * token)404 addTname( char *token )
405 #else
406 addTname( token )
407 char *token;
408 #endif
409 {
410 TermEntry *p;
411 require(token!=NULL, "addTname: invalid token name");
412
413 if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
414 p = newTermEntry( token );
415 Ttrack( p->str );
416 p->token = TokenNum++;
417 hash_add(Tname, token, (Entry *)p);
418 return p->token;
419 }
420
421 /* This is the same as addTname except we force the TokenNum to be tnum.
422 * We don't have to use the Forced token stuff as no tokens will have
423 * been defined with #tokens when this is called. This is only called
424 * when a #tokdefs meta-op is used.
425 */
426 int
427 #ifdef __USE_PROTOS
addForcedTname(char * token,int tnum)428 addForcedTname( char *token, int tnum )
429 #else
430 addForcedTname( token, tnum )
431 char *token;
432 int tnum;
433 #endif
434 {
435 TermEntry *p;
436 require(token!=NULL, "addTname: invalid token name");
437
438 if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
439 p = newTermEntry( token );
440 Ttrack( p->str );
441 p->token = tnum;
442 hash_add(Tname, token, (Entry *)p);
443 return p->token;
444 }
445
446 /*
447 * Add a token expr. Return the token number associated with it. If it already
448 * exists, then return the token number assigned to it.
449 */
450 int
451 #ifdef __USE_PROTOS
addTexpr(char * expr)452 addTexpr( char *expr )
453 #else
454 addTexpr( expr )
455 char *expr;
456 #endif
457 {
458 TermEntry *p;
459 require(expr!=NULL, "addTexpr: invalid regular expression");
460
461 if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token;
462 p = newTermEntry( expr );
463 Ttrack( p->str );
464 /* track the order in which they occur */
465 list_add(&ExprOrder, (void *)newExpr(p->str));
466 p->token = TokenNum++;
467 hash_add(Texpr, expr, (Entry *)p);
468 return p->token;
469 }
470
471 /* return the token number of 'term'. Return 0 if no 'term' exists */
472 int
473 #ifdef __USE_PROTOS
Tnum(char * term)474 Tnum( char *term )
475 #else
476 Tnum( term )
477 char *term;
478 #endif
479 {
480 TermEntry *p;
481 require(term!=NULL, "Tnum: invalid terminal");
482
483 if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term);
484 else p = (TermEntry *) hash_get(Tname, term);
485 if ( p == NULL ) return 0;
486 else return p->token;
487 }
488
489 /* associate a Name with an expr. If both have been already assigned
490 * token numbers, then an error is reported. Add the token or expr
491 * that has not been added if no error. This 'represents' the #token
492 * ANTLR pseudo-op. If both have not been defined, define them both
493 * linked to same token number.
494 */
495 void
496 #ifdef __USE_PROTOS
Tklink(char * token,char * expr)497 Tklink( char *token, char *expr )
498 #else
499 Tklink( token, expr )
500 char *token;
501 char *expr;
502 #endif
503 {
504 TermEntry *p, *q;
505 require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr");
506
507 p = (TermEntry *) hash_get(Tname, token);
508 q = (TermEntry *) hash_get(Texpr, expr);
509 if ( p != NULL && q != NULL ) /* both defined */
510 {
511 warn( eMsg2("token name %s and rexpr %s already defined; ignored",
512 token, expr) );
513 return;
514 }
515 if ( p==NULL && q==NULL ) /* both not defined */
516 {
517 int t = addTname( token );
518 q = newTermEntry( expr );
519 hash_add(Texpr, expr, (Entry *)q);
520 q->token = t;
521 /* note: we use the actual ExprStr array
522 * here as TokenInd doesn't exist yet
523 */
524 ExprStr[t] = q->str;
525 /* track the order in which they occur */
526 list_add(&ExprOrder, (void *)newExpr(q->str));
527 return;
528 }
529 if ( p != NULL ) /* one is defined, one is not */
530 {
531 q = newTermEntry( expr );
532 hash_add(Texpr, expr, (Entry *)q);
533 q->token = p->token;
534 ExprStr[p->token] = q->str; /* both expr and token str defined now */
535 list_add(&ExprOrder, (void *)newExpr(q->str));
536 }
537 else /* trying to associate name with expr here*/
538 {
539 p = newTermEntry( token );
540 hash_add(Tname, token, (Entry *)p);
541 p->token = q->token;
542 TokenStr[p->token] = p->str;/* both expr and token str defined now */
543 }
544 }
545
546 /*
547 * Given a string, this function allocates and returns a pointer to a
548 * hash table record of size 'sz' whose "str" pointer is reset to a position
549 * in the string table.
550 */
551 Entry *
552 #ifdef __USE_PROTOS
newEntry(char * text,int sz)553 newEntry( char *text, int sz )
554 #else
555 newEntry( text, sz )
556 char *text;
557 int sz;
558 #endif
559 {
560 Entry *p;
561 require(text!=NULL, "new: NULL terminal");
562
563 if ( (p = (Entry *) calloc(1,sz)) == 0 )
564 {
565 fatal_internal("newEntry: out of memory for terminals\n");
566 exit(PCCTS_EXIT_FAILURE);
567 }
568 p->str = mystrdup(text);
569
570 return(p);
571 }
572
573 /*
574 * add an element to a list.
575 *
576 * Any non-empty list has a sentinel node whose 'elem' pointer is really
577 * a pointer to the last element. (i.e. length(list) = #elemIn(list)+1).
578 * Elements are appended to the list.
579 */
580 void
581 #ifdef __USE_PROTOS
list_add(ListNode ** list,void * e)582 list_add( ListNode **list, void *e )
583 #else
584 list_add( list, e )
585 ListNode **list;
586 void *e;
587 #endif
588 {
589 ListNode *p, *tail;
590 require(e!=NULL, "list_add: attempting to add NULL list element");
591
592 p = newListNode;
593 require(p!=NULL, "list_add: cannot alloc new list node");
594 p->elem = e;
595 if ( *list == NULL )
596 {
597 ListNode *sentinel = newListNode;
598 require(sentinel!=NULL, "list_add: cannot alloc sentinel node");
599 *list=sentinel;
600 sentinel->next = p;
601 sentinel->elem = (char *)p; /* set tail pointer */
602 }
603 else /* find end of list */
604 {
605 tail = (ListNode *) (*list)->elem; /* get tail pointer */
606 tail->next = p;
607 (*list)->elem = (char *) p; /* reset tail */
608 }
609 }
610
611 /* MR10 list_free() frees the ListNode elements in the list */
612 /* MR10 if freeData then free the data elements of the list too */
613
614 void
615 #ifdef __USE_PROTOS
list_free(ListNode ** list,int freeData)616 list_free(ListNode **list,int freeData)
617 #else
618 list_free(list,freeData)
619 ListNode **list;
620 int freeData;
621 #endif
622 {
623 ListNode *p;
624 ListNode *next;
625
626 if (list == NULL) return;
627 if (*list == NULL) return;
628 for (p=*list; p != NULL; p=next) {
629 next=p->next;
630 if (freeData && p->elem != NULL) {
631 free( (char *) p->elem);
632 };
633 free( (char *) p);
634 };
635 *list=NULL;
636 }
637
638 void
639 #ifdef __USE_PROTOS
list_apply(ListNode * list,void (* f)(void *))640 list_apply( ListNode *list, void (*f)(void *) )
641 #else
642 list_apply( list, f )
643 ListNode *list;
644 void (*f)();
645 #endif
646 {
647 ListNode *p;
648 require(f!=NULL, "list_apply: NULL function to apply");
649
650 if ( list == NULL ) return;
651 for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem );
652 }
653
654 /* MR27 */
655
656 #ifdef __USE_PROTOS
list_search_cstring(ListNode * list,char * cstring)657 int list_search_cstring(ListNode *list, char * cstring)
658 #else
659 int list_search_cstring(list, cstring)
660 ListNode * list;
661 char * cstring;
662 #endif
663 {
664 ListNode *p;
665 if (list == NULL ) return 0;
666 for (p = list->next; p!=NULL; p=p->next) {
667 if (p->elem == NULL) continue;
668 if (0 == strcmp((char *) p->elem , cstring)) return 1;
669 }
670 return 0;
671 }
672
673 /* F O L L O W C y c l e S t u f f */
674
675 /* make a key based upon (rulename, computation, k value).
676 * Computation values are 'i'==FIRST, 'o'==FOLLOW.
677 */
678
679 /* MR10 Make the key all characters so it can be read easily */
680 /* MR10 by a simple dump program. Also, separates */
681 /* MR10 'o' and 'i' from rule name */
682
683 char *
684 #ifdef __USE_PROTOS
Fkey(char * rule,int computation,int k)685 Fkey( char *rule, int computation, int k )
686 #else
687 Fkey( rule, computation, k )
688 char *rule;
689 int computation;
690 int k;
691 #endif
692 {
693 static char key[MaxRuleName+2+2+1]; /* MR10 */
694 int i;
695
696 if ( k > 99 ) /* MR10 */
697 fatal("k>99 is too big for this implementation of ANTLR!\n"); /* MR10 */
698 if ( (i=strlen(rule)) > MaxRuleName ) /* MR10 */
699 fatal( eMsgd("rule name > max of %d\n", MaxRuleName) ); /* MR10 */
700 strcpy(key,rule);
701
702 /* MR10 */ key[i]='*';
703 /* MR10 */ key[i+1] = (char) computation; /* MR20 G. Hobbelt */
704 /* MR10 */ if (k < 10) {
705 /* MR10 */ key[i+2] = (char) ( '0' + k);
706 /* MR10 */ key[i+3] = '\0';
707 /* MR10 */ } else {
708 /* MR10 */ key[i+2] = (char) ( '0' + k/10);
709 /* MR10 */ key[i+3] = (char) ( '0' + k % 10);
710 /* MR10 */ key[i+4] = '\0';
711 /* MR10 */ };
712
713 return key;
714 }
715
716 /* Push a rule onto the kth FOLLOW stack */
717 void
718 #ifdef __USE_PROTOS
FoPush(char * rule,int k)719 FoPush( char *rule, int k )
720 #else
721 FoPush( rule, k )
722 char *rule;
723 int k;
724 #endif
725 {
726 RuleEntry *r;
727 require(rule!=NULL, "FoPush: tried to push NULL rule");
728 require(k<=CLL_k, "FoPush: tried to access non-existent stack");
729
730 /*fprintf(stderr, "FoPush(%s)\n", rule);*/
731 r = (RuleEntry *) hash_get(Rname, rule);
732 if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );}
733 if ( FoStack[k] == NULL ) /* Does the kth stack exist yet? */
734 {
735 /*fprintf(stderr, "allocating FoStack\n");*/
736 FoStack[k] = (int *) calloc(FoStackSize, sizeof(int));
737 require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n");
738 }
739 if ( FoTOS[k] == NULL )
740 {
741 FoTOS[k]=FoStack[k];
742 *(FoTOS[k]) = r->rulenum;
743 }
744 else
745 {
746 #ifdef MEMCHK
747 require(valid(FoStack[k]), "FoPush: invalid FoStack");
748 #endif
749 if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) )
750 fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n",
751 FoStackSize) );
752 require(FoTOS[k]>=FoStack[k],
753 eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox",
754 rule));
755 ++(FoTOS[k]);
756 *(FoTOS[k]) = r->rulenum;
757 }
758 {
759 /*
760 **** int *p;
761 **** fprintf(stderr, "FoStack[k=%d]:\n", k);
762 **** for (p=FoStack[k]; p<=FoTOS[k]; p++)
763 **** {
764 **** fprintf(stderr, "\t%s\n", RulePtr[*p]->rname);
765 **** }
766 */
767 }
768 }
769
770 /* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */
771 void
772 #ifdef __USE_PROTOS
FoPop(int k)773 FoPop( int k )
774 #else
775 FoPop( k )
776 int k;
777 #endif
778 {
779 require(k<=CLL_k, "FoPop: tried to access non-existent stack");
780 /*fprintf(stderr, "FoPop\n");*/
781 require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]),
782 "FoPop: FoStack stack-ptr is playing out of its sandbox");
783 if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL;
784 else (FoTOS[k])--;
785 }
786
787 /* Compute FOLLOW cycle.
788 * Mark all FOLLOW sets for rules in cycle as incomplete.
789 * Then, save cycle on the cycle list (Cycles) for later resolution.
790 * The Cycle is stored in the form:
791 * (head of cycle==croot, rest of rules in cycle==cyclicDep)
792 *
793 * e.g. (Fo means "FOLLOW of", "-->" means requires or depends on)
794 *
795 * Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x)
796 * ^----Infinite recursion (cycle)
797 *
798 * the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends
799 * on the FOLLOW of a,b, and c. The root of a cycle is always complete after
800 * Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules
801 * in a FOLLOW cycle have the same FOLLOW set.
802 */
803 void
804 #ifdef __USE_PROTOS
RegisterCycle(char * rule,int k)805 RegisterCycle( char *rule, int k )
806 #else
807 RegisterCycle( rule, k )
808 char *rule;
809 int k;
810 #endif
811 {
812 CacheEntry *f;
813 Cycle *c;
814 int *p;
815 RuleEntry *r;
816 require(rule!=NULL, "RegisterCycle: tried to register NULL rule");
817 require(k<=CLL_k, "RegisterCycle: tried to access non-existent stack");
818
819 /*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/
820 /* Find cycle start */
821 r = (RuleEntry *) hash_get(Rname, rule);
822 require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule));
823 require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]),
824 eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox",
825 rule));
826 /*** if ( FoTOS[k]<FoStack[k]||FoTOS[k]>&(FoStack[k][FoStackSize-1]) )
827 **** {
828 **** fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n",
829 **** rule);
830 **** fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n",
831 **** FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1]));
832 **** exit(PCCTS_EXIT_FAILURE);
833 **** }
834 ****/
835
836 #ifdef MEMCHK
837 require(valid(FoStack[k]), "RegisterCycle: invalid FoStack");
838 #endif
839 for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;}
840 require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief");
841 if ( p == FoTOS[k] ) return; /* don't worry about cycles to oneself */
842
843 /* compute cyclic dependents (rules in cycle except head) */
844 c = newCycle;
845 require(c!=NULL, "RegisterCycle: couldn't alloc new cycle");
846 c->cyclicDep = empty;
847 c->croot = *p++; /* record root of cycle */
848 for (; p<=FoTOS[k]; p++)
849 {
850 /* Mark all dependent rules as incomplete */
851 f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k));
852 if ( f==NULL )
853 {
854 f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) );
855 hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f);
856 }
857 f->incomplete = TRUE;
858
859 set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */
860 }
861 list_add(&(Cycles[k]), (void *)c);
862 }
863
864 /* make all rules in cycle complete
865 *
866 * while ( some set has changed ) do
867 * for each cycle do
868 * if degree of FOLLOW set for croot > old degree then
869 * update all FOLLOW sets for rules in cyclic dependency
870 * change = TRUE
871 * endif
872 * endfor
873 * endwhile
874 */
875 void
876 #ifdef __USE_PROTOS
ResolveFoCycles(int k)877 ResolveFoCycles( int k )
878 #else
879 ResolveFoCycles( k )
880 int k;
881 #endif
882 {
883 ListNode *p, *q;
884 Cycle *c;
885 int changed = 1;
886 CacheEntry *f,*g;
887 int r;
888 /* int i; */ /* MR10 not useful */
889 unsigned d;
890
891 unsigned *cursor; /* MR10 */
892 unsigned *origin; /* MR10 */
893
894 /*fprintf(stderr, "Resolving following cycles for %d\n", k);*/
895 while ( changed )
896 {
897 changed = 0;
898 /* MR10 i = 0; */
899 for (p = Cycles[k]->next; p!=NULL; p=p->next)
900 {
901 c = (Cycle *) p->elem;
902 /*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/
903 /*s_fprT(stderr, c->cyclicDep);*/
904 /*fprintf(stderr, "\n");*/
905 f = (CacheEntry *)
906 hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k));
907 require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) );
908 if ( (d=set_deg(f->fset)) > c->deg )
909 {
910 /*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/
911 changed = 1;
912 c->deg = d; /* update cycle FOLLOW set degree */
913
914 /* MR10 */ origin=set_pdq(c->cyclicDep);
915 /* MR10 */ for (cursor=origin; *cursor != nil; cursor++) {
916 /* MR10 */ r=*cursor;
917
918 /******** while ( !set_nil(c->cyclicDep) ) { *****/
919 /******** r = set_int(c->cyclicDep); *****/
920 /******** set_rm(r, c->cyclicDep); *****/
921
922 /*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/
923 g = (CacheEntry *)
924 hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k));
925 require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) );
926 set_orin(&(g->fset), f->fset);
927 g->incomplete = FALSE;
928 }
929 /* MR10 */ free( (char *) origin);
930 /* MR10 */ origin=NULL;
931 }
932 }
933 /* MR10 - this if statement appears to be meaningless since i is always 0 */
934 /* MR10 if ( i == 1 ) changed = 0; */ /* if only 1 cycle, no need to repeat */
935 }
936 /* kill Cycle list */
937 for (q = Cycles[k]->next; q != NULL; q=p)
938 {
939 p = q->next;
940 set_free( ((Cycle *)q->elem)->cyclicDep );
941 free((char *)q);
942 }
943 free( (char *)Cycles[k] );
944 Cycles[k] = NULL;
945 }
946
947
948 /* P r i n t i n g S y n t a x D i a g r a m s */
949
950 static void
951 #ifdef __USE_PROTOS
pBlk(Junction * q,int btype)952 pBlk( Junction *q, int btype )
953 #else
954 pBlk( q, btype )
955 Junction *q;
956 int btype;
957 #endif
958 {
959 int k,a;
960 Junction *alt, *p;
961
962 q->end->pvisited = TRUE;
963 if ( btype == aLoopBegin )
964 {
965 require(q->p2!=NULL, "pBlk: invalid ()* block");
966 PRINT(q->p1);
967 alt = (Junction *)q->p2;
968 PRINT(alt->p1);
969 if ( PrintAnnotate )
970 {
971 printf(" /* Opt ");
972 k = 1;
973 while ( !set_nil(alt->fset[k]) )
974 {
975 s_fprT(stdout, alt->fset[k]);
976 if ( k++ == CLL_k ) break;
977 if ( !set_nil(alt->fset[k]) ) printf(", ");
978 }
979 printf(" */\n");
980 }
981 return;
982 }
983 for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++)
984 {
985 if ( alt->p1 != NULL ) PRINT(alt->p1);
986 if ( PrintAnnotate )
987 {
988 printf( " /* [%d] ", alt->altnum);
989 k = 1;
990 while ( !set_nil(alt->fset[k]) )
991 {
992 s_fprT(stdout, alt->fset[k]);
993 if ( k++ == CLL_k ) break;
994 if ( !set_nil(alt->fset[k]) ) printf(", ");
995 }
996 if ( alt->p2 == NULL && btype == aOptBlk )
997 printf( " (optional branch) */\n");
998 else printf( " */\n");
999 }
1000
1001 /* ignore implied empty alt of Plus blocks */
1002 if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break;
1003
1004 if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) )
1005 {
1006 if ( pLevel == 1 )
1007 {
1008 printf("\n");
1009 if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>");
1010 printf("\t");
1011 }
1012 else printf(" ");
1013 printf("|");
1014 if ( pLevel == 1 )
1015 {
1016 p = (Junction *) ((Junction *)alt->p2)->p1;
1017 while ( p!=NULL )
1018 {
1019 if ( p->ntype==nAction )
1020 {
1021 p=(Junction *)((ActionNode *)p)->next;
1022 continue;
1023 }
1024 if ( p->ntype!=nJunction )
1025 {
1026 break;
1027 }
1028 if ( p->jtype==EndBlk || p->jtype==EndRule )
1029 {
1030 p = NULL;
1031 break;
1032 }
1033 p = (Junction *)p->p1;
1034 }
1035 if ( p==NULL ) printf("\n\t"); /* Empty alt? */
1036 }
1037 }
1038 }
1039 q->end->pvisited = FALSE;
1040 }
1041
1042 /* How to print out a junction */
1043 void
1044 #ifdef __USE_PROTOS
pJunc(Junction * q)1045 pJunc( Junction *q )
1046 #else
1047 pJunc( q )
1048 Junction *q;
1049 #endif
1050 {
1051 int dum_k;
1052 int doing_rule;
1053 require(q!=NULL, "pJunc: NULL node");
1054 require(q->ntype==nJunction, "pJunc: not junction");
1055
1056 if ( q->pvisited == TRUE ) return;
1057 q->pvisited = TRUE;
1058 switch ( q->jtype )
1059 {
1060 case aSubBlk :
1061 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
1062 if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction &&
1063 ((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1;
1064 else doing_rule = 0;
1065 pLevel++;
1066 if ( pLevel==1 )
1067 {
1068 if ( pAlt1==1 ) printf("=>");
1069 printf("\t");
1070 }
1071 else printf(" ");
1072 if ( doing_rule )
1073 {
1074 if ( pLevel==1 ) printf(" ");
1075 pBlk(q,q->jtype);
1076 }
1077 else {
1078 printf("(");
1079 if ( pLevel==1 ) printf(" ");
1080 pBlk(q,q->jtype);
1081 if ( pLevel>1 ) printf(" ");
1082 printf(")");
1083 }
1084 if ( q->guess ) printf("?");
1085 pLevel--;
1086 if ( PrintAnnotate ) freeBlkFsets(q);
1087 if ( q->end->p1 != NULL ) PRINT(q->end->p1);
1088 break;
1089 case aOptBlk :
1090 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
1091 pLevel++;
1092 if ( pLevel==1 )
1093 {
1094 if ( pAlt1==1 ) printf("=>");
1095 printf("\t");
1096 }
1097 else printf(" ");
1098 printf("{");
1099 if ( pLevel==1 ) printf(" ");
1100 pBlk(q,q->jtype);
1101 if ( pLevel>1 ) printf(" ");
1102 else printf("\n\t");
1103 printf("}");
1104 pLevel--;
1105 if ( PrintAnnotate ) freeBlkFsets(q);
1106 if ( q->end->p1 != NULL ) PRINT(q->end->p1);
1107 break;
1108 case aLoopBegin :
1109 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
1110 pLevel++;
1111 if ( pLevel==1 )
1112 {
1113 if ( pAlt1==1 ) printf("=>");
1114 printf("\t");
1115 }
1116 else printf(" ");
1117 printf("(");
1118 if ( pLevel==1 ) printf(" ");
1119 pBlk(q,q->jtype);
1120 if ( pLevel>1 ) printf(" ");
1121 else printf("\n\t");
1122 printf(")*");
1123 pLevel--;
1124 if ( PrintAnnotate ) freeBlkFsets(q);
1125 if ( q->end->p1 != NULL ) PRINT(q->end->p1);
1126 break;
1127 case aLoopBlk :
1128 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
1129 pBlk(q,q->jtype);
1130 if ( PrintAnnotate ) freeBlkFsets(q);
1131 break;
1132 case aPlusBlk :
1133 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
1134 pLevel++;
1135 if ( pLevel==1 )
1136 {
1137 if ( pAlt1==1 ) printf("=>");
1138 printf("\t");
1139 }
1140 else printf(" ");
1141 printf("(");
1142 if ( pLevel==1 ) printf(" ");
1143 pBlk(q,q->jtype);
1144 if ( pLevel>1 ) printf(" ");
1145 printf(")+");
1146 pLevel--;
1147 if ( PrintAnnotate ) freeBlkFsets(q);
1148 if ( q->end->p1 != NULL ) PRINT(q->end->p1);
1149 break;
1150 case EndBlk :
1151 break;
1152 case RuleBlk :
1153 printf( "\n%s :\n", q->rname);
1154 PRINT(q->p1);
1155 if ( q->p2 != NULL ) PRINT(q->p2);
1156 break;
1157 case Generic :
1158 if ( q->p1 != NULL ) PRINT(q->p1);
1159 q->pvisited = FALSE;
1160 if ( q->p2 != NULL ) PRINT(q->p2);
1161 break;
1162 case EndRule :
1163 printf( "\n\t;\n");
1164 break;
1165 }
1166 q->pvisited = FALSE;
1167 }
1168
1169 /* How to print out a rule reference node */
1170 void
1171 #ifdef __USE_PROTOS
pRuleRef(RuleRefNode * p)1172 pRuleRef( RuleRefNode *p )
1173 #else
1174 pRuleRef( p )
1175 RuleRefNode *p;
1176 #endif
1177 {
1178 require(p!=NULL, "pRuleRef: NULL node");
1179 require(p->ntype==nRuleRef, "pRuleRef: not rule ref node");
1180
1181 printf( " %s", p->text);
1182 PRINT(p->next);
1183 }
1184
1185 /* How to print out a terminal node */
1186 void
1187 #ifdef __USE_PROTOS
pToken(TokNode * p)1188 pToken( TokNode *p )
1189 #else
1190 pToken( p )
1191 TokNode *p;
1192 #endif
1193 {
1194 require(p!=NULL, "pToken: NULL node");
1195 require(p->ntype==nToken, "pToken: not token node");
1196
1197 if ( p->wild_card ) printf(" .");
1198 printf( " %s", TerminalString(p->token));
1199 PRINT(p->next);
1200 }
1201
1202 /* How to print out a terminal node */
1203 void
1204 #ifdef __USE_PROTOS
pAction(ActionNode * p)1205 pAction( ActionNode *p )
1206 #else
1207 pAction( p )
1208 ActionNode *p;
1209 #endif
1210 {
1211 require(p!=NULL, "pAction: NULL node");
1212 require(p->ntype==nAction, "pAction: not action node");
1213
1214 PRINT(p->next);
1215 }
1216
1217 /* F i l l F o l l o w L i s t s */
1218
1219 /*
1220 * Search all rules for all rule reference nodes, q to rule, r.
1221 * Add q->next to follow list dangling off of rule r.
1222 * i.e.
1223 *
1224 * r: -o-R-o-->o--> Ptr to node following rule r in another rule
1225 * |
1226 * o--> Ptr to node following another reference to r.
1227 *
1228 * This is the data structure employed to avoid FOLLOW set computation. We
1229 * simply compute the FIRST (reach) of the EndRule Node which follows the
1230 * list found at the end of all rules which are referenced elsewhere. Rules
1231 * not invoked by other rules have no follow list (r->end->p1==NULL).
1232 * Generally, only start symbols are not invoked by another rule.
1233 *
1234 * Note that this mechanism also gives a free cross-reference mechanism.
1235 *
1236 * The entire syntax diagram is layed out like this:
1237 *
1238 * SynDiag
1239 * |
1240 * v
1241 * o-->R1--o
1242 * |
1243 * o-->R2--o
1244 * |
1245 * ...
1246 * |
1247 * o-->Rn--o
1248 *
1249 */
1250 void
1251 #ifdef __USE_PROTOS
FoLink(Node * p)1252 FoLink( Node *p )
1253 #else
1254 FoLink( p )
1255 Node *p;
1256 #endif
1257 {
1258 RuleEntry *q;
1259 Junction *j = (Junction *) p;
1260 RuleRefNode *r = (RuleRefNode *) p;
1261
1262 if ( p==NULL ) return;
1263 require(p->ntype>=1 && p->ntype<=NumNodeTypes,
1264 eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype));
1265 switch ( p->ntype )
1266 {
1267 case nJunction :
1268 if ( j->fvisited ) return;
1269 if ( j->jtype == EndRule ) return;
1270 j->fvisited = TRUE;
1271 FoLink( j->p1 );
1272 FoLink( j->p2 );
1273 /* MR14 */
1274 /* MR14 */ /* Need to determine whether the guess block is an */
1275 /* MR14 */ /* of the form (alpha)? beta before follow sets are */
1276 /* MR14 */ /* computed. This is necessary to solve problem */
1277 /* MR14 */ /* of doing follow on the alpha of an (alpha)? beta block. */
1278 /* MR14 */
1279 /* MR14 */ /* This is performed by analysis_point as a side-effect. */
1280 /* MR14 */
1281 /* MR14 */
1282 /* MR14 */ if (j->jtype == aSubBlk && j->guess) {
1283 /* MR14 */ Junction *ignore;
1284 /* MR14 */ ignore=analysis_point(j);
1285 /* MR14 */ }
1286 /* MR14 */
1287 return;
1288 case nRuleRef :
1289 if ( r->linked ) return;
1290 q = (RuleEntry *) hash_get(Rname, r->text);
1291 if ( q == NULL )
1292 {
1293 warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line );
1294 }
1295 else
1296 {
1297 if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL )
1298 {
1299 warnFL( eMsg1("rule %s accepts no parameter(s)", r->text),
1300 FileStr[r->file], r->line );
1301 }
1302 if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL )
1303 {
1304 warnFL( eMsg1("rule %s requires parameter(s)", r->text),
1305 FileStr[r->file], r->line );
1306 }
1307 if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL )
1308 {
1309 warnFL( eMsg1("rule %s yields no return value(s)", r->text),
1310 FileStr[r->file], r->line );
1311 }
1312 if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL )
1313 {
1314 warnFL( eMsg1("rule %s returns a value(s)", r->text),
1315 FileStr[r->file], r->line );
1316 }
1317 if ( !r->linked )
1318 {
1319 addFoLink( r->next, r->rname, RulePtr[q->rulenum] );
1320 r->linked = TRUE;
1321 }
1322 }
1323 FoLink( r->next );
1324 return;
1325 case nToken :
1326 FoLink( ((TokNode *)p)->next );
1327 return;
1328 case nAction :
1329 FoLink( ((ActionNode *)p)->next );
1330 return;
1331 default :
1332 fatal_internal("invalid node type");
1333 }
1334 }
1335
1336 /*
1337 * Add a reference to the end of a rule.
1338 *
1339 * 'r' points to the RuleBlk node in a rule. r->end points to the last node
1340 * (EndRule jtype) in a rule.
1341 *
1342 * Initial:
1343 * r->end --> o
1344 *
1345 * After:
1346 * r->end --> o-->o--> Ptr to node following rule r in another rule
1347 * |
1348 * o--> Ptr to node following another reference to r.
1349 *
1350 * Note that the links are added to the head of the list so that r->end->p1
1351 * always points to the most recently added follow-link. At the end, it should
1352 * point to the last reference found in the grammar (starting from the 1st rule).
1353 */
1354 void
1355 #ifdef __USE_PROTOS
addFoLink(Node * p,char * rname,Junction * r)1356 addFoLink( Node *p, char *rname, Junction *r )
1357 #else
1358 addFoLink( p, rname, r )
1359 Node *p;
1360 char *rname;
1361 Junction *r;
1362 #endif
1363 {
1364 Junction *j;
1365 require(r!=NULL, "addFoLink: incorrect rule graph");
1366 require(r->end!=NULL, "addFoLink: incorrect rule graph");
1367 require(r->end->jtype==EndRule, "addFoLink: incorrect rule graph");
1368 require(p!=NULL, "addFoLink: NULL FOLLOW link");
1369
1370 j = newJunction();
1371 j->rname = rname; /* rname on follow links point to target rule */
1372 j->p1 = p; /* link to other rule */
1373 j->p2 = (Node *) r->end->p1;/* point to head of list */
1374 r->end->p1 = (Node *) j; /* reset head to point to new node */
1375 }
1376
1377 void
1378 #ifdef __USE_PROTOS
GenCrossRef(Junction * p)1379 GenCrossRef( Junction *p )
1380 #else
1381 GenCrossRef( p )
1382 Junction *p;
1383 #endif
1384 {
1385 set a;
1386 Junction *j;
1387 RuleEntry *q;
1388 unsigned e;
1389 require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?");
1390
1391 printf("Cross Reference:\n\n");
1392 a = empty;
1393 for (; p!=NULL; p = (Junction *)p->p2)
1394 {
1395 printf("Rule %20s referenced by {", p->rname);
1396 /* make a set of rules for uniqueness */
1397 for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2)
1398 {
1399 q = (RuleEntry *) hash_get(Rname, j->rname);
1400 require(q!=NULL, "GenCrossRef: FoLinks are screwed up");
1401 set_orel(q->rulenum, &a);
1402 }
1403 for (; !set_nil(a); set_rm(e, a))
1404 {
1405 e = set_int(a);
1406 printf(" %s", RulePtr[e]->rname);
1407 }
1408 printf(" }\n");
1409 }
1410 set_free( a );
1411 }
1412
1413 /*
1414 The single argument is a pointer to the start of an element of a
1415 C++ style function prototypet list. Given a pointer to the start of
1416 an formal we must locate the comma (or the end of the string)
1417 and locate the datatype, formal name, and initial value expression.
1418
1419 The function returns a pointer to the character following the comma
1420 which terminates the formal declaration, or a pointer to the end of
1421 the string if none was found.
1422
1423 I thought we were parsing specialists, how come I'm doing this by
1424 hand written code ?
1425
1426 Examples of input:
1427
1428 Foo f,
1429 Foo f = Foo(1),
1430 Foo f = Foo(1,2),
1431 Foo f = &farray[1,2],
1432 Foo f = ",",
1433 Foo f = ',',
1434 TFoo<int,char> f = TFoo<int,char>(1,2),
1435
1436 A non-zero value for nesting indicates a problem matching '(' and ')',
1437 '[' and ']', '<' and '>', '{' and '}', or improperly terminated string
1438 or character literal.
1439
1440 */
1441
1442
1443 /*
1444 * Don't care if it is a valid string literal or not, just find the end
1445 * Start with pointer to leading "\""
1446 */
1447
1448 #ifdef __USE_PROTOS
skipStringLiteral(char * pCurrent)1449 char * skipStringLiteral(char *pCurrent)
1450 #else
1451 char * skipStringLiteral(pCurrent)
1452 char *pCurrent;
1453 #endif
1454 {
1455 char *p = pCurrent;
1456 if (*p == 0) return p;
1457 require (*p == '\"', "skipStringLiteral")
1458 p++;
1459 for (p = p; *p != 0; p++) {
1460 if (*p == '\\') {
1461 p++;
1462 if (*p == 0) break;
1463 p++;
1464 }
1465 if (*p == '\"') {
1466 p++;
1467 break;
1468 }
1469 }
1470 return p;
1471 }
1472
1473 /*
1474 * Don't care if it is a valid character literal or not, just find the end
1475 * Start with pointer to leading "'"
1476 */
1477
1478 #ifdef __USE_PROTOS
skipCharLiteral(char * pStart)1479 char * skipCharLiteral(char *pStart)
1480 #else
1481 char * skipCharLiteral(pStart)
1482 char *pStart;
1483 #endif
1484 {
1485 char *p = pStart;
1486 if (*p == 0) return p;
1487 require (*p == '\'', "skipCharLiteral")
1488 p++;
1489 for (p = p; *p != 0; p++) {
1490 if (*p == '\\') {
1491 p++;
1492 if (*p == 0) break;
1493 p++;
1494 }
1495 if (*p == '\'') {
1496 p++;
1497 break;
1498 }
1499 }
1500 return p;
1501 }
1502
1503 #ifdef __USE_PROTOS
skipSpaces(char * pStart)1504 char * skipSpaces(char *pStart)
1505 #else
1506 char * skipSpaces(pStart)
1507 char * pStart;
1508 #endif
1509 {
1510 char *p = pStart;
1511 while (*p != 0 && isspace(*p)) p++;
1512 return p;
1513 }
1514
1515 #ifdef __USE_PROTOS
skipToSeparatorOrEqualSign(char * pStart,int * pNest)1516 char * skipToSeparatorOrEqualSign(char *pStart, int *pNest)
1517 #else
1518 char * skipToSeparatorOrEqualSign(pStart, pNest)
1519 char *pStart;
1520 int *pNest;
1521 #endif
1522 {
1523 char *p = pStart;
1524
1525 int nest = 0;
1526
1527 *pNest = (-1);
1528
1529 while (*p != 0) {
1530 switch (*p) {
1531
1532 case '(' :
1533 case '[' :
1534 case '<' :
1535 case '{' :
1536 nest++;
1537 p++;
1538 break;
1539
1540 case ')' :
1541 case ']' :
1542 case '>' :
1543 case '}' :
1544 nest--;
1545 p++;
1546 break;
1547
1548 case '"' :
1549 p = skipStringLiteral(p);
1550 break;
1551
1552 case '\'' :
1553 p = skipCharLiteral(p);
1554 break;
1555
1556 case '\\':
1557 p++;
1558 if (*p == 0) goto EXIT;
1559 p++;
1560 break;
1561
1562 case ',':
1563 case '=':
1564 if (nest == 0) goto EXIT;
1565 p++;
1566 break;
1567
1568 default:
1569 p++;
1570 }
1571 }
1572 EXIT:
1573 *pNest = nest;
1574 return p;
1575 }
1576
1577 #ifdef __USE_PROTOS
skipToSeparator(char * pStart,int * pNest)1578 char * skipToSeparator(char *pStart, int *pNest)
1579 #else
1580 char * skipToSeparator(pStart, pNest)
1581 char *pStart;
1582 int *pNest;
1583 #endif
1584 {
1585 char * p = pStart;
1586 for ( ; ; ) {
1587 p = skipToSeparatorOrEqualSign(p, pNest);
1588 if (*pNest != 0) return p;
1589 if (*p == ',') return p;
1590 if (*p == 0) return p;
1591 p++;
1592 }
1593 }
1594
1595 /* skip to just past the "=" separating the declaration from the initialization value */
1596
1597 #ifdef __USE_PROTOS
getInitializer(char * pStart)1598 char * getInitializer(char *pStart)
1599 #else
1600 char * getInitializer(pStart)
1601 char * pStart;
1602 #endif
1603 {
1604 char *p;
1605 char *pDataType;
1606 char *pSymbol;
1607 char *pEqualSign;
1608 char *pValue;
1609 char *pSeparator;
1610 int nest = 0;
1611
1612 require(pStart!=NULL, "getInitializer: invalid string");
1613
1614 p = endFormal(pStart,
1615 &pDataType,
1616 &pSymbol,
1617 &pEqualSign,
1618 &pValue,
1619 &pSeparator,
1620 &nest);
1621 if (nest != 0) return NULL;
1622 if (pEqualSign == NULL) return NULL;
1623 if (pValue == NULL) return NULL;
1624 return strBetween(pValue, NULL, pSeparator);
1625 }
1626
1627 /*
1628 Examines the string from pStart to pEnd-1.
1629 If the string has 0 length or is entirely white space
1630 returns 1. Otherwise 0.
1631 */
1632
1633 #ifdef __USE_PROTOS
isWhiteString(const char * pStart,const char * pEnd)1634 int isWhiteString(const char *pStart, const char *pEnd)
1635 #else
1636 int isWhiteString(pStart, pEnd)
1637 const char *pStart;
1638 const char *pEnd;
1639 #endif
1640 {
1641 const char *p;
1642 for (p = pStart; p < pEnd; p++) {
1643 if (! isspace(*p)) return 0;
1644 }
1645 return 1;
1646 }
1647
1648 /*
1649 This replaces HasComma() which couldn't distinguish
1650
1651 foo ["a,b"]
1652
1653 from:
1654
1655 foo[a,b]
1656
1657 */
1658
1659 #ifdef __USE_PROTOS
hasMultipleOperands(char * pStart)1660 int hasMultipleOperands(char *pStart)
1661 #else
1662 int hasMultipleOperands(pStart)
1663 char *pStart;
1664 #endif
1665 {
1666 char *p = pStart;
1667 int nest = 0;
1668
1669 p = skipSpaces(p);
1670 if (*p == 0) return 0;
1671 p = skipToSeparator(p, &nest);
1672 if (nest == 0 && *p == ',') return 1;
1673 return 0;
1674 }
1675
1676
1677 #define MAX_STR_BETWEEN_WORK_AREA 1000
1678
1679 static char strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA];
1680
1681
1682 /*
1683 strBetween(pStart, pNext, pStop)
1684
1685 Creates a null terminated string by copying the text between two pointers
1686 to a work area. The start of the string is pStart. The end of the string
1687 is the character before pNext, or if pNext is null then the character before
1688 pStop. Trailing spaces are not included in the copy operation.
1689
1690 This is used when a string contains several parts. The pNext part may be
1691 optional. The pStop will stop the scan when the optional part is not present
1692 (is a null pointer).
1693 */
1694
1695 #ifdef __USE_PROTOS
strBetween(char * pStart,char * pNext,char * pStop)1696 char *strBetween(char *pStart, char *pNext, char *pStop)
1697 #else
1698 char *strBetween(pStart, pNext, pStop)
1699 char *pStart;
1700 char *pNext;
1701 char *pStop;
1702 #endif
1703 {
1704 char *p;
1705 char *q = strBetweenWorkArea;
1706 const char *pEnd;
1707
1708 pEnd = (pNext != NULL) ? pNext : pStop;
1709
1710 require (pEnd != NULL, "pEnd == NULL");
1711 require (pEnd >= pStart, "pEnd < pStart");
1712 for (pEnd--; pEnd >= pStart; pEnd--) { /* MR31 */
1713 if (! isspace(*pEnd)) break;
1714 }
1715 for (p = pStart;
1716 p <= pEnd && q < &strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA-2];
1717 p++, q++) {
1718 *q = *p;
1719 }
1720 *q = 0;
1721 return strBetweenWorkArea;
1722 }
1723
1724 /*
1725 function Returns pointer to character following separator at
1726 value which to continue search for next formal. If at the
1727 end of the string a pointer to the null byte at the
1728 end of the string is returned.
1729
1730 pStart Pointer to the starting position of the formal list
1731
1732 This may be the middle of a longer string, for example
1733 when looking for the end of formal #3 starting from
1734 the middle of the complete formal list.
1735
1736 ppDataType Returns a pointer to the start of the data type in the
1737 formal. Example: pointer to "Foo".
1738
1739 ppSymbol Returns a pointer to the start of the formal symbol.
1740 Example: pointer to "f".
1741
1742 ppEqualSign Returns a pointer to the equal sign separating the
1743 formal symbol from the initial value. If there is
1744 no "=" then this will be NULL.
1745
1746 ppValue Returns a pointer to the initial value part of the
1747 formal declaration. Example: pointer to "&farray[1,2]"
1748
1749 ppSeparator Returns a pointer to the character which terminated the
1750 scan. This should be a pointer to a comma or a null
1751 byte which terminates the string.
1752
1753 pNest Returns the nesting level when a separator was found.
1754 This is non-zero for any kind of error. This is zero
1755 for a successful parse of this portion of the formal
1756 list.
1757
1758 */
1759
1760 #ifdef __USE_PROTOS
endFormal(char * pStart,char ** ppDataType,char ** ppSymbol,char ** ppEqualSign,char ** ppValue,char ** ppSeparator,int * pNest)1761 char * endFormal(char *pStart,
1762 char **ppDataType,
1763 char **ppSymbol,
1764 char **ppEqualSign,
1765 char **ppValue,
1766 char **ppSeparator,
1767 int *pNest)
1768 #else
1769 char * endFormal(pStart,
1770 ppDataType,
1771 ppSymbol,
1772 ppEqualSign,
1773 ppValue,
1774 ppSeparator,
1775 pNest)
1776 char *pStart;
1777 char **ppDataType;
1778 char **ppSymbol;
1779 char **ppEqualSign;
1780 char **ppValue;
1781 char **ppSeparator;
1782 int *pNest;
1783
1784 #endif
1785 {
1786 char *p = pStart;
1787 char *q;
1788
1789 *ppDataType = NULL;
1790 *ppSymbol = NULL;
1791 *ppEqualSign = NULL;
1792 *ppValue = NULL;
1793 *ppSeparator = NULL;
1794
1795 *pNest = 0;
1796
1797 /* The first non-blank is the start of the datatype */
1798
1799 p = skipSpaces(p);
1800 if (*p == 0) goto EXIT;
1801 *ppDataType = p;
1802
1803 /* We are not looking for the symbol, we are looking
1804 for the separator that follows the symbol. Then
1805 we'll back up.
1806
1807 Search for the ',' or '=" or null terminator.
1808 */
1809
1810 p = skipToSeparatorOrEqualSign(p, pNest);
1811
1812 if (*pNest != 0) goto EXIT;
1813
1814 /*
1815 Work backwards to find start of symbol
1816 Skip spaces between the end of symbol and separator
1817 Assume that there are no spaces in the formal, but
1818 there is a space preceding the formal
1819 */
1820
1821 for (q = &p[-1]; q >= *ppDataType; q--) {
1822 if (! isspace(*q)) break;
1823 }
1824 if (q < *ppDataType) goto EXIT;
1825
1826 /*
1827 MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)()
1828 Backup until we hit the end of a symbol string, then find the
1829 start of the symbol string. This wont' work for functions
1830 with prototypes, but works for the most common cases. For
1831 others, use typedef names.
1832 */
1833
1834 /* MR26 */ for (q = q; q >= *ppDataType; q--) {
1835 /* MR26 */ if (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$') break;
1836 /* MR26 */ }
1837 /* MR26 */ if (q < *ppDataType) goto EXIT;
1838
1839 for (q = q; q >= *ppDataType; q--) {
1840 if ( ! (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$')) break;
1841 }
1842
1843 *ppSymbol = &q[1];
1844
1845 if (*p == ',' || *p == 0) {
1846 *ppSeparator = p;
1847 goto EXIT;
1848 }
1849
1850 *ppEqualSign = p;
1851 p = skipSpaces(++p);
1852 *ppValue = p;
1853 if (*p == 0) goto EXIT;
1854
1855
1856 while (*p != 0 && *pNest == 0 && *p != ',') {
1857 p = skipToSeparator(p, pNest);
1858 }
1859 if (*pNest == 0) *ppSeparator = p;
1860
1861 EXIT:
1862 if (*p == ',') p++;
1863 return p;
1864 }
1865