• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ** This file contains all sources (including headers) to the LEMON
3 ** LALR(1) parser generator.  The sources have been combined into a
4 ** single file to make it easy to include LEMON in the source tree
5 ** and Makefile of another program.
6 **
7 ** The author of this program disclaims copyright.
8 */
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <assert.h>
15 
16 #ifndef __WIN32__
17 #   if defined(_WIN32) || defined(WIN32)
18 #	define __WIN32__
19 #   endif
20 #endif
21 
22 #ifdef __WIN32__
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 extern int access(const char *path, int mode);
27 #ifdef __cplusplus
28 }
29 #endif
30 #else
31 #include <unistd.h>
32 #endif
33 
34 /* #define PRIVATE static */
35 #define PRIVATE
36 
37 #ifdef TEST
38 #define MAXRHS 5       /* Set low to exercise exception code */
39 #else
40 #define MAXRHS 1000
41 #endif
42 
43 static int showPrecedenceConflict = 0;
44 static const char **made_files = NULL;
45 static int made_files_count = 0;
46 static int successful_exit = 0;
LemonAtExit(void)47 static void LemonAtExit(void)
48 {
49     /* if we failed, delete (most) files we made, to unconfuse build tools. */
50     int i;
51     for (i = 0; i < made_files_count; i++) {
52         if (!successful_exit) {
53             remove(made_files[i]);
54         }
55     }
56     free(made_files);
57     made_files_count = 0;
58     made_files = NULL;
59 }
60 
61 static char *msort(char*,char**,int(*)(const char*,const char*));
62 
63 /*
64 ** Compilers are getting increasingly pedantic about type conversions
65 ** as C evolves ever closer to Ada....  To work around the latest problems
66 ** we have to define the following variant of strlen().
67 */
68 #define lemonStrlen(X)   ((int)strlen(X))
69 
70 /* a few forward declarations... */
71 struct rule;
72 struct lemon;
73 struct action;
74 
75 static struct action *Action_new(void);
76 static struct action *Action_sort(struct action *);
77 
78 /********** From the file "build.h" ************************************/
79 void FindRulePrecedences();
80 void FindFirstSets();
81 void FindStates();
82 void FindLinks();
83 void FindFollowSets();
84 void FindActions();
85 
86 /********* From the file "configlist.h" *********************************/
87 void Configlist_init(void);
88 struct config *Configlist_add(struct rule *, int);
89 struct config *Configlist_addbasis(struct rule *, int);
90 void Configlist_closure(struct lemon *);
91 void Configlist_sort(void);
92 void Configlist_sortbasis(void);
93 struct config *Configlist_return(void);
94 struct config *Configlist_basis(void);
95 void Configlist_eat(struct config *);
96 void Configlist_reset(void);
97 
98 /********* From the file "error.h" ***************************************/
99 void ErrorMsg(const char *, int,const char *, ...);
100 
101 /****** From the file "option.h" ******************************************/
102 enum option_type { OPT_FLAG=1,  OPT_INT,  OPT_DBL,  OPT_STR,
103          OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR};
104 struct s_options {
105   enum option_type type;
106   const char *label;
107   char *arg;
108   const char *message;
109 };
110 int    OptInit(char**,struct s_options*,FILE*);
111 int    OptNArgs(void);
112 char  *OptArg(int);
113 void   OptErr(int);
114 void   OptPrint(void);
115 
116 /******** From the file "parse.h" *****************************************/
117 void Parse(struct lemon *lemp);
118 
119 /********* From the file "plink.h" ***************************************/
120 struct plink *Plink_new(void);
121 void Plink_add(struct plink **, struct config *);
122 void Plink_copy(struct plink **, struct plink *);
123 void Plink_delete(struct plink *);
124 
125 /********** From the file "report.h" *************************************/
126 void Reprint(struct lemon *);
127 void ReportOutput(struct lemon *);
128 void ReportTable(struct lemon *, int);
129 void ReportHeader(struct lemon *);
130 void CompressTables(struct lemon *);
131 void ResortStates(struct lemon *);
132 
133 /********** From the file "set.h" ****************************************/
134 void  SetSize(int);             /* All sets will be of size N */
135 char *SetNew(void);               /* A new set for element 0..N */
136 void  SetFree(char*);             /* Deallocate a set */
137 
138 char *SetNew(void);               /* A new set for element 0..N */
139 int SetAdd(char*,int);            /* Add element to a set */
140 int SetUnion(char *,char *);    /* A <- A U B, thru element N */
141 #define SetFind(X,Y) (X[Y])       /* True if Y is in set X */
142 
143 /********** From the file "struct.h" *************************************/
144 /*
145 ** Principal data structures for the LEMON parser generator.
146 */
147 
148 typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean;
149 
150 /* Symbols (terminals and nonterminals) of the grammar are stored
151 ** in the following: */
152 enum symbol_type {
153   TERMINAL,
154   NONTERMINAL,
155   MULTITERMINAL
156 };
157 enum e_assoc {
158     LEFT,
159     RIGHT,
160     NONE,
161     UNK
162 };
163 struct symbol {
164   const char *name;        /* Name of the symbol */
165   int index;               /* Index number for this symbol */
166   enum symbol_type type;   /* Symbols are all either TERMINALS or NTs */
167   struct rule *rule;       /* Linked list of rules of this (if an NT) */
168   struct symbol *fallback; /* fallback token in case this token doesn't parse */
169   int prec;                /* Precedence if defined (-1 otherwise) */
170   enum e_assoc assoc;      /* Associativity if precedence is defined */
171   char *firstset;          /* First-set for all rules of this symbol */
172   Boolean lambda;          /* True if NT and can generate an empty string */
173   int useCnt;              /* Number of times used */
174   char *destructor;        /* Code which executes whenever this symbol is
175                            ** popped from the stack during error processing */
176   int destLineno;          /* Line number for start of destructor */
177   char *datatype;          /* The data type of information held by this
178                            ** object. Only used if type==NONTERMINAL */
179   int dtnum;               /* The data type number.  In the parser, the value
180                            ** stack is a union.  The .yy%d element of this
181                            ** union is the correct data type for this object */
182   /* The following fields are used by MULTITERMINALs only */
183   int nsubsym;             /* Number of constituent symbols in the MULTI */
184   struct symbol **subsym;  /* Array of constituent symbols */
185 };
186 
187 /* Each production rule in the grammar is stored in the following
188 ** structure.  */
189 struct rule {
190   struct symbol *lhs;      /* Left-hand side of the rule */
191   const char *lhsalias;    /* Alias for the LHS (NULL if none) */
192   int lhsStart;            /* True if left-hand side is the start symbol */
193   int ruleline;            /* Line number for the rule */
194   int nrhs;                /* Number of RHS symbols */
195   struct symbol **rhs;     /* The RHS symbols */
196   const char **rhsalias;   /* An alias for each RHS symbol (NULL if none) */
197   int line;                /* Line number at which code begins */
198   const char *code;        /* The code executed when this rule is reduced */
199   struct symbol *precsym;  /* Precedence symbol for this rule */
200   int index;               /* An index number for this rule */
201   Boolean canReduce;       /* True if this rule is ever reduced */
202   struct rule *nextlhs;    /* Next rule with the same LHS */
203   struct rule *next;       /* Next rule in the global list */
204 };
205 
206 /* A configuration is a production rule of the grammar together with
207 ** a mark (dot) showing how much of that rule has been processed so far.
208 ** Configurations also contain a follow-set which is a list of terminal
209 ** symbols which are allowed to immediately follow the end of the rule.
210 ** Every configuration is recorded as an instance of the following: */
211 enum cfgstatus {
212   COMPLETE,
213   INCOMPLETE
214 };
215 struct config {
216   struct rule *rp;         /* The rule upon which the configuration is based */
217   int dot;                 /* The parse point */
218   char *fws;               /* Follow-set for this configuration only */
219   struct plink *fplp;      /* Follow-set forward propagation links */
220   struct plink *bplp;      /* Follow-set backwards propagation links */
221   struct state *stp;       /* Pointer to state which contains this */
222   enum cfgstatus status;   /* used during followset and shift computations */
223   struct config *next;     /* Next configuration in the state */
224   struct config *bp;       /* The next basis configuration */
225 };
226 
227 enum e_action {
228   SHIFT,
229   ACCEPT,
230   REDUCE,
231   ERROR,
232   SSCONFLICT,              /* A shift/shift conflict */
233   SRCONFLICT,              /* Was a reduce, but part of a conflict */
234   RRCONFLICT,              /* Was a reduce, but part of a conflict */
235   SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
236   RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
237   NOT_USED                 /* Deleted by compression */
238 };
239 
240 /* Every shift or reduce operation is stored as one of the following */
241 struct action {
242   struct symbol *sp;       /* The look-ahead symbol */
243   enum e_action type;
244   union {
245     struct state *stp;     /* The new state, if a shift */
246     struct rule *rp;       /* The rule, if a reduce */
247   } x;
248   struct action *next;     /* Next action for this state */
249   struct action *collide;  /* Next action with the same hash */
250 };
251 
252 /* Each state of the generated parser's finite state machine
253 ** is encoded as an instance of the following structure. */
254 struct state {
255   struct config *bp;       /* The basis configurations for this state */
256   struct config *cfp;      /* All configurations in this set */
257   int statenum;            /* Sequential number for this state */
258   struct action *ap;       /* Array of actions for this state */
259   int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
260   int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
261   int iDflt;               /* Default action */
262 };
263 #define NO_OFFSET (-2147483647)
264 
265 /* A followset propagation link indicates that the contents of one
266 ** configuration followset should be propagated to another whenever
267 ** the first changes. */
268 struct plink {
269   struct config *cfp;      /* The configuration to which linked */
270   struct plink *next;      /* The next propagate link */
271 };
272 
273 /* The state vector for the entire parser generator is recorded as
274 ** follows.  (LEMON uses no global variables and makes little use of
275 ** static variables.  Fields in the following structure can be thought
276 ** of as begin global variables in the program.) */
277 struct lemon {
278   struct state **sorted;   /* Table of states sorted by state number */
279   struct rule *rule;       /* List of all rules */
280   int nstate;              /* Number of states */
281   int nrule;               /* Number of rules */
282   int nsymbol;             /* Number of terminal and nonterminal symbols */
283   int nterminal;           /* Number of terminal symbols */
284   struct symbol **symbols; /* Sorted array of pointers to symbols */
285   int errorcnt;            /* Number of errors */
286   struct symbol *errsym;   /* The error symbol */
287   struct symbol *wildcard; /* Token that matches anything */
288   char *name;              /* Name of the generated parser */
289   char *arg;               /* Declaration of the 3th argument to parser */
290   char *tokentype;         /* Type of terminal symbols in the parser stack */
291   char *vartype;           /* The default type of non-terminal symbols */
292   char *start;             /* Name of the start symbol for the grammar */
293   char *stacksize;         /* Size of the parser stack */
294   char *include;           /* Code to put at the start of the C file */
295   char *error;             /* Code to execute when an error is seen */
296   char *overflow;          /* Code to execute on a stack overflow */
297   char *failure;           /* Code to execute on parser failure */
298   char *accept;            /* Code to execute when the parser excepts */
299   char *extracode;         /* Code appended to the generated file */
300   char *tokendest;         /* Code to execute to destroy token data */
301   char *vardest;           /* Code for the default non-terminal destructor */
302   char *filename;          /* Name of the input file */
303   char *outname;           /* Name of the current output file */
304   char *tokenprefix;       /* A prefix added to token names in the .h file */
305   int nconflict;           /* Number of parsing conflicts */
306   int tablesize;           /* Size of the parse tables */
307   int basisflag;           /* Print only basis configurations */
308   int has_fallback;        /* True if any %fallback is seen in the grammar */
309   int nolinenosflag;       /* True if #line statements should not be printed */
310   char *argv0;             /* Name of the program */
311 };
312 
313 #define MemoryCheck(X) if((X)==0){ \
314   extern void memory_error(); \
315   memory_error(); \
316 }
317 
318 /**************** From the file "table.h" *********************************/
319 /*
320 ** All code in this file has been automatically generated
321 ** from a specification in the file
322 **              "table.q"
323 ** by the associative array code building program "aagen".
324 ** Do not edit this file!  Instead, edit the specification
325 ** file, then rerun aagen.
326 */
327 /*
328 ** Code for processing tables in the LEMON parser generator.
329 */
330 /* Routines for handling a strings */
331 
332 const char *Strsafe(const char *);
333 
334 void Strsafe_init(void);
335 int Strsafe_insert(const char *);
336 const char *Strsafe_find(const char *);
337 
338 /* Routines for handling symbols of the grammar */
339 
340 struct symbol *Symbol_new(const char *);
341 int Symbolcmpp(const void *, const void *);
342 void Symbol_init(void);
343 int Symbol_insert(struct symbol *, const char *);
344 struct symbol *Symbol_find(const char *);
345 struct symbol *Symbol_Nth(int);
346 int Symbol_count(void);
347 struct symbol **Symbol_arrayof(void);
348 
349 /* Routines to manage the state table */
350 
351 int Configcmp(const char *, const char *);
352 struct state *State_new(void);
353 void State_init(void);
354 int State_insert(struct state *, struct config *);
355 struct state *State_find(struct config *);
356 struct state **State_arrayof(/*  */);
357 
358 /* Routines used for efficiency in Configlist_add */
359 
360 void Configtable_init(void);
361 int Configtable_insert(struct config *);
362 struct config *Configtable_find(struct config *);
363 void Configtable_clear(int(*)(struct config *));
364 
365 /****************** From the file "action.c" *******************************/
366 /*
367 ** Routines processing parser actions in the LEMON parser generator.
368 */
369 
370 /* Allocate a new parser action */
Action_new(void)371 static struct action *Action_new(void){
372   static struct action *freelist = 0;
373   struct action *newaction;
374 
375   if( freelist==0 ){
376     int i;
377     int amt = 100;
378     freelist = (struct action *)calloc(amt, sizeof(struct action));
379     if( freelist==0 ){
380       fprintf(stderr,"Unable to allocate memory for a new parser action.");
381       exit(1);
382     }
383     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
384     freelist[amt-1].next = 0;
385   }
386   newaction = freelist;
387   freelist = freelist->next;
388   return newaction;
389 }
390 
391 /* Compare two actions for sorting purposes.  Return negative, zero, or
392 ** positive if the first action is less than, equal to, or greater than
393 ** the first
394 */
actioncmp(struct action * ap1,struct action * ap2)395 static int actioncmp(
396   struct action *ap1,
397   struct action *ap2
398 ){
399   int rc;
400   rc = ap1->sp->index - ap2->sp->index;
401   if( rc==0 ){
402     rc = (int)ap1->type - (int)ap2->type;
403   }
404   if( rc==0 && ap1->type==REDUCE ){
405     rc = ap1->x.rp->index - ap2->x.rp->index;
406   }
407   if( rc==0 ){
408     rc = (int) (ap2 - ap1);
409   }
410   return rc;
411 }
412 
413 /* Sort parser actions */
Action_sort(struct action * ap)414 static struct action *Action_sort(
415   struct action *ap
416 ){
417   ap = (struct action *)msort((char *)ap,(char **)&ap->next,
418                               (int(*)(const char*,const char*))actioncmp);
419   return ap;
420 }
421 
Action_add(struct action ** app,enum e_action type,struct symbol * sp,char * arg)422 void Action_add(
423   struct action **app,
424   enum e_action type,
425   struct symbol *sp,
426   char *arg
427 ){
428   struct action *newaction;
429   newaction = Action_new();
430   newaction->next = *app;
431   *app = newaction;
432   newaction->type = type;
433   newaction->sp = sp;
434   if( type==SHIFT ){
435     newaction->x.stp = (struct state *)arg;
436   }else{
437     newaction->x.rp = (struct rule *)arg;
438   }
439 }
440 /********************** New code to implement the "acttab" module ***********/
441 /*
442 ** This module implements routines use to construct the yy_action[] table.
443 */
444 
445 /*
446 ** The state of the yy_action table under construction is an instance of
447 ** the following structure.
448 **
449 ** The yy_action table maps the pair (state_number, lookahead) into an
450 ** action_number.  The table is an array of integers pairs.  The state_number
451 ** determines an initial offset into the yy_action array.  The lookahead
452 ** value is then added to this initial offset to get an index X into the
453 ** yy_action array. If the aAction[X].lookahead equals the value of the
454 ** of the lookahead input, then the value of the action_number output is
455 ** aAction[X].action.  If the lookaheads do not match then the
456 ** default action for the state_number is returned.
457 **
458 ** All actions associated with a single state_number are first entered
459 ** into aLookahead[] using multiple calls to acttab_action().  Then the
460 ** actions for that single state_number are placed into the aAction[]
461 ** array with a single call to acttab_insert().  The acttab_insert() call
462 ** also resets the aLookahead[] array in preparation for the next
463 ** state number.
464 */
465 struct lookahead_action {
466   int lookahead;             /* Value of the lookahead token */
467   int action;                /* Action to take on the given lookahead */
468 };
469 typedef struct acttab acttab;
470 struct acttab {
471   int nAction;                 /* Number of used slots in aAction[] */
472   int nActionAlloc;            /* Slots allocated for aAction[] */
473   struct lookahead_action
474     *aAction,                  /* The yy_action[] table under construction */
475     *aLookahead;               /* A single new transaction set */
476   int mnLookahead;             /* Minimum aLookahead[].lookahead */
477   int mnAction;                /* Action associated with mnLookahead */
478   int mxLookahead;             /* Maximum aLookahead[].lookahead */
479   int nLookahead;              /* Used slots in aLookahead[] */
480   int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
481 };
482 
483 /* Return the number of entries in the yy_action table */
484 #define acttab_size(X) ((X)->nAction)
485 
486 /* The value for the N-th entry in yy_action */
487 #define acttab_yyaction(X,N)  ((X)->aAction[N].action)
488 
489 /* The value for the N-th entry in yy_lookahead */
490 #define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)
491 
492 /* Free all memory associated with the given acttab */
acttab_free(acttab * p)493 void acttab_free(acttab *p){
494   free( p->aAction );
495   free( p->aLookahead );
496   free( p );
497 }
498 
499 /* Allocate a new acttab structure */
acttab_alloc(void)500 acttab *acttab_alloc(void){
501   acttab *p = (acttab *) calloc( 1, sizeof(*p) );
502   if( p==0 ){
503     fprintf(stderr,"Unable to allocate memory for a new acttab.");
504     exit(1);
505   }
506   memset(p, 0, sizeof(*p));
507   return p;
508 }
509 
510 /* Add a new action to the current transaction set.
511 **
512 ** This routine is called once for each lookahead for a particular
513 ** state.
514 */
acttab_action(acttab * p,int lookahead,int action)515 void acttab_action(acttab *p, int lookahead, int action){
516   if( p->nLookahead>=p->nLookaheadAlloc ){
517     p->nLookaheadAlloc += 25;
518     p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead,
519                              sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
520     if( p->aLookahead==0 ){
521       fprintf(stderr,"malloc failed\n");
522       exit(1);
523     }
524   }
525   if( p->nLookahead==0 ){
526     p->mxLookahead = lookahead;
527     p->mnLookahead = lookahead;
528     p->mnAction = action;
529   }else{
530     if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
531     if( p->mnLookahead>lookahead ){
532       p->mnLookahead = lookahead;
533       p->mnAction = action;
534     }
535   }
536   p->aLookahead[p->nLookahead].lookahead = lookahead;
537   p->aLookahead[p->nLookahead].action = action;
538   p->nLookahead++;
539 }
540 
541 /*
542 ** Add the transaction set built up with prior calls to acttab_action()
543 ** into the current action table.  Then reset the transaction set back
544 ** to an empty set in preparation for a new round of acttab_action() calls.
545 **
546 ** Return the offset into the action table of the new transaction.
547 */
acttab_insert(acttab * p)548 int acttab_insert(acttab *p){
549   int i, j, k, n;
550   assert( p->nLookahead>0 );
551 
552   /* Make sure we have enough space to hold the expanded action table
553   ** in the worst case.  The worst case occurs if the transaction set
554   ** must be appended to the current action table
555   */
556   n = p->mxLookahead + 1;
557   if( p->nAction + n >= p->nActionAlloc ){
558     int oldAlloc = p->nActionAlloc;
559     p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
560     p->aAction = (struct lookahead_action *) realloc( p->aAction,
561                           sizeof(p->aAction[0])*p->nActionAlloc);
562     if( p->aAction==0 ){
563       fprintf(stderr,"malloc failed\n");
564       exit(1);
565     }
566     for(i=oldAlloc; i<p->nActionAlloc; i++){
567       p->aAction[i].lookahead = -1;
568       p->aAction[i].action = -1;
569     }
570   }
571 
572   /* Scan the existing action table looking for an offset that is a
573   ** duplicate of the current transaction set.  Fall out of the loop
574   ** if and when the duplicate is found.
575   **
576   ** i is the index in p->aAction[] where p->mnLookahead is inserted.
577   */
578   for(i=p->nAction-1; i>=0; i--){
579     if( p->aAction[i].lookahead==p->mnLookahead ){
580       /* All lookaheads and actions in the aLookahead[] transaction
581       ** must match against the candidate aAction[i] entry. */
582       if( p->aAction[i].action!=p->mnAction ) continue;
583       for(j=0; j<p->nLookahead; j++){
584         k = p->aLookahead[j].lookahead - p->mnLookahead + i;
585         if( k<0 || k>=p->nAction ) break;
586         if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
587         if( p->aLookahead[j].action!=p->aAction[k].action ) break;
588       }
589       if( j<p->nLookahead ) continue;
590 
591       /* No possible lookahead value that is not in the aLookahead[]
592       ** transaction is allowed to match aAction[i] */
593       n = 0;
594       for(j=0; j<p->nAction; j++){
595         if( p->aAction[j].lookahead<0 ) continue;
596         if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
597       }
598       if( n==p->nLookahead ){
599         break;  /* An exact match is found at offset i */
600       }
601     }
602   }
603 
604   /* If no existing offsets exactly match the current transaction, find an
605   ** an empty offset in the aAction[] table in which we can add the
606   ** aLookahead[] transaction.
607   */
608   if( i<0 ){
609     /* Look for holes in the aAction[] table that fit the current
610     ** aLookahead[] transaction.  Leave i set to the offset of the hole.
611     ** If no holes are found, i is left at p->nAction, which means the
612     ** transaction will be appended. */
613     for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){
614       if( p->aAction[i].lookahead<0 ){
615         for(j=0; j<p->nLookahead; j++){
616           k = p->aLookahead[j].lookahead - p->mnLookahead + i;
617           if( k<0 ) break;
618           if( p->aAction[k].lookahead>=0 ) break;
619         }
620         if( j<p->nLookahead ) continue;
621         for(j=0; j<p->nAction; j++){
622           if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
623         }
624         if( j==p->nAction ){
625           break;  /* Fits in empty slots */
626         }
627       }
628     }
629   }
630   /* Insert transaction set at index i. */
631   for(j=0; j<p->nLookahead; j++){
632     k = p->aLookahead[j].lookahead - p->mnLookahead + i;
633     p->aAction[k] = p->aLookahead[j];
634     if( k>=p->nAction ) p->nAction = k+1;
635   }
636   p->nLookahead = 0;
637 
638   /* Return the offset that is added to the lookahead in order to get the
639   ** index into yy_action of the action */
640   return i - p->mnLookahead;
641 }
642 
643 /********************** From the file "build.c" *****************************/
644 /*
645 ** Routines to construction the finite state machine for the LEMON
646 ** parser generator.
647 */
648 
649 /* Find a precedence symbol of every rule in the grammar.
650 **
651 ** Those rules which have a precedence symbol coded in the input
652 ** grammar using the "[symbol]" construct will already have the
653 ** rp->precsym field filled.  Other rules take as their precedence
654 ** symbol the first RHS symbol with a defined precedence.  If there
655 ** are not RHS symbols with a defined precedence, the precedence
656 ** symbol field is left blank.
657 */
FindRulePrecedences(struct lemon * xp)658 void FindRulePrecedences(struct lemon *xp)
659 {
660   struct rule *rp;
661   for(rp=xp->rule; rp; rp=rp->next){
662     if( rp->precsym==0 ){
663       int i, j;
664       for(i=0; i<rp->nrhs && rp->precsym==0; i++){
665         struct symbol *sp = rp->rhs[i];
666         if( sp->type==MULTITERMINAL ){
667           for(j=0; j<sp->nsubsym; j++){
668             if( sp->subsym[j]->prec>=0 ){
669               rp->precsym = sp->subsym[j];
670               break;
671             }
672           }
673         }else if( sp->prec>=0 ){
674           rp->precsym = rp->rhs[i];
675 	}
676       }
677     }
678   }
679   return;
680 }
681 
682 /* Find all nonterminals which will generate the empty string.
683 ** Then go back and compute the first sets of every nonterminal.
684 ** The first set is the set of all terminal symbols which can begin
685 ** a string generated by that nonterminal.
686 */
FindFirstSets(struct lemon * lemp)687 void FindFirstSets(struct lemon *lemp)
688 {
689   int i, j;
690   struct rule *rp;
691   int progress;
692 
693   for(i=0; i<lemp->nsymbol; i++){
694     lemp->symbols[i]->lambda = LEMON_FALSE;
695   }
696   for(i=lemp->nterminal; i<lemp->nsymbol; i++){
697     lemp->symbols[i]->firstset = SetNew();
698   }
699 
700   /* First compute all lambdas */
701   do{
702     progress = 0;
703     for(rp=lemp->rule; rp; rp=rp->next){
704       if( rp->lhs->lambda ) continue;
705       for(i=0; i<rp->nrhs; i++){
706          struct symbol *sp = rp->rhs[i];
707          if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break;
708       }
709       if( i==rp->nrhs ){
710         rp->lhs->lambda = LEMON_TRUE;
711         progress = 1;
712       }
713     }
714   }while( progress );
715 
716   /* Now compute all first sets */
717   do{
718     struct symbol *s1, *s2;
719     progress = 0;
720     for(rp=lemp->rule; rp; rp=rp->next){
721       s1 = rp->lhs;
722       for(i=0; i<rp->nrhs; i++){
723         s2 = rp->rhs[i];
724         if( s2->type==TERMINAL ){
725           progress += SetAdd(s1->firstset,s2->index);
726           break;
727         }else if( s2->type==MULTITERMINAL ){
728           for(j=0; j<s2->nsubsym; j++){
729             progress += SetAdd(s1->firstset,s2->subsym[j]->index);
730           }
731           break;
732 	}else if( s1==s2 ){
733           if( s1->lambda==LEMON_FALSE ) break;
734 	}else{
735           progress += SetUnion(s1->firstset,s2->firstset);
736           if( s2->lambda==LEMON_FALSE ) break;
737 	}
738       }
739     }
740   }while( progress );
741   return;
742 }
743 
744 /* Compute all LR(0) states for the grammar.  Links
745 ** are added to between some states so that the LR(1) follow sets
746 ** can be computed later.
747 */
748 PRIVATE struct state *getstate(struct lemon *);  /* forward reference */
FindStates(struct lemon * lemp)749 void FindStates(struct lemon *lemp)
750 {
751   struct symbol *sp;
752   struct rule *rp;
753 
754   Configlist_init();
755 
756   /* Find the start symbol */
757   if( lemp->start ){
758     sp = Symbol_find(lemp->start);
759     if( sp==0 ){
760       ErrorMsg(lemp->filename,0,
761 "The specified start symbol \"%s\" is not \
762 in a nonterminal of the grammar.  \"%s\" will be used as the start \
763 symbol instead.",lemp->start,lemp->rule->lhs->name);
764       lemp->errorcnt++;
765       sp = lemp->rule->lhs;
766     }
767   }else{
768     sp = lemp->rule->lhs;
769   }
770 
771   /* Make sure the start symbol doesn't occur on the right-hand side of
772   ** any rule.  Report an error if it does.  (YACC would generate a new
773   ** start symbol in this case.) */
774   for(rp=lemp->rule; rp; rp=rp->next){
775     int i;
776     for(i=0; i<rp->nrhs; i++){
777       if( rp->rhs[i]==sp ){   /* FIX ME:  Deal with multiterminals */
778         ErrorMsg(lemp->filename,0,
779 "The start symbol \"%s\" occurs on the \
780 right-hand side of a rule. This will result in a parser which \
781 does not work properly.",sp->name);
782         lemp->errorcnt++;
783       }
784     }
785   }
786 
787   /* The basis configuration set for the first state
788   ** is all rules which have the start symbol as their
789   ** left-hand side */
790   for(rp=sp->rule; rp; rp=rp->nextlhs){
791     struct config *newcfp;
792     rp->lhsStart = 1;
793     newcfp = Configlist_addbasis(rp,0);
794     SetAdd(newcfp->fws,0);
795   }
796 
797   /* Compute the first state.  All other states will be
798   ** computed automatically during the computation of the first one.
799   ** The returned pointer to the first state is not used. */
800   (void)getstate(lemp);
801   return;
802 }
803 
804 /* Return a pointer to a state which is described by the configuration
805 ** list which has been built from calls to Configlist_add.
806 */
807 PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */
getstate(struct lemon * lemp)808 PRIVATE struct state *getstate(struct lemon *lemp)
809 {
810   struct config *cfp, *bp;
811   struct state *stp;
812 
813   /* Extract the sorted basis of the new state.  The basis was constructed
814   ** by prior calls to "Configlist_addbasis()". */
815   Configlist_sortbasis();
816   bp = Configlist_basis();
817 
818   /* Get a state with the same basis */
819   stp = State_find(bp);
820   if( stp ){
821     /* A state with the same basis already exists!  Copy all the follow-set
822     ** propagation links from the state under construction into the
823     ** preexisting state, then return a pointer to the preexisting state */
824     struct config *x, *y;
825     for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
826       Plink_copy(&y->bplp,x->bplp);
827       Plink_delete(x->fplp);
828       x->fplp = x->bplp = 0;
829     }
830     cfp = Configlist_return();
831     Configlist_eat(cfp);
832   }else{
833     /* This really is a new state.  Construct all the details */
834     Configlist_closure(lemp);    /* Compute the configuration closure */
835     Configlist_sort();           /* Sort the configuration closure */
836     cfp = Configlist_return();   /* Get a pointer to the config list */
837     stp = State_new();           /* A new state structure */
838     MemoryCheck(stp);
839     stp->bp = bp;                /* Remember the configuration basis */
840     stp->cfp = cfp;              /* Remember the configuration closure */
841     stp->statenum = lemp->nstate++; /* Every state gets a sequence number */
842     stp->ap = 0;                 /* No actions, yet. */
843     State_insert(stp,stp->bp);   /* Add to the state table */
844     buildshifts(lemp,stp);       /* Recursively compute successor states */
845   }
846   return stp;
847 }
848 
849 /*
850 ** Return true if two symbols are the same.
851 */
same_symbol(struct symbol * a,struct symbol * b)852 int same_symbol(struct symbol *a, struct symbol *b)
853 {
854   int i;
855   if( a==b ) return 1;
856   if( a->type!=MULTITERMINAL ) return 0;
857   if( b->type!=MULTITERMINAL ) return 0;
858   if( a->nsubsym!=b->nsubsym ) return 0;
859   for(i=0; i<a->nsubsym; i++){
860     if( a->subsym[i]!=b->subsym[i] ) return 0;
861   }
862   return 1;
863 }
864 
865 /* Construct all successor states to the given state.  A "successor"
866 ** state is any state which can be reached by a shift action.
867 */
buildshifts(struct lemon * lemp,struct state * stp)868 PRIVATE void buildshifts(struct lemon *lemp, struct state *stp)
869 {
870   struct config *cfp;  /* For looping thru the config closure of "stp" */
871   struct config *bcfp; /* For the inner loop on config closure of "stp" */
872   struct config *newcfg;  /* */
873   struct symbol *sp;   /* Symbol following the dot in configuration "cfp" */
874   struct symbol *bsp;  /* Symbol following the dot in configuration "bcfp" */
875   struct state *newstp; /* A pointer to a successor state */
876 
877   /* Each configuration becomes complete after it contibutes to a successor
878   ** state.  Initially, all configurations are incomplete */
879   for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
880 
881   /* Loop through all configurations of the state "stp" */
882   for(cfp=stp->cfp; cfp; cfp=cfp->next){
883     if( cfp->status==COMPLETE ) continue;    /* Already used by inner loop */
884     if( cfp->dot>=cfp->rp->nrhs ) continue;  /* Can't shift this config */
885     Configlist_reset();                      /* Reset the new config set */
886     sp = cfp->rp->rhs[cfp->dot];             /* Symbol after the dot */
887 
888     /* For every configuration in the state "stp" which has the symbol "sp"
889     ** following its dot, add the same configuration to the basis set under
890     ** construction but with the dot shifted one symbol to the right. */
891     for(bcfp=cfp; bcfp; bcfp=bcfp->next){
892       if( bcfp->status==COMPLETE ) continue;    /* Already used */
893       if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
894       bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
895       if( !same_symbol(bsp,sp) ) continue;      /* Must be same as for "cfp" */
896       bcfp->status = COMPLETE;                  /* Mark this config as used */
897       newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
898       Plink_add(&newcfg->bplp,bcfp);
899     }
900 
901     /* Get a pointer to the state described by the basis configuration set
902     ** constructed in the preceding loop */
903     newstp = getstate(lemp);
904 
905     /* The state "newstp" is reached from the state "stp" by a shift action
906     ** on the symbol "sp" */
907     if( sp->type==MULTITERMINAL ){
908       int i;
909       for(i=0; i<sp->nsubsym; i++){
910         Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
911       }
912     }else{
913       Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
914     }
915   }
916 }
917 
918 /*
919 ** Construct the propagation links
920 */
FindLinks(struct lemon * lemp)921 void FindLinks(struct lemon *lemp)
922 {
923   int i;
924   struct config *cfp, *other;
925   struct state *stp;
926   struct plink *plp;
927 
928   /* Housekeeping detail:
929   ** Add to every propagate link a pointer back to the state to
930   ** which the link is attached. */
931   for(i=0; i<lemp->nstate; i++){
932     stp = lemp->sorted[i];
933     for(cfp=stp->cfp; cfp; cfp=cfp->next){
934       cfp->stp = stp;
935     }
936   }
937 
938   /* Convert all backlinks into forward links.  Only the forward
939   ** links are used in the follow-set computation. */
940   for(i=0; i<lemp->nstate; i++){
941     stp = lemp->sorted[i];
942     for(cfp=stp->cfp; cfp; cfp=cfp->next){
943       for(plp=cfp->bplp; plp; plp=plp->next){
944         other = plp->cfp;
945         Plink_add(&other->fplp,cfp);
946       }
947     }
948   }
949 }
950 
951 /* Compute all followsets.
952 **
953 ** A followset is the set of all symbols which can come immediately
954 ** after a configuration.
955 */
FindFollowSets(struct lemon * lemp)956 void FindFollowSets(struct lemon *lemp)
957 {
958   int i;
959   struct config *cfp;
960   struct plink *plp;
961   int progress;
962   int change;
963 
964   for(i=0; i<lemp->nstate; i++){
965     for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
966       cfp->status = INCOMPLETE;
967     }
968   }
969 
970   do{
971     progress = 0;
972     for(i=0; i<lemp->nstate; i++){
973       for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
974         if( cfp->status==COMPLETE ) continue;
975         for(plp=cfp->fplp; plp; plp=plp->next){
976           change = SetUnion(plp->cfp->fws,cfp->fws);
977           if( change ){
978             plp->cfp->status = INCOMPLETE;
979             progress = 1;
980 	  }
981 	}
982         cfp->status = COMPLETE;
983       }
984     }
985   }while( progress );
986 }
987 
988 static int resolve_conflict(struct action *,struct action *, struct symbol *);
989 
990 /* Compute the reduce actions, and resolve conflicts.
991 */
FindActions(struct lemon * lemp)992 void FindActions(struct lemon *lemp)
993 {
994   int i,j;
995   struct config *cfp;
996   struct state *stp;
997   struct symbol *sp;
998   struct rule *rp;
999 
1000   /* Add all of the reduce actions
1001   ** A reduce action is added for each element of the followset of
1002   ** a configuration which has its dot at the extreme right.
1003   */
1004   for(i=0; i<lemp->nstate; i++){   /* Loop over all states */
1005     stp = lemp->sorted[i];
1006     for(cfp=stp->cfp; cfp; cfp=cfp->next){  /* Loop over all configurations */
1007       if( cfp->rp->nrhs==cfp->dot ){        /* Is dot at extreme right? */
1008         for(j=0; j<lemp->nterminal; j++){
1009           if( SetFind(cfp->fws,j) ){
1010             /* Add a reduce action to the state "stp" which will reduce by the
1011             ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
1012             Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
1013           }
1014 	}
1015       }
1016     }
1017   }
1018 
1019   /* Add the accepting token */
1020   if( lemp->start ){
1021     sp = Symbol_find(lemp->start);
1022     if( sp==0 ) sp = lemp->rule->lhs;
1023   }else{
1024     sp = lemp->rule->lhs;
1025   }
1026   /* Add to the first state (which is always the starting state of the
1027   ** finite state machine) an action to ACCEPT if the lookahead is the
1028   ** start nonterminal.  */
1029   Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
1030 
1031   /* Resolve conflicts */
1032   for(i=0; i<lemp->nstate; i++){
1033     struct action *ap, *nap;
1034     struct state *stp;
1035     stp = lemp->sorted[i];
1036     /* assert( stp->ap ); */
1037     stp->ap = Action_sort(stp->ap);
1038     for(ap=stp->ap; ap && ap->next; ap=ap->next){
1039       for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
1040          /* The two actions "ap" and "nap" have the same lookahead.
1041          ** Figure out which one should be used */
1042          lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
1043       }
1044     }
1045   }
1046 
1047   /* Report an error for each rule that can never be reduced. */
1048   for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE;
1049   for(i=0; i<lemp->nstate; i++){
1050     struct action *ap;
1051     for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
1052       if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE;
1053     }
1054   }
1055   for(rp=lemp->rule; rp; rp=rp->next){
1056     if( rp->canReduce ) continue;
1057     ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
1058     lemp->errorcnt++;
1059   }
1060 }
1061 
1062 /* Resolve a conflict between the two given actions.  If the
1063 ** conflict can't be resolved, return non-zero.
1064 **
1065 ** NO LONGER TRUE:
1066 **   To resolve a conflict, first look to see if either action
1067 **   is on an error rule.  In that case, take the action which
1068 **   is not associated with the error rule.  If neither or both
1069 **   actions are associated with an error rule, then try to
1070 **   use precedence to resolve the conflict.
1071 **
1072 ** If either action is a SHIFT, then it must be apx.  This
1073 ** function won't work if apx->type==REDUCE and apy->type==SHIFT.
1074 */
resolve_conflict(struct action * apx,struct action * apy,struct symbol * errsym)1075 static int resolve_conflict(
1076   struct action *apx,
1077   struct action *apy,
1078   struct symbol *errsym   /* The error symbol (if defined.  NULL otherwise) */
1079 ){
1080   struct symbol *spx, *spy;
1081   int errcnt = 0;
1082   assert( apx->sp==apy->sp );  /* Otherwise there would be no conflict */
1083   if( apx->type==SHIFT && apy->type==SHIFT ){
1084     apy->type = SSCONFLICT;
1085     errcnt++;
1086   }
1087   if( apx->type==SHIFT && apy->type==REDUCE ){
1088     spx = apx->sp;
1089     spy = apy->x.rp->precsym;
1090     if( spy==0 || spx->prec<0 || spy->prec<0 ){
1091       /* Not enough precedence information. */
1092       apy->type = SRCONFLICT;
1093       errcnt++;
1094     }else if( spx->prec>spy->prec ){    /* higher precedence wins */
1095       apy->type = RD_RESOLVED;
1096     }else if( spx->prec<spy->prec ){
1097       apx->type = SH_RESOLVED;
1098     }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
1099       apy->type = RD_RESOLVED;                             /* associativity */
1100     }else if( spx->prec==spy->prec && spx->assoc==LEFT ){  /* to break tie */
1101       apx->type = SH_RESOLVED;
1102     }else{
1103       assert( spx->prec==spy->prec && spx->assoc==NONE );
1104       apy->type = SRCONFLICT;
1105       errcnt++;
1106     }
1107   }else if( apx->type==REDUCE && apy->type==REDUCE ){
1108     spx = apx->x.rp->precsym;
1109     spy = apy->x.rp->precsym;
1110     if( spx==0 || spy==0 || spx->prec<0 ||
1111     spy->prec<0 || spx->prec==spy->prec ){
1112       apy->type = RRCONFLICT;
1113       errcnt++;
1114     }else if( spx->prec>spy->prec ){
1115       apy->type = RD_RESOLVED;
1116     }else if( spx->prec<spy->prec ){
1117       apx->type = RD_RESOLVED;
1118     }
1119   }else{
1120     assert(
1121       apx->type==SH_RESOLVED ||
1122       apx->type==RD_RESOLVED ||
1123       apx->type==SSCONFLICT ||
1124       apx->type==SRCONFLICT ||
1125       apx->type==RRCONFLICT ||
1126       apy->type==SH_RESOLVED ||
1127       apy->type==RD_RESOLVED ||
1128       apy->type==SSCONFLICT ||
1129       apy->type==SRCONFLICT ||
1130       apy->type==RRCONFLICT
1131     );
1132     /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
1133     ** REDUCEs on the list.  If we reach this point it must be because
1134     ** the parser conflict had already been resolved. */
1135   }
1136   return errcnt;
1137 }
1138 /********************* From the file "configlist.c" *************************/
1139 /*
1140 ** Routines to processing a configuration list and building a state
1141 ** in the LEMON parser generator.
1142 */
1143 
1144 static struct config *freelist = 0;      /* List of free configurations */
1145 static struct config *current = 0;       /* Top of list of configurations */
1146 static struct config **currentend = 0;   /* Last on list of configs */
1147 static struct config *basis = 0;         /* Top of list of basis configs */
1148 static struct config **basisend = 0;     /* End of list of basis configs */
1149 
1150 /* Return a pointer to a new configuration */
newconfig()1151 PRIVATE struct config *newconfig(){
1152   struct config *newcfg;
1153   if( freelist==0 ){
1154     int i;
1155     int amt = 3;
1156     freelist = (struct config *)calloc( amt, sizeof(struct config) );
1157     if( freelist==0 ){
1158       fprintf(stderr,"Unable to allocate memory for a new configuration.");
1159       exit(1);
1160     }
1161     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
1162     freelist[amt-1].next = 0;
1163   }
1164   newcfg = freelist;
1165   freelist = freelist->next;
1166   return newcfg;
1167 }
1168 
1169 /* The configuration "old" is no longer used */
deleteconfig(struct config * old)1170 PRIVATE void deleteconfig(struct config *old)
1171 {
1172   old->next = freelist;
1173   freelist = old;
1174 }
1175 
1176 /* Initialized the configuration list builder */
Configlist_init()1177 void Configlist_init(){
1178   current = 0;
1179   currentend = &current;
1180   basis = 0;
1181   basisend = &basis;
1182   Configtable_init();
1183   return;
1184 }
1185 
1186 /* Initialized the configuration list builder */
Configlist_reset()1187 void Configlist_reset(){
1188   current = 0;
1189   currentend = &current;
1190   basis = 0;
1191   basisend = &basis;
1192   Configtable_clear(0);
1193   return;
1194 }
1195 
1196 /* Add another configuration to the configuration list */
Configlist_add(struct rule * rp,int dot)1197 struct config *Configlist_add(
1198   struct rule *rp,    /* The rule */
1199   int dot             /* Index into the RHS of the rule where the dot goes */
1200 ){
1201   struct config *cfp, model;
1202 
1203   assert( currentend!=0 );
1204   model.rp = rp;
1205   model.dot = dot;
1206   cfp = Configtable_find(&model);
1207   if( cfp==0 ){
1208     cfp = newconfig();
1209     cfp->rp = rp;
1210     cfp->dot = dot;
1211     cfp->fws = SetNew();
1212     cfp->stp = 0;
1213     cfp->fplp = cfp->bplp = 0;
1214     cfp->next = 0;
1215     cfp->bp = 0;
1216     *currentend = cfp;
1217     currentend = &cfp->next;
1218     Configtable_insert(cfp);
1219   }
1220   return cfp;
1221 }
1222 
1223 /* Add a basis configuration to the configuration list */
Configlist_addbasis(struct rule * rp,int dot)1224 struct config *Configlist_addbasis(struct rule *rp, int dot)
1225 {
1226   struct config *cfp, model;
1227 
1228   assert( basisend!=0 );
1229   assert( currentend!=0 );
1230   model.rp = rp;
1231   model.dot = dot;
1232   cfp = Configtable_find(&model);
1233   if( cfp==0 ){
1234     cfp = newconfig();
1235     cfp->rp = rp;
1236     cfp->dot = dot;
1237     cfp->fws = SetNew();
1238     cfp->stp = 0;
1239     cfp->fplp = cfp->bplp = 0;
1240     cfp->next = 0;
1241     cfp->bp = 0;
1242     *currentend = cfp;
1243     currentend = &cfp->next;
1244     *basisend = cfp;
1245     basisend = &cfp->bp;
1246     Configtable_insert(cfp);
1247   }
1248   return cfp;
1249 }
1250 
1251 /* Compute the closure of the configuration list */
Configlist_closure(struct lemon * lemp)1252 void Configlist_closure(struct lemon *lemp)
1253 {
1254   struct config *cfp, *newcfp;
1255   struct rule *rp, *newrp;
1256   struct symbol *sp, *xsp;
1257   int i, dot;
1258 
1259   assert( currentend!=0 );
1260   for(cfp=current; cfp; cfp=cfp->next){
1261     rp = cfp->rp;
1262     dot = cfp->dot;
1263     if( dot>=rp->nrhs ) continue;
1264     sp = rp->rhs[dot];
1265     if( sp->type==NONTERMINAL ){
1266       if( sp->rule==0 && sp!=lemp->errsym ){
1267         ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1268           sp->name);
1269         lemp->errorcnt++;
1270       }
1271       for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1272         newcfp = Configlist_add(newrp,0);
1273         for(i=dot+1; i<rp->nrhs; i++){
1274           xsp = rp->rhs[i];
1275           if( xsp->type==TERMINAL ){
1276             SetAdd(newcfp->fws,xsp->index);
1277             break;
1278           }else if( xsp->type==MULTITERMINAL ){
1279             int k;
1280             for(k=0; k<xsp->nsubsym; k++){
1281               SetAdd(newcfp->fws, xsp->subsym[k]->index);
1282             }
1283             break;
1284 	  }else{
1285             SetUnion(newcfp->fws,xsp->firstset);
1286             if( xsp->lambda==LEMON_FALSE ) break;
1287 	  }
1288 	}
1289         if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1290       }
1291     }
1292   }
1293   return;
1294 }
1295 
1296 /* Sort the configuration list */
Configlist_sort()1297 void Configlist_sort(){
1298   current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp);
1299   currentend = 0;
1300   return;
1301 }
1302 
1303 /* Sort the basis configuration list */
Configlist_sortbasis()1304 void Configlist_sortbasis(){
1305   basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp);
1306   basisend = 0;
1307   return;
1308 }
1309 
1310 /* Return a pointer to the head of the configuration list and
1311 ** reset the list */
Configlist_return()1312 struct config *Configlist_return(){
1313   struct config *old;
1314   old = current;
1315   current = 0;
1316   currentend = 0;
1317   return old;
1318 }
1319 
1320 /* Return a pointer to the head of the configuration list and
1321 ** reset the list */
Configlist_basis()1322 struct config *Configlist_basis(){
1323   struct config *old;
1324   old = basis;
1325   basis = 0;
1326   basisend = 0;
1327   return old;
1328 }
1329 
1330 /* Free all elements of the given configuration list */
Configlist_eat(struct config * cfp)1331 void Configlist_eat(struct config *cfp)
1332 {
1333   struct config *nextcfp;
1334   for(; cfp; cfp=nextcfp){
1335     nextcfp = cfp->next;
1336     assert( cfp->fplp==0 );
1337     assert( cfp->bplp==0 );
1338     if( cfp->fws ) SetFree(cfp->fws);
1339     deleteconfig(cfp);
1340   }
1341   return;
1342 }
1343 /***************** From the file "error.c" *********************************/
1344 /*
1345 ** Code for printing error message.
1346 */
1347 
ErrorMsg(const char * filename,int lineno,const char * format,...)1348 void ErrorMsg(const char *filename, int lineno, const char *format, ...){
1349   va_list ap;
1350   fprintf(stderr, "%s:%d: ", filename, lineno);
1351   va_start(ap, format);
1352   vfprintf(stderr,format,ap);
1353   va_end(ap);
1354   fprintf(stderr, "\n");
1355 }
1356 /**************** From the file "main.c" ************************************/
1357 /*
1358 ** Main program file for the LEMON parser generator.
1359 */
1360 
1361 /* Report an out-of-memory condition and abort.  This function
1362 ** is used mostly by the "MemoryCheck" macro in struct.h
1363 */
memory_error()1364 void memory_error(){
1365   fprintf(stderr,"Out of memory.  Aborting...\n");
1366   exit(1);
1367 }
1368 
1369 static int nDefine = 0;      /* Number of -D options on the command line */
1370 static char **azDefine = 0;  /* Name of the -D macros */
1371 
1372 /* This routine is called with the argument to each -D command-line option.
1373 ** Add the macro defined to the azDefine array.
1374 */
handle_D_option(char * z)1375 static void handle_D_option(char *z){
1376   char **paz;
1377   nDefine++;
1378   azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine);
1379   if( azDefine==0 ){
1380     fprintf(stderr,"out of memory\n");
1381     exit(1);
1382   }
1383   paz = &azDefine[nDefine-1];
1384   *paz = (char *) malloc( lemonStrlen(z)+1 );
1385   if( *paz==0 ){
1386     fprintf(stderr,"out of memory\n");
1387     exit(1);
1388   }
1389   strcpy(*paz, z);
1390   for(z=*paz; *z && *z!='='; z++){}
1391   *z = 0;
1392 }
1393 
1394 static char *user_templatename = NULL;
handle_T_option(char * z)1395 static void handle_T_option(char *z){
1396   user_templatename = (char *) malloc( lemonStrlen(z)+1 );
1397   if( user_templatename==0 ){
1398     memory_error();
1399   }
1400   strcpy(user_templatename, z);
1401 }
1402 
1403 /* The main program.  Parse the command line and do it... */
main(int argc,char ** argv)1404 int main(int argc, char **argv)
1405 {
1406   static int version = 0;
1407   static int rpflag = 0;
1408   static int basisflag = 0;
1409   static int compress = 0;
1410   static int quiet = 0;
1411   static int statistics = 0;
1412   static int mhflag = 0;
1413   static int nolinenosflag = 0;
1414   static int noResort = 0;
1415   static struct s_options options[] = {
1416     {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
1417     {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
1418     {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."},
1419     {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."},
1420     {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
1421     {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."},
1422     {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."},
1423     {OPT_FLAG, "p", (char*)&showPrecedenceConflict,
1424                     "Show conflicts resolved by precedence rules"},
1425     {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
1426     {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"},
1427     {OPT_FLAG, "s", (char*)&statistics,
1428                                    "Print parser stats to standard output."},
1429     {OPT_FLAG, "x", (char*)&version, "Print the version number."},
1430     {OPT_FLAG,0,0,0}
1431   };
1432   int i;
1433   int exitcode;
1434   struct lemon lem;
1435 
1436   atexit(LemonAtExit);
1437 
1438   OptInit(argv,options,stderr);
1439   if( version ){
1440      printf("Lemon version 1.0\n");
1441      exit(0);
1442   }
1443   if( OptNArgs()!=1 ){
1444     fprintf(stderr,"Exactly one filename argument is required.\n");
1445     exit(1);
1446   }
1447   memset(&lem, 0, sizeof(lem));
1448   lem.errorcnt = 0;
1449 
1450   /* Initialize the machine */
1451   Strsafe_init();
1452   Symbol_init();
1453   State_init();
1454   lem.argv0 = argv[0];
1455   lem.filename = OptArg(0);
1456   lem.basisflag = basisflag;
1457   lem.nolinenosflag = nolinenosflag;
1458   Symbol_new("$");
1459   lem.errsym = Symbol_new("error");
1460   lem.errsym->useCnt = 0;
1461 
1462   /* Parse the input file */
1463   Parse(&lem);
1464   if( lem.errorcnt ) exit(lem.errorcnt);
1465   if( lem.nrule==0 ){
1466     fprintf(stderr,"Empty grammar.\n");
1467     exit(1);
1468   }
1469 
1470   /* Count and index the symbols of the grammar */
1471   lem.nsymbol = Symbol_count();
1472   Symbol_new("{default}");
1473   lem.symbols = Symbol_arrayof();
1474   for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1475   qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp);
1476   for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1477   for(i=1; isupper(lem.symbols[i]->name[0]); i++);
1478   lem.nterminal = i;
1479 
1480   /* Generate a reprint of the grammar, if requested on the command line */
1481   if( rpflag ){
1482     Reprint(&lem);
1483   }else{
1484     /* Initialize the size for all follow and first sets */
1485     SetSize(lem.nterminal+1);
1486 
1487     /* Find the precedence for every production rule (that has one) */
1488     FindRulePrecedences(&lem);
1489 
1490     /* Compute the lambda-nonterminals and the first-sets for every
1491     ** nonterminal */
1492     FindFirstSets(&lem);
1493 
1494     /* Compute all LR(0) states.  Also record follow-set propagation
1495     ** links so that the follow-set can be computed later */
1496     lem.nstate = 0;
1497     FindStates(&lem);
1498     lem.sorted = State_arrayof();
1499 
1500     /* Tie up loose ends on the propagation links */
1501     FindLinks(&lem);
1502 
1503     /* Compute the follow set of every reducible configuration */
1504     FindFollowSets(&lem);
1505 
1506     /* Compute the action tables */
1507     FindActions(&lem);
1508 
1509     /* Compress the action tables */
1510     if( compress==0 ) CompressTables(&lem);
1511 
1512     /* Reorder and renumber the states so that states with fewer choices
1513     ** occur at the end.  This is an optimization that helps make the
1514     ** generated parser tables smaller. */
1515     if( noResort==0 ) ResortStates(&lem);
1516 
1517     /* Generate a report of the parser generated.  (the "y.output" file) */
1518     if( !quiet ) ReportOutput(&lem);
1519 
1520     /* Generate the source code for the parser */
1521     ReportTable(&lem, mhflag);
1522 
1523     /* Produce a header file for use by the scanner.  (This step is
1524     ** omitted if the "-m" option is used because makeheaders will
1525     ** generate the file for us.) */
1526     if( !mhflag ) ReportHeader(&lem);
1527   }
1528   if( statistics ){
1529     printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1530       lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1531     printf("                   %d states, %d parser table entries, %d conflicts\n",
1532       lem.nstate, lem.tablesize, lem.nconflict);
1533   }
1534   if( lem.nconflict > 0 ){
1535     fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1536   }
1537 
1538   /* return 0 on success, 1 on failure. */
1539   exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0;
1540   successful_exit = (exitcode == 0);
1541   exit(exitcode);
1542   return (exitcode);
1543 }
1544 /******************** From the file "msort.c" *******************************/
1545 /*
1546 ** A generic merge-sort program.
1547 **
1548 ** USAGE:
1549 ** Let "ptr" be a pointer to some structure which is at the head of
1550 ** a null-terminated list.  Then to sort the list call:
1551 **
1552 **     ptr = msort(ptr,&(ptr->next),cmpfnc);
1553 **
1554 ** In the above, "cmpfnc" is a pointer to a function which compares
1555 ** two instances of the structure and returns an integer, as in
1556 ** strcmp.  The second argument is a pointer to the pointer to the
1557 ** second element of the linked list.  This address is used to compute
1558 ** the offset to the "next" field within the structure.  The offset to
1559 ** the "next" field must be constant for all structures in the list.
1560 **
1561 ** The function returns a new pointer which is the head of the list
1562 ** after sorting.
1563 **
1564 ** ALGORITHM:
1565 ** Merge-sort.
1566 */
1567 
1568 /*
1569 ** Return a pointer to the next structure in the linked list.
1570 */
1571 #define NEXT(A) (*(char**)(((unsigned long)A)+offset))
1572 
1573 /*
1574 ** Inputs:
1575 **   a:       A sorted, null-terminated linked list.  (May be null).
1576 **   b:       A sorted, null-terminated linked list.  (May be null).
1577 **   cmp:     A pointer to the comparison function.
1578 **   offset:  Offset in the structure to the "next" field.
1579 **
1580 ** Return Value:
1581 **   A pointer to the head of a sorted list containing the elements
1582 **   of both a and b.
1583 **
1584 ** Side effects:
1585 **   The "next" pointers for elements in the lists a and b are
1586 **   changed.
1587 */
merge(char * a,char * b,int (* cmp)(const char *,const char *),int offset)1588 static char *merge(
1589   char *a,
1590   char *b,
1591   int (*cmp)(const char*,const char*),
1592   int offset
1593 ){
1594   char *ptr, *head;
1595 
1596   if( a==0 ){
1597     head = b;
1598   }else if( b==0 ){
1599     head = a;
1600   }else{
1601     if( (*cmp)(a,b)<=0 ){
1602       ptr = a;
1603       a = NEXT(a);
1604     }else{
1605       ptr = b;
1606       b = NEXT(b);
1607     }
1608     head = ptr;
1609     while( a && b ){
1610       if( (*cmp)(a,b)<=0 ){
1611         NEXT(ptr) = a;
1612         ptr = a;
1613         a = NEXT(a);
1614       }else{
1615         NEXT(ptr) = b;
1616         ptr = b;
1617         b = NEXT(b);
1618       }
1619     }
1620     if( a ) NEXT(ptr) = a;
1621     else    NEXT(ptr) = b;
1622   }
1623   return head;
1624 }
1625 
1626 /*
1627 ** Inputs:
1628 **   list:      Pointer to a singly-linked list of structures.
1629 **   next:      Pointer to pointer to the second element of the list.
1630 **   cmp:       A comparison function.
1631 **
1632 ** Return Value:
1633 **   A pointer to the head of a sorted list containing the elements
1634 **   orginally in list.
1635 **
1636 ** Side effects:
1637 **   The "next" pointers for elements in list are changed.
1638 */
1639 #define LISTSIZE 30
msort(char * list,char ** next,int (* cmp)(const char *,const char *))1640 static char *msort(
1641   char *list,
1642   char **next,
1643   int (*cmp)(const char*,const char*)
1644 ){
1645   unsigned long offset;
1646   char *ep;
1647   char *set[LISTSIZE];
1648   int i;
1649   offset = (unsigned long)next - (unsigned long)list;
1650   for(i=0; i<LISTSIZE; i++) set[i] = 0;
1651   while( list ){
1652     ep = list;
1653     list = NEXT(list);
1654     NEXT(ep) = 0;
1655     for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
1656       ep = merge(ep,set[i],cmp,offset);
1657       set[i] = 0;
1658     }
1659     set[i] = ep;
1660   }
1661   ep = 0;
1662   for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset);
1663   return ep;
1664 }
1665 /************************ From the file "option.c" **************************/
1666 static char **argv;
1667 static struct s_options *op;
1668 static FILE *errstream;
1669 
1670 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)
1671 
1672 /*
1673 ** Print the command line with a carrot pointing to the k-th character
1674 ** of the n-th field.
1675 */
errline(int n,int k,FILE * err)1676 static void errline(int n, int k, FILE *err)
1677 {
1678   int spcnt, i;
1679   if( argv[0] ) fprintf(err,"%s",argv[0]);
1680   spcnt = lemonStrlen(argv[0]) + 1;
1681   for(i=1; i<n && argv[i]; i++){
1682     fprintf(err," %s",argv[i]);
1683     spcnt += lemonStrlen(argv[i])+1;
1684   }
1685   spcnt += k;
1686   for(; argv[i]; i++) fprintf(err," %s",argv[i]);
1687   if( spcnt<20 ){
1688     fprintf(err,"\n%*s^-- here\n",spcnt,"");
1689   }else{
1690     fprintf(err,"\n%*shere --^\n",spcnt-7,"");
1691   }
1692 }
1693 
1694 /*
1695 ** Return the index of the N-th non-switch argument.  Return -1
1696 ** if N is out of range.
1697 */
argindex(int n)1698 static int argindex(int n)
1699 {
1700   int i;
1701   int dashdash = 0;
1702   if( argv!=0 && *argv!=0 ){
1703     for(i=1; argv[i]; i++){
1704       if( dashdash || !ISOPT(argv[i]) ){
1705         if( n==0 ) return i;
1706         n--;
1707       }
1708       if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1709     }
1710   }
1711   return -1;
1712 }
1713 
1714 static char emsg[] = "Command line syntax error: ";
1715 
1716 /*
1717 ** Process a flag command line argument.
1718 */
handleflags(int i,FILE * err)1719 static int handleflags(int i, FILE *err)
1720 {
1721   int v;
1722   int errcnt = 0;
1723   int j;
1724   for(j=0; op[j].label; j++){
1725     if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break;
1726   }
1727   v = argv[i][0]=='-' ? 1 : 0;
1728   if( op[j].label==0 ){
1729     if( err ){
1730       fprintf(err,"%sundefined option.\n",emsg);
1731       errline(i,1,err);
1732     }
1733     errcnt++;
1734   }else if( op[j].type==OPT_FLAG ){
1735     *((int*)op[j].arg) = v;
1736   }else if( op[j].type==OPT_FFLAG ){
1737     (*(void(*)(int))(op[j].arg))(v);
1738   }else if( op[j].type==OPT_FSTR ){
1739     (*(void(*)(char *))(op[j].arg))(&argv[i][2]);
1740   }else{
1741     if( err ){
1742       fprintf(err,"%smissing argument on switch.\n",emsg);
1743       errline(i,1,err);
1744     }
1745     errcnt++;
1746   }
1747   return errcnt;
1748 }
1749 
1750 /*
1751 ** Process a command line switch which has an argument.
1752 */
handleswitch(int i,FILE * err)1753 static int handleswitch(int i, FILE *err)
1754 {
1755   int lv = 0;
1756   double dv = 0.0;
1757   char *sv = 0, *end;
1758   char *cp;
1759   int j;
1760   int errcnt = 0;
1761   cp = strchr(argv[i],'=');
1762   assert( cp!=0 );
1763   *cp = 0;
1764   for(j=0; op[j].label; j++){
1765     if( strcmp(argv[i],op[j].label)==0 ) break;
1766   }
1767   *cp = '=';
1768   if( op[j].label==0 ){
1769     if( err ){
1770       fprintf(err,"%sundefined option.\n",emsg);
1771       errline(i,0,err);
1772     }
1773     errcnt++;
1774   }else{
1775     cp++;
1776     switch( op[j].type ){
1777       case OPT_FLAG:
1778       case OPT_FFLAG:
1779         if( err ){
1780           fprintf(err,"%soption requires an argument.\n",emsg);
1781           errline(i,0,err);
1782         }
1783         errcnt++;
1784         break;
1785       case OPT_DBL:
1786       case OPT_FDBL:
1787         dv = strtod(cp,&end);
1788         if( *end ){
1789           if( err ){
1790             fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
1791             errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
1792           }
1793           errcnt++;
1794         }
1795         break;
1796       case OPT_INT:
1797       case OPT_FINT:
1798         lv = strtol(cp,&end,0);
1799         if( *end ){
1800           if( err ){
1801             fprintf(err,"%sillegal character in integer argument.\n",emsg);
1802             errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
1803           }
1804           errcnt++;
1805         }
1806         break;
1807       case OPT_STR:
1808       case OPT_FSTR:
1809         sv = cp;
1810         break;
1811     }
1812     switch( op[j].type ){
1813       case OPT_FLAG:
1814       case OPT_FFLAG:
1815         break;
1816       case OPT_DBL:
1817         *(double*)(op[j].arg) = dv;
1818         break;
1819       case OPT_FDBL:
1820         (*(void(*)(double))(op[j].arg))(dv);
1821         break;
1822       case OPT_INT:
1823         *(int*)(op[j].arg) = lv;
1824         break;
1825       case OPT_FINT:
1826         (*(void(*)(int))(op[j].arg))((int)lv);
1827         break;
1828       case OPT_STR:
1829         *(char**)(op[j].arg) = sv;
1830         break;
1831       case OPT_FSTR:
1832         (*(void(*)(char *))(op[j].arg))(sv);
1833         break;
1834     }
1835   }
1836   return errcnt;
1837 }
1838 
OptInit(char ** a,struct s_options * o,FILE * err)1839 int OptInit(char **a, struct s_options *o, FILE *err)
1840 {
1841   int errcnt = 0;
1842   argv = a;
1843   op = o;
1844   errstream = err;
1845   if( argv && *argv && op ){
1846     int i;
1847     for(i=1; argv[i]; i++){
1848       if( argv[i][0]=='+' || argv[i][0]=='-' ){
1849         errcnt += handleflags(i,err);
1850       }else if( strchr(argv[i],'=') ){
1851         errcnt += handleswitch(i,err);
1852       }
1853     }
1854   }
1855   if( errcnt>0 ){
1856     fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
1857     OptPrint();
1858     exit(1);
1859   }
1860   return 0;
1861 }
1862 
OptNArgs()1863 int OptNArgs(){
1864   int cnt = 0;
1865   int dashdash = 0;
1866   int i;
1867   if( argv!=0 && argv[0]!=0 ){
1868     for(i=1; argv[i]; i++){
1869       if( dashdash || !ISOPT(argv[i]) ) cnt++;
1870       if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1871     }
1872   }
1873   return cnt;
1874 }
1875 
OptArg(int n)1876 char *OptArg(int n)
1877 {
1878   int i;
1879   i = argindex(n);
1880   return i>=0 ? argv[i] : 0;
1881 }
1882 
OptErr(int n)1883 void OptErr(int n)
1884 {
1885   int i;
1886   i = argindex(n);
1887   if( i>=0 ) errline(i,0,errstream);
1888 }
1889 
OptPrint()1890 void OptPrint(){
1891   int i;
1892   int max, len;
1893   max = 0;
1894   for(i=0; op[i].label; i++){
1895     len = lemonStrlen(op[i].label) + 1;
1896     switch( op[i].type ){
1897       case OPT_FLAG:
1898       case OPT_FFLAG:
1899         break;
1900       case OPT_INT:
1901       case OPT_FINT:
1902         len += 9;       /* length of "<integer>" */
1903         break;
1904       case OPT_DBL:
1905       case OPT_FDBL:
1906         len += 6;       /* length of "<real>" */
1907         break;
1908       case OPT_STR:
1909       case OPT_FSTR:
1910         len += 8;       /* length of "<string>" */
1911         break;
1912     }
1913     if( len>max ) max = len;
1914   }
1915   for(i=0; op[i].label; i++){
1916     switch( op[i].type ){
1917       case OPT_FLAG:
1918       case OPT_FFLAG:
1919         fprintf(errstream,"  -%-*s  %s\n",max,op[i].label,op[i].message);
1920         break;
1921       case OPT_INT:
1922       case OPT_FINT:
1923         fprintf(errstream,"  %s=<integer>%*s  %s\n",op[i].label,
1924           (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message);
1925         break;
1926       case OPT_DBL:
1927       case OPT_FDBL:
1928         fprintf(errstream,"  %s=<real>%*s  %s\n",op[i].label,
1929           (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message);
1930         break;
1931       case OPT_STR:
1932       case OPT_FSTR:
1933         fprintf(errstream,"  %s=<string>%*s  %s\n",op[i].label,
1934           (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message);
1935         break;
1936     }
1937   }
1938 }
1939 /*********************** From the file "parse.c" ****************************/
1940 /*
1941 ** Input file parser for the LEMON parser generator.
1942 */
1943 
1944 /* The state of the parser */
1945 enum e_state {
1946   INITIALIZE,
1947   WAITING_FOR_DECL_OR_RULE,
1948   WAITING_FOR_DECL_KEYWORD,
1949   WAITING_FOR_DECL_ARG,
1950   WAITING_FOR_PRECEDENCE_SYMBOL,
1951   WAITING_FOR_ARROW,
1952   IN_RHS,
1953   LHS_ALIAS_1,
1954   LHS_ALIAS_2,
1955   LHS_ALIAS_3,
1956   RHS_ALIAS_1,
1957   RHS_ALIAS_2,
1958   PRECEDENCE_MARK_1,
1959   PRECEDENCE_MARK_2,
1960   RESYNC_AFTER_RULE_ERROR,
1961   RESYNC_AFTER_DECL_ERROR,
1962   WAITING_FOR_DESTRUCTOR_SYMBOL,
1963   WAITING_FOR_DATATYPE_SYMBOL,
1964   WAITING_FOR_FALLBACK_ID,
1965   WAITING_FOR_WILDCARD_ID
1966 };
1967 struct pstate {
1968   char *filename;       /* Name of the input file */
1969   int tokenlineno;      /* Linenumber at which current token starts */
1970   int errorcnt;         /* Number of errors so far */
1971   char *tokenstart;     /* Text of current token */
1972   struct lemon *gp;     /* Global state vector */
1973   enum e_state state;        /* The state of the parser */
1974   struct symbol *fallback;   /* The fallback token */
1975   struct symbol *lhs;        /* Left-hand side of current rule */
1976   const char *lhsalias;      /* Alias for the LHS */
1977   int nrhs;                  /* Number of right-hand side symbols seen */
1978   struct symbol *rhs[MAXRHS];  /* RHS symbols */
1979   const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */
1980   struct rule *prevrule;     /* Previous rule parsed */
1981   const char *declkeyword;   /* Keyword of a declaration */
1982   char **declargslot;        /* Where the declaration argument should be put */
1983   int insertLineMacro;       /* Add #line before declaration insert */
1984   int *decllinenoslot;       /* Where to write declaration line number */
1985   enum e_assoc declassoc;    /* Assign this association to decl arguments */
1986   int preccounter;           /* Assign this precedence to decl arguments */
1987   struct rule *firstrule;    /* Pointer to first rule in the grammar */
1988   struct rule *lastrule;     /* Pointer to the most recently parsed rule */
1989 };
1990 
1991 /* Parse a single token */
parseonetoken(struct pstate * psp)1992 static void parseonetoken(struct pstate *psp)
1993 {
1994   const char *x;
1995   x = Strsafe(psp->tokenstart);     /* Save the token permanently */
1996 #if 0
1997   printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
1998     x,psp->state);
1999 #endif
2000   switch( psp->state ){
2001     case INITIALIZE:
2002       psp->prevrule = 0;
2003       psp->preccounter = 0;
2004       psp->firstrule = psp->lastrule = 0;
2005       psp->gp->nrule = 0;
2006       /* Fall thru to next case */
2007     case WAITING_FOR_DECL_OR_RULE:
2008       if( x[0]=='%' ){
2009         psp->state = WAITING_FOR_DECL_KEYWORD;
2010       }else if( islower(x[0]) ){
2011         psp->lhs = Symbol_new(x);
2012         psp->nrhs = 0;
2013         psp->lhsalias = 0;
2014         psp->state = WAITING_FOR_ARROW;
2015       }else if( x[0]=='{' ){
2016         if( psp->prevrule==0 ){
2017           ErrorMsg(psp->filename,psp->tokenlineno,
2018 "There is no prior rule opon which to attach the code \
2019 fragment which begins on this line.");
2020           psp->errorcnt++;
2021 	}else if( psp->prevrule->code!=0 ){
2022           ErrorMsg(psp->filename,psp->tokenlineno,
2023 "Code fragment beginning on this line is not the first \
2024 to follow the previous rule.");
2025           psp->errorcnt++;
2026         }else{
2027           psp->prevrule->line = psp->tokenlineno;
2028           psp->prevrule->code = &x[1];
2029 	}
2030       }else if( x[0]=='[' ){
2031         psp->state = PRECEDENCE_MARK_1;
2032       }else{
2033         ErrorMsg(psp->filename,psp->tokenlineno,
2034           "Token \"%s\" should be either \"%%\" or a nonterminal name.",
2035           x);
2036         psp->errorcnt++;
2037       }
2038       break;
2039     case PRECEDENCE_MARK_1:
2040       if( !isupper(x[0]) ){
2041         ErrorMsg(psp->filename,psp->tokenlineno,
2042           "The precedence symbol must be a terminal.");
2043         psp->errorcnt++;
2044       }else if( psp->prevrule==0 ){
2045         ErrorMsg(psp->filename,psp->tokenlineno,
2046           "There is no prior rule to assign precedence \"[%s]\".",x);
2047         psp->errorcnt++;
2048       }else if( psp->prevrule->precsym!=0 ){
2049         ErrorMsg(psp->filename,psp->tokenlineno,
2050 "Precedence mark on this line is not the first \
2051 to follow the previous rule.");
2052         psp->errorcnt++;
2053       }else{
2054         psp->prevrule->precsym = Symbol_new(x);
2055       }
2056       psp->state = PRECEDENCE_MARK_2;
2057       break;
2058     case PRECEDENCE_MARK_2:
2059       if( x[0]!=']' ){
2060         ErrorMsg(psp->filename,psp->tokenlineno,
2061           "Missing \"]\" on precedence mark.");
2062         psp->errorcnt++;
2063       }
2064       psp->state = WAITING_FOR_DECL_OR_RULE;
2065       break;
2066     case WAITING_FOR_ARROW:
2067       if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2068         psp->state = IN_RHS;
2069       }else if( x[0]=='(' ){
2070         psp->state = LHS_ALIAS_1;
2071       }else{
2072         ErrorMsg(psp->filename,psp->tokenlineno,
2073           "Expected to see a \":\" following the LHS symbol \"%s\".",
2074           psp->lhs->name);
2075         psp->errorcnt++;
2076         psp->state = RESYNC_AFTER_RULE_ERROR;
2077       }
2078       break;
2079     case LHS_ALIAS_1:
2080       if( isalpha(x[0]) ){
2081         psp->lhsalias = x;
2082         psp->state = LHS_ALIAS_2;
2083       }else{
2084         ErrorMsg(psp->filename,psp->tokenlineno,
2085           "\"%s\" is not a valid alias for the LHS \"%s\"\n",
2086           x,psp->lhs->name);
2087         psp->errorcnt++;
2088         psp->state = RESYNC_AFTER_RULE_ERROR;
2089       }
2090       break;
2091     case LHS_ALIAS_2:
2092       if( x[0]==')' ){
2093         psp->state = LHS_ALIAS_3;
2094       }else{
2095         ErrorMsg(psp->filename,psp->tokenlineno,
2096           "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2097         psp->errorcnt++;
2098         psp->state = RESYNC_AFTER_RULE_ERROR;
2099       }
2100       break;
2101     case LHS_ALIAS_3:
2102       if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2103         psp->state = IN_RHS;
2104       }else{
2105         ErrorMsg(psp->filename,psp->tokenlineno,
2106           "Missing \"->\" following: \"%s(%s)\".",
2107            psp->lhs->name,psp->lhsalias);
2108         psp->errorcnt++;
2109         psp->state = RESYNC_AFTER_RULE_ERROR;
2110       }
2111       break;
2112     case IN_RHS:
2113       if( x[0]=='.' ){
2114         struct rule *rp;
2115         rp = (struct rule *)calloc( sizeof(struct rule) +
2116              sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1);
2117         if( rp==0 ){
2118           ErrorMsg(psp->filename,psp->tokenlineno,
2119             "Can't allocate enough memory for this rule.");
2120           psp->errorcnt++;
2121           psp->prevrule = 0;
2122 	}else{
2123           int i;
2124           rp->ruleline = psp->tokenlineno;
2125           rp->rhs = (struct symbol**)&rp[1];
2126           rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]);
2127           for(i=0; i<psp->nrhs; i++){
2128             rp->rhs[i] = psp->rhs[i];
2129             rp->rhsalias[i] = psp->alias[i];
2130 	  }
2131           rp->lhs = psp->lhs;
2132           rp->lhsalias = psp->lhsalias;
2133           rp->nrhs = psp->nrhs;
2134           rp->code = 0;
2135           rp->precsym = 0;
2136           rp->index = psp->gp->nrule++;
2137           rp->nextlhs = rp->lhs->rule;
2138           rp->lhs->rule = rp;
2139           rp->next = 0;
2140           if( psp->firstrule==0 ){
2141             psp->firstrule = psp->lastrule = rp;
2142 	  }else{
2143             psp->lastrule->next = rp;
2144             psp->lastrule = rp;
2145 	  }
2146           psp->prevrule = rp;
2147 	}
2148         psp->state = WAITING_FOR_DECL_OR_RULE;
2149       }else if( isalpha(x[0]) ){
2150         if( psp->nrhs>=MAXRHS ){
2151           ErrorMsg(psp->filename,psp->tokenlineno,
2152             "Too many symbols on RHS of rule beginning at \"%s\".",
2153             x);
2154           psp->errorcnt++;
2155           psp->state = RESYNC_AFTER_RULE_ERROR;
2156 	}else{
2157           psp->rhs[psp->nrhs] = Symbol_new(x);
2158           psp->alias[psp->nrhs] = 0;
2159           psp->nrhs++;
2160 	}
2161       }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){
2162         struct symbol *msp = psp->rhs[psp->nrhs-1];
2163         if( msp->type!=MULTITERMINAL ){
2164           struct symbol *origsp = msp;
2165           msp = (struct symbol *) calloc(1,sizeof(*msp));
2166           memset(msp, 0, sizeof(*msp));
2167           msp->type = MULTITERMINAL;
2168           msp->nsubsym = 1;
2169           msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*));
2170           msp->subsym[0] = origsp;
2171           msp->name = origsp->name;
2172           psp->rhs[psp->nrhs-1] = msp;
2173         }
2174         msp->nsubsym++;
2175         msp->subsym = (struct symbol **) realloc(msp->subsym,
2176           sizeof(struct symbol*)*msp->nsubsym);
2177         msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]);
2178         if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){
2179           ErrorMsg(psp->filename,psp->tokenlineno,
2180             "Cannot form a compound containing a non-terminal");
2181           psp->errorcnt++;
2182         }
2183       }else if( x[0]=='(' && psp->nrhs>0 ){
2184         psp->state = RHS_ALIAS_1;
2185       }else{
2186         ErrorMsg(psp->filename,psp->tokenlineno,
2187           "Illegal character on RHS of rule: \"%s\".",x);
2188         psp->errorcnt++;
2189         psp->state = RESYNC_AFTER_RULE_ERROR;
2190       }
2191       break;
2192     case RHS_ALIAS_1:
2193       if( isalpha(x[0]) ){
2194         psp->alias[psp->nrhs-1] = x;
2195         psp->state = RHS_ALIAS_2;
2196       }else{
2197         ErrorMsg(psp->filename,psp->tokenlineno,
2198           "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
2199           x,psp->rhs[psp->nrhs-1]->name);
2200         psp->errorcnt++;
2201         psp->state = RESYNC_AFTER_RULE_ERROR;
2202       }
2203       break;
2204     case RHS_ALIAS_2:
2205       if( x[0]==')' ){
2206         psp->state = IN_RHS;
2207       }else{
2208         ErrorMsg(psp->filename,psp->tokenlineno,
2209           "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2210         psp->errorcnt++;
2211         psp->state = RESYNC_AFTER_RULE_ERROR;
2212       }
2213       break;
2214     case WAITING_FOR_DECL_KEYWORD:
2215       if( isalpha(x[0]) ){
2216         psp->declkeyword = x;
2217         psp->declargslot = 0;
2218         psp->decllinenoslot = 0;
2219         psp->insertLineMacro = 1;
2220         psp->state = WAITING_FOR_DECL_ARG;
2221         if( strcmp(x,"name")==0 ){
2222           psp->declargslot = &(psp->gp->name);
2223           psp->insertLineMacro = 0;
2224 	}else if( strcmp(x,"include")==0 ){
2225           psp->declargslot = &(psp->gp->include);
2226 	}else if( strcmp(x,"code")==0 ){
2227           psp->declargslot = &(psp->gp->extracode);
2228 	}else if( strcmp(x,"token_destructor")==0 ){
2229           psp->declargslot = &psp->gp->tokendest;
2230 	}else if( strcmp(x,"default_destructor")==0 ){
2231           psp->declargslot = &psp->gp->vardest;
2232 	}else if( strcmp(x,"token_prefix")==0 ){
2233           psp->declargslot = &psp->gp->tokenprefix;
2234           psp->insertLineMacro = 0;
2235 	}else if( strcmp(x,"syntax_error")==0 ){
2236           psp->declargslot = &(psp->gp->error);
2237 	}else if( strcmp(x,"parse_accept")==0 ){
2238           psp->declargslot = &(psp->gp->accept);
2239 	}else if( strcmp(x,"parse_failure")==0 ){
2240           psp->declargslot = &(psp->gp->failure);
2241 	}else if( strcmp(x,"stack_overflow")==0 ){
2242           psp->declargslot = &(psp->gp->overflow);
2243         }else if( strcmp(x,"extra_argument")==0 ){
2244           psp->declargslot = &(psp->gp->arg);
2245           psp->insertLineMacro = 0;
2246         }else if( strcmp(x,"token_type")==0 ){
2247           psp->declargslot = &(psp->gp->tokentype);
2248           psp->insertLineMacro = 0;
2249         }else if( strcmp(x,"default_type")==0 ){
2250           psp->declargslot = &(psp->gp->vartype);
2251           psp->insertLineMacro = 0;
2252         }else if( strcmp(x,"stack_size")==0 ){
2253           psp->declargslot = &(psp->gp->stacksize);
2254           psp->insertLineMacro = 0;
2255         }else if( strcmp(x,"start_symbol")==0 ){
2256           psp->declargslot = &(psp->gp->start);
2257           psp->insertLineMacro = 0;
2258         }else if( strcmp(x,"left")==0 ){
2259           psp->preccounter++;
2260           psp->declassoc = LEFT;
2261           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2262         }else if( strcmp(x,"right")==0 ){
2263           psp->preccounter++;
2264           psp->declassoc = RIGHT;
2265           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2266         }else if( strcmp(x,"nonassoc")==0 ){
2267           psp->preccounter++;
2268           psp->declassoc = NONE;
2269           psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2270 	}else if( strcmp(x,"destructor")==0 ){
2271           psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2272 	}else if( strcmp(x,"type")==0 ){
2273           psp->state = WAITING_FOR_DATATYPE_SYMBOL;
2274         }else if( strcmp(x,"fallback")==0 ){
2275           psp->fallback = 0;
2276           psp->state = WAITING_FOR_FALLBACK_ID;
2277         }else if( strcmp(x,"wildcard")==0 ){
2278           psp->state = WAITING_FOR_WILDCARD_ID;
2279         }else{
2280           ErrorMsg(psp->filename,psp->tokenlineno,
2281             "Unknown declaration keyword: \"%%%s\".",x);
2282           psp->errorcnt++;
2283           psp->state = RESYNC_AFTER_DECL_ERROR;
2284 	}
2285       }else{
2286         ErrorMsg(psp->filename,psp->tokenlineno,
2287           "Illegal declaration keyword: \"%s\".",x);
2288         psp->errorcnt++;
2289         psp->state = RESYNC_AFTER_DECL_ERROR;
2290       }
2291       break;
2292     case WAITING_FOR_DESTRUCTOR_SYMBOL:
2293       if( !isalpha(x[0]) ){
2294         ErrorMsg(psp->filename,psp->tokenlineno,
2295           "Symbol name missing after %%destructor keyword");
2296         psp->errorcnt++;
2297         psp->state = RESYNC_AFTER_DECL_ERROR;
2298       }else{
2299         struct symbol *sp = Symbol_new(x);
2300         psp->declargslot = &sp->destructor;
2301         psp->decllinenoslot = &sp->destLineno;
2302         psp->insertLineMacro = 1;
2303         psp->state = WAITING_FOR_DECL_ARG;
2304       }
2305       break;
2306     case WAITING_FOR_DATATYPE_SYMBOL:
2307       if( !isalpha(x[0]) ){
2308         ErrorMsg(psp->filename,psp->tokenlineno,
2309           "Symbol name missing after %%type keyword");
2310         psp->errorcnt++;
2311         psp->state = RESYNC_AFTER_DECL_ERROR;
2312       }else{
2313         struct symbol *sp = Symbol_find(x);
2314         if((sp) && (sp->datatype)){
2315           ErrorMsg(psp->filename,psp->tokenlineno,
2316             "Symbol %%type \"%s\" already defined", x);
2317           psp->errorcnt++;
2318           psp->state = RESYNC_AFTER_DECL_ERROR;
2319         }else{
2320           if (!sp){
2321             sp = Symbol_new(x);
2322           }
2323           psp->declargslot = &sp->datatype;
2324           psp->insertLineMacro = 0;
2325           psp->state = WAITING_FOR_DECL_ARG;
2326         }
2327       }
2328       break;
2329     case WAITING_FOR_PRECEDENCE_SYMBOL:
2330       if( x[0]=='.' ){
2331         psp->state = WAITING_FOR_DECL_OR_RULE;
2332       }else if( isupper(x[0]) ){
2333         struct symbol *sp;
2334         sp = Symbol_new(x);
2335         if( sp->prec>=0 ){
2336           ErrorMsg(psp->filename,psp->tokenlineno,
2337             "Symbol \"%s\" has already be given a precedence.",x);
2338           psp->errorcnt++;
2339 	}else{
2340           sp->prec = psp->preccounter;
2341           sp->assoc = psp->declassoc;
2342 	}
2343       }else{
2344         ErrorMsg(psp->filename,psp->tokenlineno,
2345           "Can't assign a precedence to \"%s\".",x);
2346         psp->errorcnt++;
2347       }
2348       break;
2349     case WAITING_FOR_DECL_ARG:
2350       if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){
2351         const char *zOld, *zNew;
2352         char *zBuf, *z;
2353         int nOld, n, nLine, nNew, nBack;
2354         int addLineMacro;
2355         char zLine[50];
2356         zNew = x;
2357         if( zNew[0]=='"' || zNew[0]=='{' ) zNew++;
2358         nNew = lemonStrlen(zNew);
2359         if( *psp->declargslot ){
2360           zOld = *psp->declargslot;
2361         }else{
2362           zOld = "";
2363         }
2364         nOld = lemonStrlen(zOld);
2365         n = nOld + nNew + 20;
2366         addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro &&
2367                         (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0);
2368         if( addLineMacro ){
2369           for(z=psp->filename, nBack=0; *z; z++){
2370             if( *z=='\\' ) nBack++;
2371           }
2372           sprintf(zLine, "#line %d ", psp->tokenlineno);
2373           nLine = lemonStrlen(zLine);
2374           n += nLine + lemonStrlen(psp->filename) + nBack;
2375         }
2376         *psp->declargslot = (char *) realloc(*psp->declargslot, n);
2377         zBuf = *psp->declargslot + nOld;
2378         if( addLineMacro ){
2379           if( nOld && zBuf[-1]!='\n' ){
2380             *(zBuf++) = '\n';
2381           }
2382           memcpy(zBuf, zLine, nLine);
2383           zBuf += nLine;
2384           *(zBuf++) = '"';
2385           for(z=psp->filename; *z; z++){
2386             if( *z=='\\' ){
2387               *(zBuf++) = '\\';
2388             }
2389             *(zBuf++) = *z;
2390           }
2391           *(zBuf++) = '"';
2392           *(zBuf++) = '\n';
2393         }
2394         if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){
2395           psp->decllinenoslot[0] = psp->tokenlineno;
2396         }
2397         memcpy(zBuf, zNew, nNew);
2398         zBuf += nNew;
2399         *zBuf = 0;
2400         psp->state = WAITING_FOR_DECL_OR_RULE;
2401       }else{
2402         ErrorMsg(psp->filename,psp->tokenlineno,
2403           "Illegal argument to %%%s: %s",psp->declkeyword,x);
2404         psp->errorcnt++;
2405         psp->state = RESYNC_AFTER_DECL_ERROR;
2406       }
2407       break;
2408     case WAITING_FOR_FALLBACK_ID:
2409       if( x[0]=='.' ){
2410         psp->state = WAITING_FOR_DECL_OR_RULE;
2411       }else if( !isupper(x[0]) ){
2412         ErrorMsg(psp->filename, psp->tokenlineno,
2413           "%%fallback argument \"%s\" should be a token", x);
2414         psp->errorcnt++;
2415       }else{
2416         struct symbol *sp = Symbol_new(x);
2417         if( psp->fallback==0 ){
2418           psp->fallback = sp;
2419         }else if( sp->fallback ){
2420           ErrorMsg(psp->filename, psp->tokenlineno,
2421             "More than one fallback assigned to token %s", x);
2422           psp->errorcnt++;
2423         }else{
2424           sp->fallback = psp->fallback;
2425           psp->gp->has_fallback = 1;
2426         }
2427       }
2428       break;
2429     case WAITING_FOR_WILDCARD_ID:
2430       if( x[0]=='.' ){
2431         psp->state = WAITING_FOR_DECL_OR_RULE;
2432       }else if( !isupper(x[0]) ){
2433         ErrorMsg(psp->filename, psp->tokenlineno,
2434           "%%wildcard argument \"%s\" should be a token", x);
2435         psp->errorcnt++;
2436       }else{
2437         struct symbol *sp = Symbol_new(x);
2438         if( psp->gp->wildcard==0 ){
2439           psp->gp->wildcard = sp;
2440         }else{
2441           ErrorMsg(psp->filename, psp->tokenlineno,
2442             "Extra wildcard to token: %s", x);
2443           psp->errorcnt++;
2444         }
2445       }
2446       break;
2447     case RESYNC_AFTER_RULE_ERROR:
2448 /*      if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2449 **      break; */
2450     case RESYNC_AFTER_DECL_ERROR:
2451       if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2452       if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2453       break;
2454   }
2455 }
2456 
2457 /* Run the preprocessor over the input file text.  The global variables
2458 ** azDefine[0] through azDefine[nDefine-1] contains the names of all defined
2459 ** macros.  This routine looks for "%ifdef" and "%ifndef" and "%endif" and
2460 ** comments them out.  Text in between is also commented out as appropriate.
2461 */
preprocess_input(char * z)2462 static void preprocess_input(char *z){
2463   int i, j, k, n;
2464   int exclude = 0;
2465   int start = 0;
2466   int lineno = 1;
2467   int start_lineno = 1;
2468   for(i=0; z[i]; i++){
2469     if( z[i]=='\n' ) lineno++;
2470     if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue;
2471     if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){
2472       if( exclude ){
2473         exclude--;
2474         if( exclude==0 ){
2475           for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' ';
2476         }
2477       }
2478       for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2479     }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6]))
2480           || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){
2481       if( exclude ){
2482         exclude++;
2483       }else{
2484         for(j=i+7; isspace(z[j]); j++){}
2485         for(n=0; z[j+n] && !isspace(z[j+n]); n++){}
2486         exclude = 1;
2487         for(k=0; k<nDefine; k++){
2488           if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){
2489             exclude = 0;
2490             break;
2491           }
2492         }
2493         if( z[i+3]=='n' ) exclude = !exclude;
2494         if( exclude ){
2495           start = i;
2496           start_lineno = lineno;
2497         }
2498       }
2499       for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2500     }
2501   }
2502   if( exclude ){
2503     fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno);
2504     exit(1);
2505   }
2506 }
2507 
2508 /* In spite of its name, this function is really a scanner.  It read
2509 ** in the entire input file (all at once) then tokenizes it.  Each
2510 ** token is passed to the function "parseonetoken" which builds all
2511 ** the appropriate data structures in the global state vector "gp".
2512 */
Parse(struct lemon * gp)2513 void Parse(struct lemon *gp)
2514 {
2515   struct pstate ps;
2516   FILE *fp;
2517   char *filebuf;
2518   int filesize;
2519   int lineno;
2520   int c;
2521   char *cp, *nextcp;
2522   int startline = 0;
2523 
2524   memset(&ps, '\0', sizeof(ps));
2525   ps.gp = gp;
2526   ps.filename = gp->filename;
2527   ps.errorcnt = 0;
2528   ps.state = INITIALIZE;
2529 
2530   /* Begin by reading the input file */
2531   fp = fopen(ps.filename,"rb");
2532   if( fp==0 ){
2533     ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2534     gp->errorcnt++;
2535     return;
2536   }
2537   fseek(fp,0,2);
2538   filesize = ftell(fp);
2539   rewind(fp);
2540   filebuf = (char *)malloc( filesize+1 );
2541   if( filebuf==0 ){
2542     ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.",
2543       filesize+1);
2544     gp->errorcnt++;
2545     return;
2546   }
2547   if( fread(filebuf,1,filesize,fp)!=filesize ){
2548     ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.",
2549       filesize);
2550     free(filebuf);
2551     gp->errorcnt++;
2552     return;
2553   }
2554   fclose(fp);
2555   filebuf[filesize] = 0;
2556 
2557   /* Make an initial pass through the file to handle %ifdef and %ifndef */
2558   preprocess_input(filebuf);
2559 
2560   /* Now scan the text of the input file */
2561   lineno = 1;
2562   for(cp=filebuf; (c= *cp)!=0; ){
2563     if( c=='\n' ) lineno++;              /* Keep track of the line number */
2564     if( isspace(c) ){ cp++; continue; }  /* Skip all white space */
2565     if( c=='/' && cp[1]=='/' ){          /* Skip C++ style comments */
2566       cp+=2;
2567       while( (c= *cp)!=0 && c!='\n' ) cp++;
2568       continue;
2569     }
2570     if( c=='/' && cp[1]=='*' ){          /* Skip C style comments */
2571       cp+=2;
2572       while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2573         if( c=='\n' ) lineno++;
2574         cp++;
2575       }
2576       if( c ) cp++;
2577       continue;
2578     }
2579     ps.tokenstart = cp;                /* Mark the beginning of the token */
2580     ps.tokenlineno = lineno;           /* Linenumber on which token begins */
2581     if( c=='\"' ){                     /* String literals */
2582       cp++;
2583       while( (c= *cp)!=0 && c!='\"' ){
2584         if( c=='\n' ) lineno++;
2585         cp++;
2586       }
2587       if( c==0 ){
2588         ErrorMsg(ps.filename,startline,
2589 "String starting on this line is not terminated before the end of the file.");
2590         ps.errorcnt++;
2591         nextcp = cp;
2592       }else{
2593         nextcp = cp+1;
2594       }
2595     }else if( c=='{' ){               /* A block of C code */
2596       int level;
2597       cp++;
2598       for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2599         if( c=='\n' ) lineno++;
2600         else if( c=='{' ) level++;
2601         else if( c=='}' ) level--;
2602         else if( c=='/' && cp[1]=='*' ){  /* Skip comments */
2603           int prevc;
2604           cp = &cp[2];
2605           prevc = 0;
2606           while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2607             if( c=='\n' ) lineno++;
2608             prevc = c;
2609             cp++;
2610 	  }
2611 	}else if( c=='/' && cp[1]=='/' ){  /* Skip C++ style comments too */
2612           cp = &cp[2];
2613           while( (c= *cp)!=0 && c!='\n' ) cp++;
2614           if( c ) lineno++;
2615 	}else if( c=='\'' || c=='\"' ){    /* String a character literals */
2616           int startchar, prevc;
2617           startchar = c;
2618           prevc = 0;
2619           for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2620             if( c=='\n' ) lineno++;
2621             if( prevc=='\\' ) prevc = 0;
2622             else              prevc = c;
2623 	  }
2624 	}
2625       }
2626       if( c==0 ){
2627         ErrorMsg(ps.filename,ps.tokenlineno,
2628 "C code starting on this line is not terminated before the end of the file.");
2629         ps.errorcnt++;
2630         nextcp = cp;
2631       }else{
2632         nextcp = cp+1;
2633       }
2634     }else if( isalnum(c) ){          /* Identifiers */
2635       while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2636       nextcp = cp;
2637     }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2638       cp += 3;
2639       nextcp = cp;
2640     }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){
2641       cp += 2;
2642       while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2643       nextcp = cp;
2644     }else{                          /* All other (one character) operators */
2645       cp++;
2646       nextcp = cp;
2647     }
2648     c = *cp;
2649     *cp = 0;                        /* Null terminate the token */
2650     parseonetoken(&ps);             /* Parse the token */
2651     *cp = c;                        /* Restore the buffer */
2652     cp = nextcp;
2653   }
2654   free(filebuf);                    /* Release the buffer after parsing */
2655   gp->rule = ps.firstrule;
2656   gp->errorcnt = ps.errorcnt;
2657 }
2658 /*************************** From the file "plink.c" *********************/
2659 /*
2660 ** Routines processing configuration follow-set propagation links
2661 ** in the LEMON parser generator.
2662 */
2663 static struct plink *plink_freelist = 0;
2664 
2665 /* Allocate a new plink */
Plink_new()2666 struct plink *Plink_new(){
2667   struct plink *newlink;
2668 
2669   if( plink_freelist==0 ){
2670     int i;
2671     int amt = 100;
2672     plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) );
2673     if( plink_freelist==0 ){
2674       fprintf(stderr,
2675       "Unable to allocate memory for a new follow-set propagation link.\n");
2676       exit(1);
2677     }
2678     for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2679     plink_freelist[amt-1].next = 0;
2680   }
2681   newlink = plink_freelist;
2682   plink_freelist = plink_freelist->next;
2683   return newlink;
2684 }
2685 
2686 /* Add a plink to a plink list */
Plink_add(struct plink ** plpp,struct config * cfp)2687 void Plink_add(struct plink **plpp, struct config *cfp)
2688 {
2689   struct plink *newlink;
2690   newlink = Plink_new();
2691   newlink->next = *plpp;
2692   *plpp = newlink;
2693   newlink->cfp = cfp;
2694 }
2695 
2696 /* Transfer every plink on the list "from" to the list "to" */
Plink_copy(struct plink ** to,struct plink * from)2697 void Plink_copy(struct plink **to, struct plink *from)
2698 {
2699   struct plink *nextpl;
2700   while( from ){
2701     nextpl = from->next;
2702     from->next = *to;
2703     *to = from;
2704     from = nextpl;
2705   }
2706 }
2707 
2708 /* Delete every plink on the list */
Plink_delete(struct plink * plp)2709 void Plink_delete(struct plink *plp)
2710 {
2711   struct plink *nextpl;
2712 
2713   while( plp ){
2714     nextpl = plp->next;
2715     plp->next = plink_freelist;
2716     plink_freelist = plp;
2717     plp = nextpl;
2718   }
2719 }
2720 /*********************** From the file "report.c" **************************/
2721 /*
2722 ** Procedures for generating reports and tables in the LEMON parser generator.
2723 */
2724 
2725 /* Generate a filename with the given suffix.  Space to hold the
2726 ** name comes from malloc() and must be freed by the calling
2727 ** function.
2728 */
file_makename(struct lemon * lemp,const char * suffix)2729 PRIVATE char *file_makename(struct lemon *lemp, const char *suffix)
2730 {
2731   char *name;
2732   char *cp;
2733 
2734   name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 );
2735   if( name==0 ){
2736     fprintf(stderr,"Can't allocate space for a filename.\n");
2737     exit(1);
2738   }
2739   strcpy(name,lemp->filename);
2740   cp = strrchr(name,'.');
2741   if( cp ) *cp = 0;
2742   strcat(name,suffix);
2743   return name;
2744 }
2745 
2746 /* Open a file with a name based on the name of the input file,
2747 ** but with a different (specified) suffix, and return a pointer
2748 ** to the stream */
file_open(struct lemon * lemp,const char * suffix,const char * mode)2749 PRIVATE FILE *file_open(
2750   struct lemon *lemp,
2751   const char *suffix,
2752   const char *mode
2753 ){
2754   FILE *fp;
2755 
2756   if( lemp->outname ) free(lemp->outname);
2757   lemp->outname = file_makename(lemp, suffix);
2758   fp = fopen(lemp->outname,mode);
2759   if( fp==0 && *mode=='w' ){
2760     fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2761     lemp->errorcnt++;
2762     return 0;
2763   }
2764 
2765   /* Add files we create to a list, so we can delete them if we fail. This
2766   ** is to keep makefiles from getting confused. We don't include .out files,
2767   ** though: this is debug information, and you don't want it deleted if there
2768   ** was an error you need to track down.
2769   */
2770   if(( *mode=='w' ) && (strcmp(suffix, ".out") != 0)){
2771     const char **ptr = (const char **)
2772         realloc(made_files, sizeof (const char **) * (made_files_count + 1));
2773     const char *fname = Strsafe(lemp->outname);
2774     if ((ptr == NULL) || (fname == NULL)) {
2775         free(ptr);
2776         memory_error();
2777     }
2778     made_files = ptr;
2779     made_files[made_files_count++] = fname;
2780   }
2781   return fp;
2782 }
2783 
2784 /* Duplicate the input file without comments and without actions
2785 ** on rules */
Reprint(struct lemon * lemp)2786 void Reprint(struct lemon *lemp)
2787 {
2788   struct rule *rp;
2789   struct symbol *sp;
2790   int i, j, maxlen, len, ncolumns, skip;
2791   printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2792   maxlen = 10;
2793   for(i=0; i<lemp->nsymbol; i++){
2794     sp = lemp->symbols[i];
2795     len = lemonStrlen(sp->name);
2796     if( len>maxlen ) maxlen = len;
2797   }
2798   ncolumns = 76/(maxlen+5);
2799   if( ncolumns<1 ) ncolumns = 1;
2800   skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2801   for(i=0; i<skip; i++){
2802     printf("//");
2803     for(j=i; j<lemp->nsymbol; j+=skip){
2804       sp = lemp->symbols[j];
2805       assert( sp->index==j );
2806       printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2807     }
2808     printf("\n");
2809   }
2810   for(rp=lemp->rule; rp; rp=rp->next){
2811     printf("%s",rp->lhs->name);
2812     /*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
2813     printf(" ::=");
2814     for(i=0; i<rp->nrhs; i++){
2815       sp = rp->rhs[i];
2816       printf(" %s", sp->name);
2817       if( sp->type==MULTITERMINAL ){
2818         for(j=1; j<sp->nsubsym; j++){
2819           printf("|%s", sp->subsym[j]->name);
2820         }
2821       }
2822       /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
2823     }
2824     printf(".");
2825     if( rp->precsym ) printf(" [%s]",rp->precsym->name);
2826     /* if( rp->code ) printf("\n    %s",rp->code); */
2827     printf("\n");
2828   }
2829 }
2830 
ConfigPrint(FILE * fp,struct config * cfp)2831 void ConfigPrint(FILE *fp, struct config *cfp)
2832 {
2833   struct rule *rp;
2834   struct symbol *sp;
2835   int i, j;
2836   rp = cfp->rp;
2837   fprintf(fp,"%s ::=",rp->lhs->name);
2838   for(i=0; i<=rp->nrhs; i++){
2839     if( i==cfp->dot ) fprintf(fp," *");
2840     if( i==rp->nrhs ) break;
2841     sp = rp->rhs[i];
2842     fprintf(fp," %s", sp->name);
2843     if( sp->type==MULTITERMINAL ){
2844       for(j=1; j<sp->nsubsym; j++){
2845         fprintf(fp,"|%s",sp->subsym[j]->name);
2846       }
2847     }
2848   }
2849 }
2850 
2851 /* #define TEST */
2852 #if 0
2853 /* Print a set */
2854 PRIVATE void SetPrint(out,set,lemp)
2855 FILE *out;
2856 char *set;
2857 struct lemon *lemp;
2858 {
2859   int i;
2860   char *spacer;
2861   spacer = "";
2862   fprintf(out,"%12s[","");
2863   for(i=0; i<lemp->nterminal; i++){
2864     if( SetFind(set,i) ){
2865       fprintf(out,"%s%s",spacer,lemp->symbols[i]->name);
2866       spacer = " ";
2867     }
2868   }
2869   fprintf(out,"]\n");
2870 }
2871 
2872 /* Print a plink chain */
2873 PRIVATE void PlinkPrint(out,plp,tag)
2874 FILE *out;
2875 struct plink *plp;
2876 char *tag;
2877 {
2878   while( plp ){
2879     fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum);
2880     ConfigPrint(out,plp->cfp);
2881     fprintf(out,"\n");
2882     plp = plp->next;
2883   }
2884 }
2885 #endif
2886 
2887 /* Print an action to the given file descriptor.  Return FALSE if
2888 ** nothing was actually printed.
2889 */
PrintAction(struct action * ap,FILE * fp,int indent)2890 int PrintAction(struct action *ap, FILE *fp, int indent){
2891   int result = 1;
2892   switch( ap->type ){
2893     case SHIFT:
2894       fprintf(fp,"%*s shift  %d",indent,ap->sp->name,ap->x.stp->statenum);
2895       break;
2896     case REDUCE:
2897       fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2898       break;
2899     case ACCEPT:
2900       fprintf(fp,"%*s accept",indent,ap->sp->name);
2901       break;
2902     case ERROR:
2903       fprintf(fp,"%*s error",indent,ap->sp->name);
2904       break;
2905     case SRCONFLICT:
2906     case RRCONFLICT:
2907       fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2908         indent,ap->sp->name,ap->x.rp->index);
2909       break;
2910     case SSCONFLICT:
2911       fprintf(fp,"%*s shift  %-3d ** Parsing conflict **",
2912         indent,ap->sp->name,ap->x.stp->statenum);
2913       break;
2914     case SH_RESOLVED:
2915       if( showPrecedenceConflict ){
2916         fprintf(fp,"%*s shift  %-3d -- dropped by precedence",
2917                 indent,ap->sp->name,ap->x.stp->statenum);
2918       }else{
2919         result = 0;
2920       }
2921       break;
2922     case RD_RESOLVED:
2923       if( showPrecedenceConflict ){
2924         fprintf(fp,"%*s reduce %-3d -- dropped by precedence",
2925                 indent,ap->sp->name,ap->x.rp->index);
2926       }else{
2927         result = 0;
2928       }
2929       break;
2930     case NOT_USED:
2931       result = 0;
2932       break;
2933   }
2934   return result;
2935 }
2936 
2937 /* Generate the "y.output" log file */
ReportOutput(struct lemon * lemp)2938 void ReportOutput(struct lemon *lemp)
2939 {
2940   int i;
2941   struct state *stp;
2942   struct config *cfp;
2943   struct action *ap;
2944   FILE *fp;
2945 
2946   fp = file_open(lemp,".out","wb");
2947   if( fp==0 ) return;
2948   for(i=0; i<lemp->nstate; i++){
2949     stp = lemp->sorted[i];
2950     fprintf(fp,"State %d:\n",stp->statenum);
2951     if( lemp->basisflag ) cfp=stp->bp;
2952     else                  cfp=stp->cfp;
2953     while( cfp ){
2954       char buf[20];
2955       if( cfp->dot==cfp->rp->nrhs ){
2956         sprintf(buf,"(%d)",cfp->rp->index);
2957         fprintf(fp,"    %5s ",buf);
2958       }else{
2959         fprintf(fp,"          ");
2960       }
2961       ConfigPrint(fp,cfp);
2962       fprintf(fp,"\n");
2963 #if 0
2964       SetPrint(fp,cfp->fws,lemp);
2965       PlinkPrint(fp,cfp->fplp,"To  ");
2966       PlinkPrint(fp,cfp->bplp,"From");
2967 #endif
2968       if( lemp->basisflag ) cfp=cfp->bp;
2969       else                  cfp=cfp->next;
2970     }
2971     fprintf(fp,"\n");
2972     for(ap=stp->ap; ap; ap=ap->next){
2973       if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2974     }
2975     fprintf(fp,"\n");
2976   }
2977   fprintf(fp, "----------------------------------------------------\n");
2978   fprintf(fp, "Symbols:\n");
2979   for(i=0; i<lemp->nsymbol; i++){
2980     int j;
2981     struct symbol *sp;
2982 
2983     sp = lemp->symbols[i];
2984     fprintf(fp, "  %3d: %s", i, sp->name);
2985     if( sp->type==NONTERMINAL ){
2986       fprintf(fp, ":");
2987       if( sp->lambda ){
2988         fprintf(fp, " <lambda>");
2989       }
2990       for(j=0; j<lemp->nterminal; j++){
2991         if( sp->firstset && SetFind(sp->firstset, j) ){
2992           fprintf(fp, " %s", lemp->symbols[j]->name);
2993         }
2994       }
2995     }
2996     fprintf(fp, "\n");
2997   }
2998   fclose(fp);
2999   return;
3000 }
3001 
3002 /* Search for the file "name" which is in the same directory as
3003 ** the exacutable */
pathsearch(char * argv0,char * name,int modemask)3004 PRIVATE char *pathsearch(char *argv0, char *name, int modemask)
3005 {
3006   const char *pathlist;
3007   char *pathbufptr;
3008   char *pathbuf;
3009   char *path,*cp;
3010   char c;
3011 
3012 #ifdef __WIN32__
3013   cp = strrchr(argv0,'\\');
3014 #else
3015   cp = strrchr(argv0,'/');
3016 #endif
3017   if( cp ){
3018     c = *cp;
3019     *cp = 0;
3020     path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 );
3021     if( path ) sprintf(path,"%s/%s",argv0,name);
3022     *cp = c;
3023   }else{
3024     pathlist = getenv("PATH");
3025     if( pathlist==0 ) pathlist = ".:/bin:/usr/bin";
3026     pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 );
3027     path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 );
3028     if( (pathbuf != 0) && (path!=0) ){
3029       pathbufptr = pathbuf;
3030       strcpy(pathbuf, pathlist);
3031       while( *pathbuf ){
3032         cp = strchr(pathbuf,':');
3033         if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)];
3034         c = *cp;
3035         *cp = 0;
3036         sprintf(path,"%s/%s",pathbuf,name);
3037         *cp = c;
3038         if( c==0 ) pathbuf[0] = 0;
3039         else pathbuf = &cp[1];
3040         if( access(path,modemask)==0 ) break;
3041       }
3042       free(pathbufptr);
3043     }
3044   }
3045   return path;
3046 }
3047 
3048 /* Given an action, compute the integer value for that action
3049 ** which is to be put in the action table of the generated machine.
3050 ** Return negative if no action should be generated.
3051 */
compute_action(struct lemon * lemp,struct action * ap)3052 PRIVATE int compute_action(struct lemon *lemp, struct action *ap)
3053 {
3054   int act;
3055   switch( ap->type ){
3056     case SHIFT:  act = ap->x.stp->statenum;            break;
3057     case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
3058     case ERROR:  act = lemp->nstate + lemp->nrule;     break;
3059     case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
3060     default:     act = -1; break;
3061   }
3062   return act;
3063 }
3064 
3065 #define LINESIZE 1000
3066 /* The next cluster of routines are for reading the template file
3067 ** and writing the results to the generated parser */
3068 /* The first function transfers data from "in" to "out" until
3069 ** a line is seen which begins with "%%".  The line number is
3070 ** tracked.
3071 **
3072 ** if name!=0, then any word that begin with "Parse" is changed to
3073 ** begin with *name instead.
3074 */
tplt_xfer(char * name,FILE * in,FILE * out,int * lineno)3075 PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno)
3076 {
3077   int i, iStart;
3078   char line[LINESIZE];
3079   while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){
3080     (*lineno)++;
3081     iStart = 0;
3082     if( name ){
3083       for(i=0; line[i]; i++){
3084         if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0
3085           && (i==0 || !isalpha(line[i-1]))
3086         ){
3087           if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]);
3088           fprintf(out,"%s",name);
3089           i += 4;
3090           iStart = i+1;
3091         }
3092       }
3093     }
3094     fprintf(out,"%s",&line[iStart]);
3095   }
3096 }
3097 
3098 /* The next function finds the template file and opens it, returning
3099 ** a pointer to the opened file. */
tplt_open(struct lemon * lemp)3100 PRIVATE FILE *tplt_open(struct lemon *lemp)
3101 {
3102   static char templatename[] = "lempar.c";
3103   char buf[1000];
3104   FILE *in;
3105   char *tpltname;
3106   char *cp;
3107 
3108   /* first, see if user specified a template filename on the command line. */
3109   if (user_templatename != 0) {
3110     if( access(user_templatename,004)==-1 ){
3111       fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
3112         user_templatename);
3113       lemp->errorcnt++;
3114       return 0;
3115     }
3116     in = fopen(user_templatename,"rb");
3117     if( in==0 ){
3118       fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename);
3119       lemp->errorcnt++;
3120       return 0;
3121     }
3122     return in;
3123   }
3124 
3125   cp = strrchr(lemp->filename,'.');
3126   if( cp ){
3127     sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
3128   }else{
3129     sprintf(buf,"%s.lt",lemp->filename);
3130   }
3131   if( access(buf,004)==0 ){
3132     tpltname = buf;
3133   }else if( access(templatename,004)==0 ){
3134     tpltname = templatename;
3135   }else{
3136     tpltname = pathsearch(lemp->argv0,templatename,0);
3137   }
3138   if( tpltname==0 ){
3139     fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
3140     templatename);
3141     lemp->errorcnt++;
3142     return 0;
3143   }
3144   in = fopen(tpltname,"rb");
3145   if( in==0 ){
3146     fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
3147     lemp->errorcnt++;
3148     return 0;
3149   }
3150   return in;
3151 }
3152 
3153 /* Print a #line directive line to the output file. */
tplt_linedir(FILE * out,int lineno,char * filename)3154 PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename)
3155 {
3156   fprintf(out,"#line %d \"",lineno);
3157   while( *filename ){
3158     if( *filename == '\\' ) putc('\\',out);
3159     putc(*filename,out);
3160     filename++;
3161   }
3162   fprintf(out,"\"\n");
3163 }
3164 
3165 /* Print a string to the file and keep the linenumber up to date */
tplt_print(FILE * out,struct lemon * lemp,char * str,int * lineno)3166 PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno)
3167 {
3168   if( str==0 ) return;
3169   while( *str ){
3170     putc(*str,out);
3171     if( *str=='\n' ) (*lineno)++;
3172     str++;
3173   }
3174   if( str[-1]!='\n' ){
3175     putc('\n',out);
3176     (*lineno)++;
3177   }
3178   if (!lemp->nolinenosflag) {
3179     (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3180   }
3181   return;
3182 }
3183 
3184 /*
3185 ** The following routine emits code for the destructor for the
3186 ** symbol sp
3187 */
emit_destructor_code(FILE * out,struct symbol * sp,struct lemon * lemp,int * lineno)3188 void emit_destructor_code(
3189   FILE *out,
3190   struct symbol *sp,
3191   struct lemon *lemp,
3192   int *lineno
3193 ){
3194  char *cp = 0;
3195 
3196  if( sp->type==TERMINAL ){
3197    cp = lemp->tokendest;
3198    if( cp==0 ) return;
3199    fprintf(out,"{\n"); (*lineno)++;
3200  }else if( sp->destructor ){
3201    cp = sp->destructor;
3202    fprintf(out,"{\n"); (*lineno)++;
3203    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); }
3204  }else if( lemp->vardest ){
3205    cp = lemp->vardest;
3206    if( cp==0 ) return;
3207    fprintf(out,"{\n"); (*lineno)++;
3208  }else{
3209    assert( 0 );  /* Cannot happen */
3210  }
3211  for(; *cp; cp++){
3212    if( *cp=='$' && cp[1]=='$' ){
3213      fprintf(out,"(yypminor->yy%d)",sp->dtnum);
3214      cp++;
3215      continue;
3216    }
3217    if( *cp=='\n' ) (*lineno)++;
3218    fputc(*cp,out);
3219  }
3220  fprintf(out,"\n"); (*lineno)++;
3221  if (!lemp->nolinenosflag) {
3222    (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3223  }
3224  fprintf(out,"}\n"); (*lineno)++;
3225  return;
3226 }
3227 
3228 /*
3229 ** Return TRUE (non-zero) if the given symbol has a destructor.
3230 */
has_destructor(struct symbol * sp,struct lemon * lemp)3231 int has_destructor(struct symbol *sp, struct lemon *lemp)
3232 {
3233   int ret;
3234   if( sp->type==TERMINAL ){
3235     ret = lemp->tokendest!=0;
3236   }else{
3237     ret = lemp->vardest!=0 || sp->destructor!=0;
3238   }
3239   return ret;
3240 }
3241 
3242 /*
3243 ** Append text to a dynamically allocated string.  If zText is 0 then
3244 ** reset the string to be empty again.  Always return the complete text
3245 ** of the string (which is overwritten with each call).
3246 **
3247 ** n bytes of zText are stored.  If n==0 then all of zText up to the first
3248 ** \000 terminator is stored.  zText can contain up to two instances of
3249 ** %d.  The values of p1 and p2 are written into the first and second
3250 ** %d.
3251 **
3252 ** If n==-1, then the previous character is overwritten.
3253 */
append_str(const char * zText,int n,int p1,int p2)3254 PRIVATE char *append_str(const char *zText, int n, int p1, int p2){
3255   static char empty[1] = { 0 };
3256   static char *z = 0;
3257   static int alloced = 0;
3258   static int used = 0;
3259   int c;
3260   char zInt[40];
3261   if( zText==0 ){
3262     used = 0;
3263     return z;
3264   }
3265   if( n<=0 ){
3266     if( n<0 ){
3267       used += n;
3268       assert( used>=0 );
3269     }
3270     n = lemonStrlen(zText);
3271   }
3272   if( (int) (n+sizeof(zInt)*2+used) >= alloced ){
3273     alloced = n + sizeof(zInt)*2 + used + 200;
3274     z = (char *) realloc(z,  alloced);
3275   }
3276   if( z==0 ) return empty;
3277   while( n-- > 0 ){
3278     c = *(zText++);
3279     if( c=='%' && n>0 && zText[0]=='d' ){
3280       sprintf(zInt, "%d", p1);
3281       p1 = p2;
3282       strcpy(&z[used], zInt);
3283       used += lemonStrlen(&z[used]);
3284       zText++;
3285       n--;
3286     }else{
3287       z[used++] = c;
3288     }
3289   }
3290   z[used] = 0;
3291   return z;
3292 }
3293 
3294 /*
3295 ** zCode is a string that is the action associated with a rule.  Expand
3296 ** the symbols in this string so that the refer to elements of the parser
3297 ** stack.
3298 */
translate_code(struct lemon * lemp,struct rule * rp)3299 PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){
3300   char *cp, *xp;
3301   int i;
3302   char lhsused = 0;    /* True if the LHS element has been used */
3303   char used[MAXRHS];   /* True for each RHS element which is used */
3304 
3305   for(i=0; i<rp->nrhs; i++) used[i] = 0;
3306   lhsused = 0;
3307 
3308   if( rp->code==0 ){
3309     static char newlinestr[2] = { '\n', '\0' };
3310     rp->code = newlinestr;
3311     rp->line = rp->ruleline;
3312   }
3313 
3314   append_str(0,0,0,0);
3315 
3316   /* This const cast is wrong but harmless, if we're careful. */
3317   for(cp=(char *)rp->code; *cp; cp++){
3318     if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){
3319       char saved;
3320       for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++);
3321       saved = *xp;
3322       *xp = 0;
3323       if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
3324         append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0);
3325         cp = xp;
3326         lhsused = 1;
3327       }else{
3328         for(i=0; i<rp->nrhs; i++){
3329           if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
3330             if( cp!=rp->code && cp[-1]=='@' ){
3331               /* If the argument is of the form @X then substituted
3332               ** the token number of X, not the value of X */
3333               append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0);
3334             }else{
3335               struct symbol *sp = rp->rhs[i];
3336               int dtnum;
3337               if( sp->type==MULTITERMINAL ){
3338                 dtnum = sp->subsym[0]->dtnum;
3339               }else{
3340                 dtnum = sp->dtnum;
3341               }
3342               append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum);
3343             }
3344             cp = xp;
3345             used[i] = 1;
3346             break;
3347           }
3348         }
3349       }
3350       *xp = saved;
3351     }
3352     append_str(cp, 1, 0, 0);
3353   } /* End loop */
3354 
3355   /* Check to make sure the LHS has been used */
3356   if( rp->lhsalias && !lhsused ){
3357     ErrorMsg(lemp->filename,rp->ruleline,
3358       "Label \"%s\" for \"%s(%s)\" is never used.",
3359         rp->lhsalias,rp->lhs->name,rp->lhsalias);
3360     lemp->errorcnt++;
3361   }
3362 
3363   /* Generate destructor code for RHS symbols which are not used in the
3364   ** reduce code */
3365   for(i=0; i<rp->nrhs; i++){
3366     if( rp->rhsalias[i] && !used[i] ){
3367       ErrorMsg(lemp->filename,rp->ruleline,
3368         "Label %s for \"%s(%s)\" is never used.",
3369         rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
3370       lemp->errorcnt++;
3371     }else if( rp->rhsalias[i]==0 ){
3372       if( has_destructor(rp->rhs[i],lemp) ){
3373         append_str("  yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0,
3374            rp->rhs[i]->index,i-rp->nrhs+1);
3375       }else{
3376         /* No destructor defined for this term */
3377       }
3378     }
3379   }
3380   if( rp->code ){
3381     cp = append_str(0,0,0,0);
3382     rp->code = Strsafe(cp?cp:"");
3383   }
3384 }
3385 
3386 /*
3387 ** Generate code which executes when the rule "rp" is reduced.  Write
3388 ** the code to "out".  Make sure lineno stays up-to-date.
3389 */
emit_code(FILE * out,struct rule * rp,struct lemon * lemp,int * lineno)3390 PRIVATE void emit_code(
3391   FILE *out,
3392   struct rule *rp,
3393   struct lemon *lemp,
3394   int *lineno
3395 ){
3396  const char *cp;
3397 
3398  /* Generate code to do the reduce action */
3399  if( rp->code ){
3400    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); }
3401    fprintf(out,"{%s",rp->code);
3402    for(cp=rp->code; *cp; cp++){
3403      if( *cp=='\n' ) (*lineno)++;
3404    } /* End loop */
3405    fprintf(out,"}\n"); (*lineno)++;
3406    if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); }
3407  } /* End if( rp->code ) */
3408 
3409  return;
3410 }
3411 
3412 /*
3413 ** Print the definition of the union used for the parser's data stack.
3414 ** This union contains fields for every possible data type for tokens
3415 ** and nonterminals.  In the process of computing and printing this
3416 ** union, also set the ".dtnum" field of every terminal and nonterminal
3417 ** symbol.
3418 */
print_stack_union(FILE * out,struct lemon * lemp,int * plineno,int mhflag)3419 void print_stack_union(
3420   FILE *out,                  /* The output stream */
3421   struct lemon *lemp,         /* The main info structure for this parser */
3422   int *plineno,               /* Pointer to the line number */
3423   int mhflag                  /* True if generating makeheaders output */
3424 ){
3425   int lineno = *plineno;    /* The line number of the output */
3426   char **types;             /* A hash table of datatypes */
3427   int arraysize;            /* Size of the "types" array */
3428   int maxdtlength;          /* Maximum length of any ".datatype" field. */
3429   char *stddt;              /* Standardized name for a datatype */
3430   int i,j;                  /* Loop counters */
3431   int hash;                 /* For hashing the name of a type */
3432   const char *name;         /* Name of the parser */
3433 
3434   /* Allocate and initialize types[] and allocate stddt[] */
3435   arraysize = lemp->nsymbol * 2;
3436   types = (char**)calloc( arraysize, sizeof(char*) );
3437   for(i=0; i<arraysize; i++) types[i] = 0;
3438   maxdtlength = 0;
3439   if( lemp->vartype ){
3440     maxdtlength = lemonStrlen(lemp->vartype);
3441   }
3442   for(i=0; i<lemp->nsymbol; i++){
3443     int len;
3444     struct symbol *sp = lemp->symbols[i];
3445     if( sp->datatype==0 ) continue;
3446     len = lemonStrlen(sp->datatype);
3447     if( len>maxdtlength ) maxdtlength = len;
3448   }
3449   stddt = (char*)malloc( maxdtlength*2 + 1 );
3450   if( types==0 || stddt==0 ){
3451     fprintf(stderr,"Out of memory.\n");
3452     exit(1);
3453   }
3454 
3455   /* Build a hash table of datatypes. The ".dtnum" field of each symbol
3456   ** is filled in with the hash index plus 1.  A ".dtnum" value of 0 is
3457   ** used for terminal symbols.  If there is no %default_type defined then
3458   ** 0 is also used as the .dtnum value for nonterminals which do not specify
3459   ** a datatype using the %type directive.
3460   */
3461   for(i=0; i<lemp->nsymbol; i++){
3462     struct symbol *sp = lemp->symbols[i];
3463     char *cp;
3464     if( sp==lemp->errsym ){
3465       sp->dtnum = arraysize+1;
3466       continue;
3467     }
3468     if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){
3469       sp->dtnum = 0;
3470       continue;
3471     }
3472     cp = sp->datatype;
3473     if( cp==0 ) cp = lemp->vartype;
3474     j = 0;
3475     while( isspace(*cp) ) cp++;
3476     while( *cp ) stddt[j++] = *cp++;
3477     while( j>0 && isspace(stddt[j-1]) ) j--;
3478     stddt[j] = 0;
3479     if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){
3480       sp->dtnum = 0;
3481       continue;
3482     }
3483     hash = 0;
3484     for(j=0; stddt[j]; j++){
3485       hash = hash*53 + stddt[j];
3486     }
3487     hash = (hash & 0x7fffffff)%arraysize;
3488     while( types[hash] ){
3489       if( strcmp(types[hash],stddt)==0 ){
3490         sp->dtnum = hash + 1;
3491         break;
3492       }
3493       hash++;
3494       if( hash>=arraysize ) hash = 0;
3495     }
3496     if( types[hash]==0 ){
3497       sp->dtnum = hash + 1;
3498       types[hash] = (char*)malloc( lemonStrlen(stddt)+1 );
3499       if( types[hash]==0 ){
3500         fprintf(stderr,"Out of memory.\n");
3501         exit(1);
3502       }
3503       strcpy(types[hash],stddt);
3504     }
3505   }
3506 
3507   /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
3508   name = lemp->name ? lemp->name : "Parse";
3509   lineno = *plineno;
3510   if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
3511   fprintf(out,"#define %sTOKENTYPE %s\n",name,
3512     lemp->tokentype?lemp->tokentype:"void*");  lineno++;
3513   if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
3514   fprintf(out,"typedef union {\n"); lineno++;
3515   fprintf(out,"  int yyinit;\n"); lineno++;
3516   fprintf(out,"  %sTOKENTYPE yy0;\n",name); lineno++;
3517   for(i=0; i<arraysize; i++){
3518     if( types[i]==0 ) continue;
3519     fprintf(out,"  %s yy%d;\n",types[i],i+1); lineno++;
3520     free(types[i]);
3521   }
3522   if( lemp->errsym->useCnt ){
3523     fprintf(out,"  int yy%d;\n",lemp->errsym->dtnum); lineno++;
3524   }
3525   free(stddt);
3526   free(types);
3527   fprintf(out,"} YYMINORTYPE;\n"); lineno++;
3528   *plineno = lineno;
3529 }
3530 
3531 /*
3532 ** Return the name of a C datatype able to represent values between
3533 ** lwr and upr, inclusive.
3534 */
minimum_size_type(int lwr,int upr)3535 static const char *minimum_size_type(int lwr, int upr){
3536   if( lwr>=0 ){
3537     if( upr<=255 ){
3538       return "unsigned char";
3539     }else if( upr<65535 ){
3540       return "unsigned short int";
3541     }else{
3542       return "unsigned int";
3543     }
3544   }else if( lwr>=-127 && upr<=127 ){
3545     return "signed char";
3546   }else if( lwr>=-32767 && upr<32767 ){
3547     return "short";
3548   }else{
3549     return "int";
3550   }
3551 }
3552 
3553 /*
3554 ** Each state contains a set of token transaction and a set of
3555 ** nonterminal transactions.  Each of these sets makes an instance
3556 ** of the following structure.  An array of these structures is used
3557 ** to order the creation of entries in the yy_action[] table.
3558 */
3559 struct axset {
3560   struct state *stp;   /* A pointer to a state */
3561   int isTkn;           /* True to use tokens.  False for non-terminals */
3562   int nAction;         /* Number of actions */
3563   int iOrder;          /* Original order of action sets */
3564 };
3565 
3566 /*
3567 ** Compare to axset structures for sorting purposes
3568 */
axset_compare(const void * a,const void * b)3569 static int axset_compare(const void *a, const void *b){
3570   struct axset *p1 = (struct axset*)a;
3571   struct axset *p2 = (struct axset*)b;
3572   int c;
3573   c = p2->nAction - p1->nAction;
3574   if( c==0 ){
3575     c = p2->iOrder - p1->iOrder;
3576   }
3577   assert( c!=0 || p1==p2 );
3578   return c;
3579 }
3580 
3581 /*
3582 ** Write text on "out" that describes the rule "rp".
3583 */
writeRuleText(FILE * out,struct rule * rp)3584 static void writeRuleText(FILE *out, struct rule *rp){
3585   int j;
3586   fprintf(out,"%s ::=", rp->lhs->name);
3587   for(j=0; j<rp->nrhs; j++){
3588     struct symbol *sp = rp->rhs[j];
3589     fprintf(out," %s", sp->name);
3590     if( sp->type==MULTITERMINAL ){
3591       int k;
3592       for(k=1; k<sp->nsubsym; k++){
3593         fprintf(out,"|%s",sp->subsym[k]->name);
3594       }
3595     }
3596   }
3597 }
3598 
3599 
3600 /* Generate C source code for the parser */
ReportTable(struct lemon * lemp,int mhflag)3601 void ReportTable(
3602   struct lemon *lemp,
3603   int mhflag     /* Output in makeheaders format if true */
3604 ){
3605   FILE *out, *in;
3606   char line[LINESIZE];
3607   int  lineno;
3608   struct state *stp;
3609   struct action *ap;
3610   struct rule *rp;
3611   struct acttab *pActtab;
3612   int i, j, n;
3613   const char *name;
3614   int mnTknOfst, mxTknOfst;
3615   int mnNtOfst, mxNtOfst;
3616   struct axset *ax;
3617 
3618   in = tplt_open(lemp);
3619   if( in==0 ) return;
3620   out = file_open(lemp,".c","wb");
3621   if( out==0 ){
3622     fclose(in);
3623     return;
3624   }
3625   lineno = 1;
3626   tplt_xfer(lemp->name,in,out,&lineno);
3627 
3628   /* Generate the include code, if any */
3629   tplt_print(out,lemp,lemp->include,&lineno);
3630   if( mhflag ){
3631     char *name = file_makename(lemp, ".h");
3632     fprintf(out,"#include \"%s\"\n", name); lineno++;
3633     free(name);
3634   }
3635   tplt_xfer(lemp->name,in,out,&lineno);
3636 
3637   /* Generate #defines for all tokens */
3638   if( mhflag ){
3639     const char *prefix;
3640     fprintf(out,"#if INTERFACE\n"); lineno++;
3641     if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3642     else                    prefix = "";
3643     for(i=1; i<lemp->nterminal; i++){
3644       fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3645       lineno++;
3646     }
3647     fprintf(out,"#endif\n"); lineno++;
3648   }
3649   tplt_xfer(lemp->name,in,out,&lineno);
3650 
3651   /* Generate the defines */
3652   fprintf(out,"#define YYCODETYPE %s\n",
3653     minimum_size_type(0, lemp->nsymbol+1)); lineno++;
3654   fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1);  lineno++;
3655   fprintf(out,"#define YYACTIONTYPE %s\n",
3656     minimum_size_type(0, lemp->nstate+lemp->nrule+5));  lineno++;
3657   if( lemp->wildcard ){
3658     fprintf(out,"#define YYWILDCARD %d\n",
3659        lemp->wildcard->index); lineno++;
3660   }
3661   print_stack_union(out,lemp,&lineno,mhflag);
3662   fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++;
3663   if( lemp->stacksize ){
3664     fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize);  lineno++;
3665   }else{
3666     fprintf(out,"#define YYSTACKDEPTH 100\n");  lineno++;
3667   }
3668   fprintf(out, "#endif\n"); lineno++;
3669   if( mhflag ){
3670     fprintf(out,"#if INTERFACE\n"); lineno++;
3671   }
3672   name = lemp->name ? lemp->name : "Parse";
3673   if( lemp->arg && lemp->arg[0] ){
3674     int i;
3675     i = lemonStrlen(lemp->arg);
3676     while( i>=1 && isspace(lemp->arg[i-1]) ) i--;
3677     while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--;
3678     fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg);  lineno++;
3679     fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg);  lineno++;
3680     fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n",
3681                  name,lemp->arg,&lemp->arg[i]);  lineno++;
3682     fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n",
3683                  name,&lemp->arg[i],&lemp->arg[i]);  lineno++;
3684   }else{
3685     fprintf(out,"#define %sARG_SDECL\n",name);  lineno++;
3686     fprintf(out,"#define %sARG_PDECL\n",name);  lineno++;
3687     fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
3688     fprintf(out,"#define %sARG_STORE\n",name); lineno++;
3689   }
3690   if( mhflag ){
3691     fprintf(out,"#endif\n"); lineno++;
3692   }
3693   fprintf(out,"#define YYNSTATE %d\n",lemp->nstate);  lineno++;
3694   fprintf(out,"#define YYNRULE %d\n",lemp->nrule);  lineno++;
3695   if( lemp->errsym->useCnt ){
3696     fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index);  lineno++;
3697     fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum);  lineno++;
3698   }
3699   if( lemp->has_fallback ){
3700     fprintf(out,"#define YYFALLBACK 1\n");  lineno++;
3701   }
3702   tplt_xfer(lemp->name,in,out,&lineno);
3703 
3704   /* Generate the action table and its associates:
3705   **
3706   **  yy_action[]        A single table containing all actions.
3707   **  yy_lookahead[]     A table containing the lookahead for each entry in
3708   **                     yy_action.  Used to detect hash collisions.
3709   **  yy_shift_ofst[]    For each state, the offset into yy_action for
3710   **                     shifting terminals.
3711   **  yy_reduce_ofst[]   For each state, the offset into yy_action for
3712   **                     shifting non-terminals after a reduce.
3713   **  yy_default[]       Default action for each state.
3714   */
3715 
3716   /* Compute the actions on all states and count them up */
3717   ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0]));
3718   if( ax==0 ){
3719     fprintf(stderr,"malloc failed\n");
3720     exit(1);
3721   }
3722   for(i=0; i<lemp->nstate; i++){
3723     stp = lemp->sorted[i];
3724     ax[i*2].stp = stp;
3725     ax[i*2].isTkn = 1;
3726     ax[i*2].nAction = stp->nTknAct;
3727     ax[i*2+1].stp = stp;
3728     ax[i*2+1].isTkn = 0;
3729     ax[i*2+1].nAction = stp->nNtAct;
3730   }
3731   mxTknOfst = mnTknOfst = 0;
3732   mxNtOfst = mnNtOfst = 0;
3733 
3734   /* Compute the action table.  In order to try to keep the size of the
3735   ** action table to a minimum, the heuristic of placing the largest action
3736   ** sets first is used.
3737   */
3738   for(i=0; i<lemp->nstate*2; i++) ax[i].iOrder = i;
3739   qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
3740   pActtab = acttab_alloc();
3741   for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
3742     stp = ax[i].stp;
3743     if( ax[i].isTkn ){
3744       for(ap=stp->ap; ap; ap=ap->next){
3745         int action;
3746         if( ap->sp->index>=lemp->nterminal ) continue;
3747         action = compute_action(lemp, ap);
3748         if( action<0 ) continue;
3749         acttab_action(pActtab, ap->sp->index, action);
3750       }
3751       stp->iTknOfst = acttab_insert(pActtab);
3752       if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
3753       if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
3754     }else{
3755       for(ap=stp->ap; ap; ap=ap->next){
3756         int action;
3757         if( ap->sp->index<lemp->nterminal ) continue;
3758         if( ap->sp->index==lemp->nsymbol ) continue;
3759         action = compute_action(lemp, ap);
3760         if( action<0 ) continue;
3761         acttab_action(pActtab, ap->sp->index, action);
3762       }
3763       stp->iNtOfst = acttab_insert(pActtab);
3764       if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
3765       if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
3766     }
3767   }
3768   free(ax);
3769 
3770   /* Output the yy_action table */
3771   n = acttab_size(pActtab);
3772   fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++;
3773   fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
3774   for(i=j=0; i<n; i++){
3775     int action = acttab_yyaction(pActtab, i);
3776     if( action<0 ) action = lemp->nstate + lemp->nrule + 2;
3777     if( j==0 ) fprintf(out," /* %5d */ ", i);
3778     fprintf(out, " %4d,", action);
3779     if( j==9 || i==n-1 ){
3780       fprintf(out, "\n"); lineno++;
3781       j = 0;
3782     }else{
3783       j++;
3784     }
3785   }
3786   fprintf(out, "};\n"); lineno++;
3787 
3788   /* Output the yy_lookahead table */
3789   fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++;
3790   for(i=j=0; i<n; i++){
3791     int la = acttab_yylookahead(pActtab, i);
3792     if( la<0 ) la = lemp->nsymbol;
3793     if( j==0 ) fprintf(out," /* %5d */ ", i);
3794     fprintf(out, " %4d,", la);
3795     if( j==9 || i==n-1 ){
3796       fprintf(out, "\n"); lineno++;
3797       j = 0;
3798     }else{
3799       j++;
3800     }
3801   }
3802   fprintf(out, "};\n"); lineno++;
3803 
3804   /* Output the yy_shift_ofst[] table */
3805   fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
3806   n = lemp->nstate;
3807   while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
3808   fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++;
3809   fprintf(out, "#define YY_SHIFT_MIN   (%d)\n", mnTknOfst); lineno++;
3810   fprintf(out, "#define YY_SHIFT_MAX   (%d)\n", mxTknOfst); lineno++;
3811   fprintf(out, "static const %s yy_shift_ofst[] = {\n",
3812           minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
3813   for(i=j=0; i<n; i++){
3814     int ofst;
3815     stp = lemp->sorted[i];
3816     ofst = stp->iTknOfst;
3817     if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
3818     if( j==0 ) fprintf(out," /* %5d */ ", i);
3819     fprintf(out, " %4d,", ofst);
3820     if( j==9 || i==n-1 ){
3821       fprintf(out, "\n"); lineno++;
3822       j = 0;
3823     }else{
3824       j++;
3825     }
3826   }
3827   fprintf(out, "};\n"); lineno++;
3828 
3829   /* Output the yy_reduce_ofst[] table */
3830   fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
3831   n = lemp->nstate;
3832   while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
3833   fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++;
3834   fprintf(out, "#define YY_REDUCE_MIN   (%d)\n", mnNtOfst); lineno++;
3835   fprintf(out, "#define YY_REDUCE_MAX   (%d)\n", mxNtOfst); lineno++;
3836   fprintf(out, "static const %s yy_reduce_ofst[] = {\n",
3837           minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
3838   for(i=j=0; i<n; i++){
3839     int ofst;
3840     stp = lemp->sorted[i];
3841     ofst = stp->iNtOfst;
3842     if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
3843     if( j==0 ) fprintf(out," /* %5d */ ", i);
3844     fprintf(out, " %4d,", ofst);
3845     if( j==9 || i==n-1 ){
3846       fprintf(out, "\n"); lineno++;
3847       j = 0;
3848     }else{
3849       j++;
3850     }
3851   }
3852   fprintf(out, "};\n"); lineno++;
3853 
3854   /* Output the default action table */
3855   fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
3856   n = lemp->nstate;
3857   for(i=j=0; i<n; i++){
3858     stp = lemp->sorted[i];
3859     if( j==0 ) fprintf(out," /* %5d */ ", i);
3860     fprintf(out, " %4d,", stp->iDflt);
3861     if( j==9 || i==n-1 ){
3862       fprintf(out, "\n"); lineno++;
3863       j = 0;
3864     }else{
3865       j++;
3866     }
3867   }
3868   fprintf(out, "};\n"); lineno++;
3869   tplt_xfer(lemp->name,in,out,&lineno);
3870 
3871   /* Generate the table of fallback tokens.
3872   */
3873   if( lemp->has_fallback ){
3874     int mx = lemp->nterminal - 1;
3875     while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; }
3876     for(i=0; i<=mx; i++){
3877       struct symbol *p = lemp->symbols[i];
3878       if( p->fallback==0 ){
3879         fprintf(out, "    0,  /* %10s => nothing */\n", p->name);
3880       }else{
3881         fprintf(out, "  %3d,  /* %10s => %s */\n", p->fallback->index,
3882           p->name, p->fallback->name);
3883       }
3884       lineno++;
3885     }
3886   }
3887   tplt_xfer(lemp->name, in, out, &lineno);
3888 
3889   /* Generate a table containing the symbolic name of every symbol
3890   */
3891   for(i=0; i<lemp->nsymbol; i++){
3892     sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3893     fprintf(out,"  %-15s",line);
3894     if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3895   }
3896   if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3897   tplt_xfer(lemp->name,in,out,&lineno);
3898 
3899   /* Generate a table containing a text string that describes every
3900   ** rule in the rule set of the grammar.  This information is used
3901   ** when tracing REDUCE actions.
3902   */
3903   for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
3904     assert( rp->index==i );
3905     fprintf(out," /* %3d */ \"", i);
3906     writeRuleText(out, rp);
3907     fprintf(out,"\",\n"); lineno++;
3908   }
3909   tplt_xfer(lemp->name,in,out,&lineno);
3910 
3911   /* Generate code which executes every time a symbol is popped from
3912   ** the stack while processing errors or while destroying the parser.
3913   ** (In other words, generate the %destructor actions)
3914   */
3915   if( lemp->tokendest ){
3916     int once = 1;
3917     for(i=0; i<lemp->nsymbol; i++){
3918       struct symbol *sp = lemp->symbols[i];
3919       if( sp==0 || sp->type!=TERMINAL ) continue;
3920       if( once ){
3921         fprintf(out, "      /* TERMINAL Destructor */\n"); lineno++;
3922         once = 0;
3923       }
3924       fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3925     }
3926     for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3927     if( i<lemp->nsymbol ){
3928       emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3929       fprintf(out,"      break;\n"); lineno++;
3930     }
3931   }
3932   if( lemp->vardest ){
3933     struct symbol *dflt_sp = 0;
3934     int once = 1;
3935     for(i=0; i<lemp->nsymbol; i++){
3936       struct symbol *sp = lemp->symbols[i];
3937       if( sp==0 || sp->type==TERMINAL ||
3938           sp->index<=0 || sp->destructor!=0 ) continue;
3939       if( once ){
3940         fprintf(out, "      /* Default NON-TERMINAL Destructor */\n"); lineno++;
3941         once = 0;
3942       }
3943       fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3944       dflt_sp = sp;
3945     }
3946     if( dflt_sp!=0 ){
3947       emit_destructor_code(out,dflt_sp,lemp,&lineno);
3948     }
3949     fprintf(out,"      break;\n"); lineno++;
3950   }
3951   for(i=0; i<lemp->nsymbol; i++){
3952     struct symbol *sp = lemp->symbols[i];
3953     if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
3954     fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
3955 
3956     /* Combine duplicate destructors into a single case */
3957     for(j=i+1; j<lemp->nsymbol; j++){
3958       struct symbol *sp2 = lemp->symbols[j];
3959       if( sp2 && sp2->type!=TERMINAL && sp2->destructor
3960           && sp2->dtnum==sp->dtnum
3961           && strcmp(sp->destructor,sp2->destructor)==0 ){
3962          fprintf(out,"    case %d: /* %s */\n",
3963                  sp2->index, sp2->name); lineno++;
3964          sp2->destructor = 0;
3965       }
3966     }
3967 
3968     emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3969     fprintf(out,"      break;\n"); lineno++;
3970   }
3971   tplt_xfer(lemp->name,in,out,&lineno);
3972 
3973   /* Generate code which executes whenever the parser stack overflows */
3974   tplt_print(out,lemp,lemp->overflow,&lineno);
3975   tplt_xfer(lemp->name,in,out,&lineno);
3976 
3977   /* Generate the table of rule information
3978   **
3979   ** Note: This code depends on the fact that rules are number
3980   ** sequentually beginning with 0.
3981   */
3982   for(rp=lemp->rule; rp; rp=rp->next){
3983     fprintf(out,"  { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3984   }
3985   tplt_xfer(lemp->name,in,out,&lineno);
3986 
3987   /* Generate code which execution during each REDUCE action */
3988   for(rp=lemp->rule; rp; rp=rp->next){
3989     translate_code(lemp, rp);
3990   }
3991   /* First output rules other than the default: rule */
3992   for(rp=lemp->rule; rp; rp=rp->next){
3993     struct rule *rp2;               /* Other rules with the same action */
3994     if( rp->code==0 ) continue;
3995     if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */
3996     fprintf(out,"      case %d: /* ", rp->index);
3997     writeRuleText(out, rp);
3998     fprintf(out, " */\n"); lineno++;
3999     for(rp2=rp->next; rp2; rp2=rp2->next){
4000       if( rp2->code==rp->code ){
4001         fprintf(out,"      case %d: /* ", rp2->index);
4002         writeRuleText(out, rp2);
4003         fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++;
4004         rp2->code = 0;
4005       }
4006     }
4007     emit_code(out,rp,lemp,&lineno);
4008     fprintf(out,"        break;\n"); lineno++;
4009     rp->code = 0;
4010   }
4011   /* Finally, output the default: rule.  We choose as the default: all
4012   ** empty actions. */
4013   fprintf(out,"      default:\n"); lineno++;
4014   for(rp=lemp->rule; rp; rp=rp->next){
4015     if( rp->code==0 ) continue;
4016     assert( rp->code[0]=='\n' && rp->code[1]==0 );
4017     fprintf(out,"      /* (%d) ", rp->index);
4018     writeRuleText(out, rp);
4019     fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++;
4020   }
4021   fprintf(out,"        break;\n"); lineno++;
4022   tplt_xfer(lemp->name,in,out,&lineno);
4023 
4024   /* Generate code which executes if a parse fails */
4025   tplt_print(out,lemp,lemp->failure,&lineno);
4026   tplt_xfer(lemp->name,in,out,&lineno);
4027 
4028   /* Generate code which executes when a syntax error occurs */
4029   tplt_print(out,lemp,lemp->error,&lineno);
4030   tplt_xfer(lemp->name,in,out,&lineno);
4031 
4032   /* Generate code which executes when the parser accepts its input */
4033   tplt_print(out,lemp,lemp->accept,&lineno);
4034   tplt_xfer(lemp->name,in,out,&lineno);
4035 
4036   /* Append any addition code the user desires */
4037   tplt_print(out,lemp,lemp->extracode,&lineno);
4038 
4039   fclose(in);
4040   fclose(out);
4041   return;
4042 }
4043 
4044 /* Generate a header file for the parser */
ReportHeader(struct lemon * lemp)4045 void ReportHeader(struct lemon *lemp)
4046 {
4047   FILE *out, *in;
4048   const char *prefix;
4049   char line[LINESIZE];
4050   char pattern[LINESIZE];
4051   int i;
4052 
4053   if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
4054   else                    prefix = "";
4055   in = file_open(lemp,".h","rb");
4056   if( in ){
4057     for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
4058       sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4059       if( strcmp(line,pattern) ) break;
4060     }
4061     fclose(in);
4062     if( i==lemp->nterminal ){
4063       /* No change in the file.  Don't rewrite it. */
4064       return;
4065     }
4066   }
4067   out = file_open(lemp,".h","wb");
4068   if( out ){
4069     for(i=1; i<lemp->nterminal; i++){
4070       fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4071     }
4072     fclose(out);
4073   }
4074   return;
4075 }
4076 
4077 /* Reduce the size of the action tables, if possible, by making use
4078 ** of defaults.
4079 **
4080 ** In this version, we take the most frequent REDUCE action and make
4081 ** it the default.  Except, there is no default if the wildcard token
4082 ** is a possible look-ahead.
4083 */
CompressTables(struct lemon * lemp)4084 void CompressTables(struct lemon *lemp)
4085 {
4086   struct state *stp;
4087   struct action *ap, *ap2;
4088   struct rule *rp, *rp2, *rbest;
4089   int nbest, n;
4090   int i;
4091   int usesWildcard;
4092 
4093   for(i=0; i<lemp->nstate; i++){
4094     stp = lemp->sorted[i];
4095     nbest = 0;
4096     rbest = 0;
4097     usesWildcard = 0;
4098 
4099     for(ap=stp->ap; ap; ap=ap->next){
4100       if( ap->type==SHIFT && ap->sp==lemp->wildcard ){
4101         usesWildcard = 1;
4102       }
4103       if( ap->type!=REDUCE ) continue;
4104       rp = ap->x.rp;
4105       if( rp->lhsStart ) continue;
4106       if( rp==rbest ) continue;
4107       n = 1;
4108       for(ap2=ap->next; ap2; ap2=ap2->next){
4109         if( ap2->type!=REDUCE ) continue;
4110         rp2 = ap2->x.rp;
4111         if( rp2==rbest ) continue;
4112         if( rp2==rp ) n++;
4113       }
4114       if( n>nbest ){
4115         nbest = n;
4116         rbest = rp;
4117       }
4118     }
4119 
4120     /* Do not make a default if the number of rules to default
4121     ** is not at least 1 or if the wildcard token is a possible
4122     ** lookahead.
4123     */
4124     if( nbest<1 || usesWildcard ) continue;
4125 
4126 
4127     /* Combine matching REDUCE actions into a single default */
4128     for(ap=stp->ap; ap; ap=ap->next){
4129       if( ap->type==REDUCE && ap->x.rp==rbest ) break;
4130     }
4131     assert( ap );
4132     ap->sp = Symbol_new("{default}");
4133     for(ap=ap->next; ap; ap=ap->next){
4134       if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
4135     }
4136     stp->ap = Action_sort(stp->ap);
4137   }
4138 }
4139 
4140 
4141 /*
4142 ** Compare two states for sorting purposes.  The smaller state is the
4143 ** one with the most non-terminal actions.  If they have the same number
4144 ** of non-terminal actions, then the smaller is the one with the most
4145 ** token actions.
4146 */
stateResortCompare(const void * a,const void * b)4147 static int stateResortCompare(const void *a, const void *b){
4148   const struct state *pA = *(const struct state**)a;
4149   const struct state *pB = *(const struct state**)b;
4150   int n;
4151 
4152   n = pB->nNtAct - pA->nNtAct;
4153   if( n==0 ){
4154     n = pB->nTknAct - pA->nTknAct;
4155     if( n==0 ){
4156       n = pB->statenum - pA->statenum;
4157     }
4158   }
4159   assert( n!=0 );
4160   return n;
4161 }
4162 
4163 
4164 /*
4165 ** Renumber and resort states so that states with fewer choices
4166 ** occur at the end.  Except, keep state 0 as the first state.
4167 */
ResortStates(struct lemon * lemp)4168 void ResortStates(struct lemon *lemp)
4169 {
4170   int i;
4171   struct state *stp;
4172   struct action *ap;
4173 
4174   for(i=0; i<lemp->nstate; i++){
4175     stp = lemp->sorted[i];
4176     stp->nTknAct = stp->nNtAct = 0;
4177     stp->iDflt = lemp->nstate + lemp->nrule;
4178     stp->iTknOfst = NO_OFFSET;
4179     stp->iNtOfst = NO_OFFSET;
4180     for(ap=stp->ap; ap; ap=ap->next){
4181       if( compute_action(lemp,ap)>=0 ){
4182         if( ap->sp->index<lemp->nterminal ){
4183           stp->nTknAct++;
4184         }else if( ap->sp->index<lemp->nsymbol ){
4185           stp->nNtAct++;
4186         }else{
4187           stp->iDflt = compute_action(lemp, ap);
4188         }
4189       }
4190     }
4191   }
4192   qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]),
4193         stateResortCompare);
4194   for(i=0; i<lemp->nstate; i++){
4195     lemp->sorted[i]->statenum = i;
4196   }
4197 }
4198 
4199 
4200 /***************** From the file "set.c" ************************************/
4201 /*
4202 ** Set manipulation routines for the LEMON parser generator.
4203 */
4204 
4205 static int size = 0;
4206 
4207 /* Set the set size */
SetSize(int n)4208 void SetSize(int n)
4209 {
4210   size = n+1;
4211 }
4212 
4213 /* Allocate a new set */
SetNew()4214 char *SetNew(){
4215   char *s;
4216   s = (char*)calloc( size, 1);
4217   if( s==0 ){
4218     extern void memory_error();
4219     memory_error();
4220   }
4221   return s;
4222 }
4223 
4224 /* Deallocate a set */
SetFree(char * s)4225 void SetFree(char *s)
4226 {
4227   free(s);
4228 }
4229 
4230 /* Add a new element to the set.  Return TRUE if the element was added
4231 ** and FALSE if it was already there. */
SetAdd(char * s,int e)4232 int SetAdd(char *s, int e)
4233 {
4234   int rv;
4235   assert( e>=0 && e<size );
4236   rv = s[e];
4237   s[e] = 1;
4238   return !rv;
4239 }
4240 
4241 /* Add every element of s2 to s1.  Return TRUE if s1 changes. */
SetUnion(char * s1,char * s2)4242 int SetUnion(char *s1, char *s2)
4243 {
4244   int i, progress;
4245   progress = 0;
4246   for(i=0; i<size; i++){
4247     if( s2[i]==0 ) continue;
4248     if( s1[i]==0 ){
4249       progress = 1;
4250       s1[i] = 1;
4251     }
4252   }
4253   return progress;
4254 }
4255 /********************** From the file "table.c" ****************************/
4256 /*
4257 ** All code in this file has been automatically generated
4258 ** from a specification in the file
4259 **              "table.q"
4260 ** by the associative array code building program "aagen".
4261 ** Do not edit this file!  Instead, edit the specification
4262 ** file, then rerun aagen.
4263 */
4264 /*
4265 ** Code for processing tables in the LEMON parser generator.
4266 */
4267 
strhash(const char * x)4268 PRIVATE int strhash(const char *x)
4269 {
4270   int h = 0;
4271   while( *x) h = h*13 + *(x++);
4272   return h;
4273 }
4274 
4275 /* Works like strdup, sort of.  Save a string in malloced memory, but
4276 ** keep strings in a table so that the same string is not in more
4277 ** than one place.
4278 */
Strsafe(const char * y)4279 const char *Strsafe(const char *y)
4280 {
4281   const char *z;
4282   char *cpy;
4283 
4284   if( y==0 ) return 0;
4285   z = Strsafe_find(y);
4286   if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){
4287     strcpy(cpy,y);
4288     z = cpy;
4289     Strsafe_insert(z);
4290   }
4291   MemoryCheck(z);
4292   return z;
4293 }
4294 
4295 /* There is one instance of the following structure for each
4296 ** associative array of type "x1".
4297 */
4298 struct s_x1 {
4299   int size;               /* The number of available slots. */
4300                           /*   Must be a power of 2 greater than or */
4301                           /*   equal to 1 */
4302   int count;              /* Number of currently slots filled */
4303   struct s_x1node *tbl;  /* The data stored here */
4304   struct s_x1node **ht;  /* Hash table for lookups */
4305 };
4306 
4307 /* There is one instance of this structure for every data element
4308 ** in an associative array of type "x1".
4309 */
4310 typedef struct s_x1node {
4311   const char *data;        /* The data */
4312   struct s_x1node *next;   /* Next entry with the same hash */
4313   struct s_x1node **from;  /* Previous link */
4314 } x1node;
4315 
4316 /* There is only one instance of the array, which is the following */
4317 static struct s_x1 *x1a;
4318 
4319 /* Allocate a new associative array */
Strsafe_init()4320 void Strsafe_init(){
4321   if( x1a ) return;
4322   x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
4323   if( x1a ){
4324     x1a->size = 1024;
4325     x1a->count = 0;
4326     x1a->tbl = (x1node*)malloc(
4327       (sizeof(x1node) + sizeof(x1node*))*1024 );
4328     if( x1a->tbl==0 ){
4329       free(x1a);
4330       x1a = 0;
4331     }else{
4332       int i;
4333       x1a->ht = (x1node**)&(x1a->tbl[1024]);
4334       for(i=0; i<1024; i++) x1a->ht[i] = 0;
4335     }
4336   }
4337 }
4338 /* Insert a new record into the array.  Return TRUE if successful.
4339 ** Prior data with the same key is NOT overwritten */
Strsafe_insert(const char * data)4340 int Strsafe_insert(const char *data)
4341 {
4342   x1node *np;
4343   int h;
4344   int ph;
4345 
4346   if( x1a==0 ) return 0;
4347   ph = strhash(data);
4348   h = ph & (x1a->size-1);
4349   np = x1a->ht[h];
4350   while( np ){
4351     if( strcmp(np->data,data)==0 ){
4352       /* An existing entry with the same key is found. */
4353       /* Fail because overwrite is not allows. */
4354       return 0;
4355     }
4356     np = np->next;
4357   }
4358   if( x1a->count>=x1a->size ){
4359     /* Need to make the hash table bigger */
4360     int i,size;
4361     struct s_x1 array;
4362     array.size = size = x1a->size*2;
4363     array.count = x1a->count;
4364     array.tbl = (x1node*)malloc(
4365       (sizeof(x1node) + sizeof(x1node*))*size );
4366     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4367     array.ht = (x1node**)&(array.tbl[size]);
4368     for(i=0; i<size; i++) array.ht[i] = 0;
4369     for(i=0; i<x1a->count; i++){
4370       x1node *oldnp, *newnp;
4371       oldnp = &(x1a->tbl[i]);
4372       h = strhash(oldnp->data) & (size-1);
4373       newnp = &(array.tbl[i]);
4374       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4375       newnp->next = array.ht[h];
4376       newnp->data = oldnp->data;
4377       newnp->from = &(array.ht[h]);
4378       array.ht[h] = newnp;
4379     }
4380     free(x1a->tbl);
4381     *x1a = array;
4382   }
4383   /* Insert the new data */
4384   h = ph & (x1a->size-1);
4385   np = &(x1a->tbl[x1a->count++]);
4386   np->data = data;
4387   if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
4388   np->next = x1a->ht[h];
4389   x1a->ht[h] = np;
4390   np->from = &(x1a->ht[h]);
4391   return 1;
4392 }
4393 
4394 /* Return a pointer to data assigned to the given key.  Return NULL
4395 ** if no such key. */
Strsafe_find(const char * key)4396 const char *Strsafe_find(const char *key)
4397 {
4398   int h;
4399   x1node *np;
4400 
4401   if( x1a==0 ) return 0;
4402   h = strhash(key) & (x1a->size-1);
4403   np = x1a->ht[h];
4404   while( np ){
4405     if( strcmp(np->data,key)==0 ) break;
4406     np = np->next;
4407   }
4408   return np ? np->data : 0;
4409 }
4410 
4411 /* Return a pointer to the (terminal or nonterminal) symbol "x".
4412 ** Create a new symbol if this is the first time "x" has been seen.
4413 */
Symbol_new(const char * x)4414 struct symbol *Symbol_new(const char *x)
4415 {
4416   struct symbol *sp;
4417 
4418   sp = Symbol_find(x);
4419   if( sp==0 ){
4420     sp = (struct symbol *)calloc(1, sizeof(struct symbol) );
4421     MemoryCheck(sp);
4422     sp->name = Strsafe(x);
4423     sp->type = isupper(*x) ? TERMINAL : NONTERMINAL;
4424     sp->rule = 0;
4425     sp->fallback = 0;
4426     sp->prec = -1;
4427     sp->assoc = UNK;
4428     sp->firstset = 0;
4429     sp->lambda = LEMON_FALSE;
4430     sp->destructor = 0;
4431     sp->destLineno = 0;
4432     sp->datatype = 0;
4433     sp->useCnt = 0;
4434     Symbol_insert(sp,sp->name);
4435   }
4436   sp->useCnt++;
4437   return sp;
4438 }
4439 
4440 /* Compare two symbols for working purposes
4441 **
4442 ** Symbols that begin with upper case letters (terminals or tokens)
4443 ** must sort before symbols that begin with lower case letters
4444 ** (non-terminals).  Other than that, the order does not matter.
4445 **
4446 ** We find experimentally that leaving the symbols in their original
4447 ** order (the order they appeared in the grammar file) gives the
4448 ** smallest parser tables in SQLite.
4449 */
Symbolcmpp(const void * _a,const void * _b)4450 int Symbolcmpp(const void *_a, const void *_b)
4451 {
4452   const struct symbol **a = (const struct symbol **) _a;
4453   const struct symbol **b = (const struct symbol **) _b;
4454   int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
4455   int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
4456   assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 );
4457   return i1-i2;
4458 }
4459 
4460 /* There is one instance of the following structure for each
4461 ** associative array of type "x2".
4462 */
4463 struct s_x2 {
4464   int size;               /* The number of available slots. */
4465                           /*   Must be a power of 2 greater than or */
4466                           /*   equal to 1 */
4467   int count;              /* Number of currently slots filled */
4468   struct s_x2node *tbl;  /* The data stored here */
4469   struct s_x2node **ht;  /* Hash table for lookups */
4470 };
4471 
4472 /* There is one instance of this structure for every data element
4473 ** in an associative array of type "x2".
4474 */
4475 typedef struct s_x2node {
4476   struct symbol *data;     /* The data */
4477   const char *key;         /* The key */
4478   struct s_x2node *next;   /* Next entry with the same hash */
4479   struct s_x2node **from;  /* Previous link */
4480 } x2node;
4481 
4482 /* There is only one instance of the array, which is the following */
4483 static struct s_x2 *x2a;
4484 
4485 /* Allocate a new associative array */
Symbol_init()4486 void Symbol_init(){
4487   if( x2a ) return;
4488   x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
4489   if( x2a ){
4490     x2a->size = 128;
4491     x2a->count = 0;
4492     x2a->tbl = (x2node*)malloc(
4493       (sizeof(x2node) + sizeof(x2node*))*128 );
4494     if( x2a->tbl==0 ){
4495       free(x2a);
4496       x2a = 0;
4497     }else{
4498       int i;
4499       x2a->ht = (x2node**)&(x2a->tbl[128]);
4500       for(i=0; i<128; i++) x2a->ht[i] = 0;
4501     }
4502   }
4503 }
4504 /* Insert a new record into the array.  Return TRUE if successful.
4505 ** Prior data with the same key is NOT overwritten */
Symbol_insert(struct symbol * data,const char * key)4506 int Symbol_insert(struct symbol *data, const char *key)
4507 {
4508   x2node *np;
4509   int h;
4510   int ph;
4511 
4512   if( x2a==0 ) return 0;
4513   ph = strhash(key);
4514   h = ph & (x2a->size-1);
4515   np = x2a->ht[h];
4516   while( np ){
4517     if( strcmp(np->key,key)==0 ){
4518       /* An existing entry with the same key is found. */
4519       /* Fail because overwrite is not allows. */
4520       return 0;
4521     }
4522     np = np->next;
4523   }
4524   if( x2a->count>=x2a->size ){
4525     /* Need to make the hash table bigger */
4526     int i,size;
4527     struct s_x2 array;
4528     array.size = size = x2a->size*2;
4529     array.count = x2a->count;
4530     array.tbl = (x2node*)malloc(
4531       (sizeof(x2node) + sizeof(x2node*))*size );
4532     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4533     array.ht = (x2node**)&(array.tbl[size]);
4534     for(i=0; i<size; i++) array.ht[i] = 0;
4535     for(i=0; i<x2a->count; i++){
4536       x2node *oldnp, *newnp;
4537       oldnp = &(x2a->tbl[i]);
4538       h = strhash(oldnp->key) & (size-1);
4539       newnp = &(array.tbl[i]);
4540       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4541       newnp->next = array.ht[h];
4542       newnp->key = oldnp->key;
4543       newnp->data = oldnp->data;
4544       newnp->from = &(array.ht[h]);
4545       array.ht[h] = newnp;
4546     }
4547     free(x2a->tbl);
4548     *x2a = array;
4549   }
4550   /* Insert the new data */
4551   h = ph & (x2a->size-1);
4552   np = &(x2a->tbl[x2a->count++]);
4553   np->key = key;
4554   np->data = data;
4555   if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
4556   np->next = x2a->ht[h];
4557   x2a->ht[h] = np;
4558   np->from = &(x2a->ht[h]);
4559   return 1;
4560 }
4561 
4562 /* Return a pointer to data assigned to the given key.  Return NULL
4563 ** if no such key. */
Symbol_find(const char * key)4564 struct symbol *Symbol_find(const char *key)
4565 {
4566   int h;
4567   x2node *np;
4568 
4569   if( x2a==0 ) return 0;
4570   h = strhash(key) & (x2a->size-1);
4571   np = x2a->ht[h];
4572   while( np ){
4573     if( strcmp(np->key,key)==0 ) break;
4574     np = np->next;
4575   }
4576   return np ? np->data : 0;
4577 }
4578 
4579 /* Return the n-th data.  Return NULL if n is out of range. */
Symbol_Nth(int n)4580 struct symbol *Symbol_Nth(int n)
4581 {
4582   struct symbol *data;
4583   if( x2a && n>0 && n<=x2a->count ){
4584     data = x2a->tbl[n-1].data;
4585   }else{
4586     data = 0;
4587   }
4588   return data;
4589 }
4590 
4591 /* Return the size of the array */
Symbol_count()4592 int Symbol_count()
4593 {
4594   return x2a ? x2a->count : 0;
4595 }
4596 
4597 /* Return an array of pointers to all data in the table.
4598 ** The array is obtained from malloc.  Return NULL if memory allocation
4599 ** problems, or if the array is empty. */
Symbol_arrayof()4600 struct symbol **Symbol_arrayof()
4601 {
4602   struct symbol **array;
4603   int i,size;
4604   if( x2a==0 ) return 0;
4605   size = x2a->count;
4606   array = (struct symbol **)calloc(size, sizeof(struct symbol *));
4607   if( array ){
4608     for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
4609   }
4610   return array;
4611 }
4612 
4613 /* Compare two configurations */
Configcmp(const char * _a,const char * _b)4614 int Configcmp(const char *_a,const char *_b)
4615 {
4616   const struct config *a = (struct config *) _a;
4617   const struct config *b = (struct config *) _b;
4618   int x;
4619   x = a->rp->index - b->rp->index;
4620   if( x==0 ) x = a->dot - b->dot;
4621   return x;
4622 }
4623 
4624 /* Compare two states */
statecmp(struct config * a,struct config * b)4625 PRIVATE int statecmp(struct config *a, struct config *b)
4626 {
4627   int rc;
4628   for(rc=0; rc==0 && a && b;  a=a->bp, b=b->bp){
4629     rc = a->rp->index - b->rp->index;
4630     if( rc==0 ) rc = a->dot - b->dot;
4631   }
4632   if( rc==0 ){
4633     if( a ) rc = 1;
4634     if( b ) rc = -1;
4635   }
4636   return rc;
4637 }
4638 
4639 /* Hash a state */
statehash(struct config * a)4640 PRIVATE int statehash(struct config *a)
4641 {
4642   int h=0;
4643   while( a ){
4644     h = h*571 + a->rp->index*37 + a->dot;
4645     a = a->bp;
4646   }
4647   return h;
4648 }
4649 
4650 /* Allocate a new state structure */
State_new()4651 struct state *State_new()
4652 {
4653   struct state *newstate;
4654   newstate = (struct state *)calloc(1, sizeof(struct state) );
4655   MemoryCheck(newstate);
4656   return newstate;
4657 }
4658 
4659 /* There is one instance of the following structure for each
4660 ** associative array of type "x3".
4661 */
4662 struct s_x3 {
4663   int size;               /* The number of available slots. */
4664                           /*   Must be a power of 2 greater than or */
4665                           /*   equal to 1 */
4666   int count;              /* Number of currently slots filled */
4667   struct s_x3node *tbl;  /* The data stored here */
4668   struct s_x3node **ht;  /* Hash table for lookups */
4669 };
4670 
4671 /* There is one instance of this structure for every data element
4672 ** in an associative array of type "x3".
4673 */
4674 typedef struct s_x3node {
4675   struct state *data;                  /* The data */
4676   struct config *key;                   /* The key */
4677   struct s_x3node *next;   /* Next entry with the same hash */
4678   struct s_x3node **from;  /* Previous link */
4679 } x3node;
4680 
4681 /* There is only one instance of the array, which is the following */
4682 static struct s_x3 *x3a;
4683 
4684 /* Allocate a new associative array */
State_init()4685 void State_init(){
4686   if( x3a ) return;
4687   x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
4688   if( x3a ){
4689     x3a->size = 128;
4690     x3a->count = 0;
4691     x3a->tbl = (x3node*)malloc(
4692       (sizeof(x3node) + sizeof(x3node*))*128 );
4693     if( x3a->tbl==0 ){
4694       free(x3a);
4695       x3a = 0;
4696     }else{
4697       int i;
4698       x3a->ht = (x3node**)&(x3a->tbl[128]);
4699       for(i=0; i<128; i++) x3a->ht[i] = 0;
4700     }
4701   }
4702 }
4703 /* Insert a new record into the array.  Return TRUE if successful.
4704 ** Prior data with the same key is NOT overwritten */
State_insert(struct state * data,struct config * key)4705 int State_insert(struct state *data, struct config *key)
4706 {
4707   x3node *np;
4708   int h;
4709   int ph;
4710 
4711   if( x3a==0 ) return 0;
4712   ph = statehash(key);
4713   h = ph & (x3a->size-1);
4714   np = x3a->ht[h];
4715   while( np ){
4716     if( statecmp(np->key,key)==0 ){
4717       /* An existing entry with the same key is found. */
4718       /* Fail because overwrite is not allows. */
4719       return 0;
4720     }
4721     np = np->next;
4722   }
4723   if( x3a->count>=x3a->size ){
4724     /* Need to make the hash table bigger */
4725     int i,size;
4726     struct s_x3 array;
4727     array.size = size = x3a->size*2;
4728     array.count = x3a->count;
4729     array.tbl = (x3node*)malloc(
4730       (sizeof(x3node) + sizeof(x3node*))*size );
4731     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4732     array.ht = (x3node**)&(array.tbl[size]);
4733     for(i=0; i<size; i++) array.ht[i] = 0;
4734     for(i=0; i<x3a->count; i++){
4735       x3node *oldnp, *newnp;
4736       oldnp = &(x3a->tbl[i]);
4737       h = statehash(oldnp->key) & (size-1);
4738       newnp = &(array.tbl[i]);
4739       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4740       newnp->next = array.ht[h];
4741       newnp->key = oldnp->key;
4742       newnp->data = oldnp->data;
4743       newnp->from = &(array.ht[h]);
4744       array.ht[h] = newnp;
4745     }
4746     free(x3a->tbl);
4747     *x3a = array;
4748   }
4749   /* Insert the new data */
4750   h = ph & (x3a->size-1);
4751   np = &(x3a->tbl[x3a->count++]);
4752   np->key = key;
4753   np->data = data;
4754   if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
4755   np->next = x3a->ht[h];
4756   x3a->ht[h] = np;
4757   np->from = &(x3a->ht[h]);
4758   return 1;
4759 }
4760 
4761 /* Return a pointer to data assigned to the given key.  Return NULL
4762 ** if no such key. */
State_find(struct config * key)4763 struct state *State_find(struct config *key)
4764 {
4765   int h;
4766   x3node *np;
4767 
4768   if( x3a==0 ) return 0;
4769   h = statehash(key) & (x3a->size-1);
4770   np = x3a->ht[h];
4771   while( np ){
4772     if( statecmp(np->key,key)==0 ) break;
4773     np = np->next;
4774   }
4775   return np ? np->data : 0;
4776 }
4777 
4778 /* Return an array of pointers to all data in the table.
4779 ** The array is obtained from malloc.  Return NULL if memory allocation
4780 ** problems, or if the array is empty. */
State_arrayof()4781 struct state **State_arrayof()
4782 {
4783   struct state **array;
4784   int i,size;
4785   if( x3a==0 ) return 0;
4786   size = x3a->count;
4787   array = (struct state **)malloc( sizeof(struct state *)*size );
4788   if( array ){
4789     for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
4790   }
4791   return array;
4792 }
4793 
4794 /* Hash a configuration */
confighash(struct config * a)4795 PRIVATE int confighash(struct config *a)
4796 {
4797   int h=0;
4798   h = h*571 + a->rp->index*37 + a->dot;
4799   return h;
4800 }
4801 
4802 /* There is one instance of the following structure for each
4803 ** associative array of type "x4".
4804 */
4805 struct s_x4 {
4806   int size;               /* The number of available slots. */
4807                           /*   Must be a power of 2 greater than or */
4808                           /*   equal to 1 */
4809   int count;              /* Number of currently slots filled */
4810   struct s_x4node *tbl;  /* The data stored here */
4811   struct s_x4node **ht;  /* Hash table for lookups */
4812 };
4813 
4814 /* There is one instance of this structure for every data element
4815 ** in an associative array of type "x4".
4816 */
4817 typedef struct s_x4node {
4818   struct config *data;                  /* The data */
4819   struct s_x4node *next;   /* Next entry with the same hash */
4820   struct s_x4node **from;  /* Previous link */
4821 } x4node;
4822 
4823 /* There is only one instance of the array, which is the following */
4824 static struct s_x4 *x4a;
4825 
4826 /* Allocate a new associative array */
Configtable_init()4827 void Configtable_init(){
4828   if( x4a ) return;
4829   x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
4830   if( x4a ){
4831     x4a->size = 64;
4832     x4a->count = 0;
4833     x4a->tbl = (x4node*)malloc(
4834       (sizeof(x4node) + sizeof(x4node*))*64 );
4835     if( x4a->tbl==0 ){
4836       free(x4a);
4837       x4a = 0;
4838     }else{
4839       int i;
4840       x4a->ht = (x4node**)&(x4a->tbl[64]);
4841       for(i=0; i<64; i++) x4a->ht[i] = 0;
4842     }
4843   }
4844 }
4845 /* Insert a new record into the array.  Return TRUE if successful.
4846 ** Prior data with the same key is NOT overwritten */
Configtable_insert(struct config * data)4847 int Configtable_insert(struct config *data)
4848 {
4849   x4node *np;
4850   int h;
4851   int ph;
4852 
4853   if( x4a==0 ) return 0;
4854   ph = confighash(data);
4855   h = ph & (x4a->size-1);
4856   np = x4a->ht[h];
4857   while( np ){
4858     if( Configcmp((const char *) np->data,(const char *) data)==0 ){
4859       /* An existing entry with the same key is found. */
4860       /* Fail because overwrite is not allows. */
4861       return 0;
4862     }
4863     np = np->next;
4864   }
4865   if( x4a->count>=x4a->size ){
4866     /* Need to make the hash table bigger */
4867     int i,size;
4868     struct s_x4 array;
4869     array.size = size = x4a->size*2;
4870     array.count = x4a->count;
4871     array.tbl = (x4node*)malloc(
4872       (sizeof(x4node) + sizeof(x4node*))*size );
4873     if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
4874     array.ht = (x4node**)&(array.tbl[size]);
4875     for(i=0; i<size; i++) array.ht[i] = 0;
4876     for(i=0; i<x4a->count; i++){
4877       x4node *oldnp, *newnp;
4878       oldnp = &(x4a->tbl[i]);
4879       h = confighash(oldnp->data) & (size-1);
4880       newnp = &(array.tbl[i]);
4881       if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4882       newnp->next = array.ht[h];
4883       newnp->data = oldnp->data;
4884       newnp->from = &(array.ht[h]);
4885       array.ht[h] = newnp;
4886     }
4887     free(x4a->tbl);
4888     *x4a = array;
4889   }
4890   /* Insert the new data */
4891   h = ph & (x4a->size-1);
4892   np = &(x4a->tbl[x4a->count++]);
4893   np->data = data;
4894   if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
4895   np->next = x4a->ht[h];
4896   x4a->ht[h] = np;
4897   np->from = &(x4a->ht[h]);
4898   return 1;
4899 }
4900 
4901 /* Return a pointer to data assigned to the given key.  Return NULL
4902 ** if no such key. */
Configtable_find(struct config * key)4903 struct config *Configtable_find(struct config *key)
4904 {
4905   int h;
4906   x4node *np;
4907 
4908   if( x4a==0 ) return 0;
4909   h = confighash(key) & (x4a->size-1);
4910   np = x4a->ht[h];
4911   while( np ){
4912     if( Configcmp((const char *) np->data,(const char *) key)==0 ) break;
4913     np = np->next;
4914   }
4915   return np ? np->data : 0;
4916 }
4917 
4918 /* Remove all data from the table.  Pass each data to the function "f"
4919 ** as it is removed.  ("f" may be null to avoid this step.) */
Configtable_clear(int (* f)(struct config *))4920 void Configtable_clear(int(*f)(struct config *))
4921 {
4922   int i;
4923   if( x4a==0 || x4a->count==0 ) return;
4924   if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4925   for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
4926   x4a->count = 0;
4927   return;
4928 }
4929