• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Grammar reduction for Bison.
2 
3    Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006 Free
4    Software Foundation, Inc.
5 
6    This file is part of Bison, the GNU Compiler Compiler.
7 
8    Bison is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    Bison is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with Bison; see the file COPYING.  If not, write to
20    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21    Boston, MA 02110-1301, USA.  */
22 
23 
24 /* Reduce the grammar: Find and eliminate unreachable terminals,
25    nonterminals, and productions.  David S. Bakin.  */
26 
27 /* Don't eliminate unreachable terminals: They may be used by the
28    user's parser.  */
29 
30 #include <config.h>
31 #include "system.h"
32 
33 #include <bitset.h>
34 #include <quotearg.h>
35 
36 #include "complain.h"
37 #include "files.h"
38 #include "getargs.h"
39 #include "gram.h"
40 #include "reader.h"
41 #include "reduce.h"
42 #include "symtab.h"
43 
44 /* Set of all nonterminals which are not useless.  */
45 static bitset N;
46 
47 /* Set of all rules which have no useless nonterminals in their RHS.  */
48 static bitset P;
49 
50 /* Set of all accessible symbols.  */
51 static bitset V;
52 
53 /* Set of symbols used to define rule precedence (so they are
54    `useless', but no warning should be issued).  */
55 static bitset V1;
56 
57 static rule_number nuseful_productions;
58 rule_number nuseless_productions;
59 static int nuseful_nonterminals;
60 symbol_number nuseless_nonterminals;
61 
62 /*-------------------------------------------------------------------.
63 | Another way to do this would be with a set for each production and |
64 | then do subset tests against N0, but even for the C grammar the    |
65 | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
66 `-------------------------------------------------------------------*/
67 
68 static bool
useful_production(rule_number r,bitset N0)69 useful_production (rule_number r, bitset N0)
70 {
71   item_number *rhsp;
72 
73   /* A production is useful if all of the nonterminals in its appear
74      in the set of useful nonterminals.  */
75 
76   for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
77     if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
78       return false;
79   return true;
80 }
81 
82 
83 /*---------------------------------------------------------.
84 | Remember that rules are 1-origin, symbols are 0-origin.  |
85 `---------------------------------------------------------*/
86 
87 static void
useless_nonterminals(void)88 useless_nonterminals (void)
89 {
90   bitset Np, Ns;
91   rule_number r;
92 
93   /* N is set as built.  Np is set being built this iteration. P is
94      set of all productions which have a RHS all in N.  */
95 
96   Np = bitset_create (nvars, BITSET_FIXED);
97 
98 
99   /* The set being computed is a set of nonterminals which can derive
100      the empty string or strings consisting of all terminals. At each
101      iteration a nonterminal is added to the set if there is a
102      production with that nonterminal as its LHS for which all the
103      nonterminals in its RHS are already in the set.  Iterate until
104      the set being computed remains unchanged.  Any nonterminals not
105      in the set at that point are useless in that they will never be
106      used in deriving a sentence of the language.
107 
108      This iteration doesn't use any special traversal over the
109      productions.  A set is kept of all productions for which all the
110      nonterminals in the RHS are in useful.  Only productions not in
111      this set are scanned on each iteration.  At the end, this set is
112      saved to be used when finding useful productions: only
113      productions in this set will appear in the final grammar.  */
114 
115   while (1)
116     {
117       bitset_copy (Np, N);
118       for (r = 0; r < nrules; r++)
119 	if (!bitset_test (P, r)
120 	    && useful_production (r, N))
121 	  {
122 	    bitset_set (Np, rules[r].lhs->number - ntokens);
123 	    bitset_set (P, r);
124 	  }
125       if (bitset_equal_p (N, Np))
126 	break;
127       Ns = Np;
128       Np = N;
129       N = Ns;
130     }
131   bitset_free (N);
132   N = Np;
133 }
134 
135 
136 static void
inaccessable_symbols(void)137 inaccessable_symbols (void)
138 {
139   bitset Vp, Vs, Pp;
140 
141   /* Find out which productions are reachable and which symbols are
142      used.  Starting with an empty set of productions and a set of
143      symbols which only has the start symbol in it, iterate over all
144      productions until the set of productions remains unchanged for an
145      iteration.  For each production which has a LHS in the set of
146      reachable symbols, add the production to the set of reachable
147      productions, and add all of the nonterminals in the RHS of the
148      production to the set of reachable symbols.
149 
150      Consider only the (partially) reduced grammar which has only
151      nonterminals in N and productions in P.
152 
153      The result is the set P of productions in the reduced grammar,
154      and the set V of symbols in the reduced grammar.
155 
156      Although this algorithm also computes the set of terminals which
157      are reachable, no terminal will be deleted from the grammar. Some
158      terminals might not be in the grammar but might be generated by
159      semantic routines, and so the user might want them available with
160      specified numbers.  (Is this true?)  However, the nonreachable
161      terminals are printed (if running in verbose mode) so that the
162      user can know.  */
163 
164   Vp = bitset_create (nsyms, BITSET_FIXED);
165   Pp = bitset_create (nrules, BITSET_FIXED);
166 
167   /* If the start symbol isn't useful, then nothing will be useful. */
168   if (bitset_test (N, accept->number - ntokens))
169     {
170       bitset_set (V, accept->number);
171 
172       while (1)
173 	{
174 	  rule_number r;
175 	  bitset_copy (Vp, V);
176 	  for (r = 0; r < nrules; r++)
177 	    {
178 	      if (!bitset_test (Pp, r)
179 		  && bitset_test (P, r)
180 		  && bitset_test (V, rules[r].lhs->number))
181 		{
182 		  item_number *rhsp;
183 		  for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
184 		    if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
185 		      bitset_set (Vp, *rhsp);
186 		  bitset_set (Pp, r);
187 		}
188 	    }
189 	  if (bitset_equal_p (V, Vp))
190 	    break;
191 	  Vs = Vp;
192 	  Vp = V;
193 	  V = Vs;
194 	}
195     }
196 
197   bitset_free (V);
198   V = Vp;
199 
200   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
201   bitset_set (V, endtoken->number);		/* end-of-input token */
202   bitset_set (V, errtoken->number);		/* error token */
203   bitset_set (V, undeftoken->number);		/* some undefined token */
204 
205   bitset_free (P);
206   P = Pp;
207 
208   nuseful_productions = bitset_count (P);
209   nuseless_productions = nrules - nuseful_productions;
210 
211   nuseful_nonterminals = 0;
212   {
213     symbol_number i;
214     for (i = ntokens; i < nsyms; i++)
215       if (bitset_test (V, i))
216 	nuseful_nonterminals++;
217   }
218   nuseless_nonterminals = nvars - nuseful_nonterminals;
219 
220   /* A token that was used in %prec should not be warned about.  */
221   {
222     rule_number r;
223     for (r = 0; r < nrules; ++r)
224       if (rules[r].precsym != 0)
225 	bitset_set (V1, rules[r].precsym->number);
226   }
227 }
228 
229 
230 /*-------------------------------------------------------------------.
231 | Put the useless productions at the end of RULES, and adjust NRULES |
232 | accordingly.                                                       |
233 `-------------------------------------------------------------------*/
234 
235 static void
reduce_grammar_tables(void)236 reduce_grammar_tables (void)
237 {
238   /* Report and flag useless productions.  */
239   {
240     rule_number r;
241     for (r = 0; r < nrules; r++)
242       rules[r].useful = bitset_test (P, r);
243     grammar_rules_never_reduced_report (_("useless rule"));
244   }
245 
246   /* Map the nonterminals to their new index: useful first, useless
247      afterwards.  Kept for later report.  */
248   {
249     int useful = 0;
250     int useless = nrules - nuseless_productions;
251     rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
252     rule_number r;
253     for (r = 0; r < nrules; ++r)
254       rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
255     free (rules);
256     rules = rules_sorted;
257 
258     /* Renumber the rules markers in RITEMS.  */
259     for (r = 0; r < nrules; ++r)
260       {
261 	item_number *rhsp = rules[r].rhs;
262 	for (/* Nothing. */; *rhsp >= 0; ++rhsp)
263 	  /* Nothing. */;
264 	*rhsp = rule_number_as_item_number (r);
265 	rules[r].number = r;
266       }
267     nrules -= nuseless_productions;
268   }
269 
270   /* Adjust NRITEMS.  */
271   {
272     rule_number r;
273     int length;
274     for (r = nrules; r < nrules + nuseless_productions; ++r)
275       {
276 	length = rule_rhs_length (&rules[r]);
277 	nritems -= length + 1;
278       }
279   }
280 }
281 
282 
283 /*------------------------------.
284 | Remove useless nonterminals.  |
285 `------------------------------*/
286 
287 static void
nonterminals_reduce(void)288 nonterminals_reduce (void)
289 {
290   symbol_number i, n;
291 
292   /* Map the nonterminals to their new index: useful first, useless
293      afterwards.  Kept for later report.  */
294 
295   symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
296   n = ntokens;
297   for (i = ntokens; i < nsyms; i++)
298     if (bitset_test (V, i))
299       nontermmap[i - ntokens] = n++;
300   for (i = ntokens; i < nsyms; i++)
301     if (!bitset_test (V, i))
302       {
303 	nontermmap[i - ntokens] = n++;
304 	warn_at (symbols[i]->location, _("useless nonterminal: %s"),
305 		 symbols[i]->tag);
306       }
307 
308 
309   /* Shuffle elements of tables indexed by symbol number.  */
310   {
311     symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
312 
313     for (i = ntokens; i < nsyms; i++)
314       symbols[i]->number = nontermmap[i - ntokens];
315     for (i = ntokens; i < nsyms; i++)
316       symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
317     for (i = ntokens; i < nsyms; i++)
318       symbols[i] = symbols_sorted[i - ntokens];
319     free (symbols_sorted);
320   }
321 
322   {
323     rule_number r;
324     for (r = 0; r < nrules; ++r)
325       {
326 	item_number *rhsp;
327 	for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
328 	  if (ISVAR (*rhsp))
329 	    *rhsp =  symbol_number_as_item_number (nontermmap[*rhsp
330 							      - ntokens]);
331       }
332     accept->number = nontermmap[accept->number - ntokens];
333   }
334 
335   nsyms -= nuseless_nonterminals;
336   nvars -= nuseless_nonterminals;
337 
338   free (nontermmap);
339 }
340 
341 
342 /*------------------------------------------------------------------.
343 | Output the detailed results of the reductions.  For FILE.output.  |
344 `------------------------------------------------------------------*/
345 
346 void
reduce_output(FILE * out)347 reduce_output (FILE *out)
348 {
349   if (nuseless_nonterminals > 0)
350     {
351       int i;
352       fprintf (out, "%s\n\n", _("Useless nonterminals"));
353       for (i = 0; i < nuseless_nonterminals; ++i)
354 	fprintf (out, "   %s\n", symbols[nsyms + i]->tag);
355       fputs ("\n\n", out);
356     }
357 
358   {
359     bool b = false;
360     int i;
361     for (i = 0; i < ntokens; i++)
362       if (!bitset_test (V, i) && !bitset_test (V1, i))
363 	{
364 	  if (!b)
365 	    fprintf (out, "%s\n\n", _("Terminals which are not used"));
366 	  b = true;
367 	  fprintf (out, "   %s\n", symbols[i]->tag);
368 	}
369     if (b)
370       fputs ("\n\n", out);
371   }
372 
373   if (nuseless_productions > 0)
374     grammar_rules_partial_print (out, _("Useless rules"),
375 				 rule_useless_p);
376 }
377 
378 
379 
380 
381 
382 /*-------------------------------.
383 | Report the results to STDERR.  |
384 `-------------------------------*/
385 
386 static void
reduce_print(void)387 reduce_print (void)
388 {
389   if (yacc_flag && nuseless_productions)
390     fprintf (stderr, ngettext ("%d rule never reduced\n",
391 			       "%d rules never reduced\n",
392 			       nuseless_productions),
393 	     nuseless_productions);
394 
395   fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
396 
397   if (nuseless_nonterminals > 0)
398     fprintf (stderr, ngettext ("%d useless nonterminal",
399 			       "%d useless nonterminals",
400 			       nuseless_nonterminals),
401 	     nuseless_nonterminals);
402 
403   if (nuseless_nonterminals > 0 && nuseless_productions > 0)
404     fprintf (stderr, _(" and "));
405 
406   if (nuseless_productions > 0)
407     fprintf (stderr, ngettext ("%d useless rule",
408 			       "%d useless rules",
409 			       nuseless_productions),
410 	     nuseless_productions);
411   fprintf (stderr, "\n");
412 }
413 
414 void
reduce_grammar(void)415 reduce_grammar (void)
416 {
417   bool reduced;
418 
419   /* Allocate the global sets used to compute the reduced grammar */
420 
421   N = bitset_create (nvars, BITSET_FIXED);
422   P =  bitset_create (nrules, BITSET_FIXED);
423   V = bitset_create (nsyms, BITSET_FIXED);
424   V1 = bitset_create (nsyms, BITSET_FIXED);
425 
426   useless_nonterminals ();
427   inaccessable_symbols ();
428 
429   reduced = (nuseless_nonterminals + nuseless_productions > 0);
430   if (!reduced)
431     return;
432 
433   reduce_print ();
434 
435   if (!bitset_test (N, accept->number - ntokens))
436     fatal_at (startsymbol_location,
437 	      _("start symbol %s does not derive any sentence"),
438 	      startsymbol->tag);
439 
440   /* First reduce the nonterminals, as they renumber themselves in the
441      whole grammar.  If you change the order, nonterms would be
442      renumbered only in the reduced grammar.  */
443   if (nuseless_nonterminals > 0)
444     nonterminals_reduce ();
445   if (nuseless_productions > 0)
446     reduce_grammar_tables ();
447 
448   if (trace_flag & trace_grammar)
449     {
450       grammar_dump (stderr, "Reduced Grammar");
451 
452       fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
453 , and %d productions.\n",
454 	       grammar_file, ntokens, nvars, nrules);
455     }
456 }
457 
458 
459 /*-----------------------------------------------------------.
460 | Free the global sets used to compute the reduced grammar.  |
461 `-----------------------------------------------------------*/
462 
463 void
reduce_free(void)464 reduce_free (void)
465 {
466   bitset_free (N);
467   bitset_free (V);
468   bitset_free (V1);
469   bitset_free (P);
470 }
471