• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Bison Grammar Scanner                             -*- C -*-
2 
3    Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 
5    This file is part of Bison, the GNU Compiler Compiler.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301  USA
21 */
22 
23 %option debug nodefault nounput noyywrap never-interactive
24 %option prefix="gram_" outfile="lex.yy.c"
25 
26 %{
27 /* Work around a bug in flex 2.5.31.  See Debian bug 333231
28    <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>.  */
29 #undef gram_wrap
30 #define gram_wrap() 1
31 
32 #include "system.h"
33 
34 #include <mbswidth.h>
35 #include <quote.h>
36 
37 #include "complain.h"
38 #include "files.h"
39 #include "getargs.h"
40 #include "gram.h"
41 #include "quotearg.h"
42 #include "reader.h"
43 #include "uniqstr.h"
44 
45 #define YY_USER_INIT					\
46   do							\
47     {							\
48       scanner_cursor.file = current_file;		\
49       scanner_cursor.line = 1;				\
50       scanner_cursor.column = 1;			\
51       code_start = scanner_cursor;			\
52     }							\
53   while (0)
54 
55 /* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used.  */
56 int gram_get_lineno (void);
57 FILE *gram_get_in (void);
58 FILE *gram_get_out (void);
59 int gram_get_leng (void);
60 char *gram_get_text (void);
61 void gram_set_lineno (int);
62 void gram_set_in (FILE *);
63 void gram_set_out (FILE *);
64 int gram_get_debug (void);
65 void gram_set_debug (int);
66 int gram_lex_destroy (void);
67 
68 /* Location of scanner cursor.  */
69 boundary scanner_cursor;
70 
71 static void adjust_location (location *, char const *, size_t);
72 #define YY_USER_ACTION  adjust_location (loc, yytext, yyleng);
73 
74 static size_t no_cr_read (FILE *, char *, size_t);
75 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
76 
77 
78 /* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
79    keep (to construct ID, STRINGS etc.).  Use the following macros to
80    use it.
81 
82    Use STRING_GROW to append what has just been matched, and
83    STRING_FINISH to end the string (it puts the ending 0).
84    STRING_FINISH also stores this string in LAST_STRING, which can be
85    used, and which is used by STRING_FREE to free the last string.  */
86 
87 static struct obstack obstack_for_string;
88 
89 /* A string representing the most recently saved token.  */
90 char *last_string;
91 
92 /* The location of the most recently saved token, if it was a
93    BRACED_CODE token; otherwise, this has an unspecified value.  */
94 location last_braced_code_loc;
95 
96 #define STRING_GROW   \
97   obstack_grow (&obstack_for_string, yytext, yyleng)
98 
99 #define STRING_FINISH					\
100   do {							\
101     obstack_1grow (&obstack_for_string, '\0');		\
102     last_string = obstack_finish (&obstack_for_string);	\
103   } while (0)
104 
105 #define STRING_FREE \
106   obstack_free (&obstack_for_string, last_string)
107 
108 void
scanner_last_string_free(void)109 scanner_last_string_free (void)
110 {
111   STRING_FREE;
112 }
113 
114 /* Within well-formed rules, RULE_LENGTH is the number of values in
115    the current rule so far, which says where to find `$0' with respect
116    to the top of the stack.  It is not the same as the rule->length in
117    the case of mid rule actions.
118 
119    Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
120 static int rule_length;
121 
122 static void rule_length_overflow (location) __attribute__ ((__noreturn__));
123 
124 /* Increment the rule length by one, checking for overflow.  */
125 static inline void
increment_rule_length(location loc)126 increment_rule_length (location loc)
127 {
128   rule_length++;
129 
130   /* Don't allow rule_length == INT_MAX, since that might cause
131      confusion with strtol if INT_MAX == LONG_MAX.  */
132   if (rule_length == INT_MAX)
133     rule_length_overflow (loc);
134 }
135 
136 static void handle_dollar (int token_type, char *cp, location loc);
137 static void handle_at (int token_type, char *cp, location loc);
138 static void handle_syncline (char *, location);
139 static unsigned long int scan_integer (char const *p, int base, location loc);
140 static int convert_ucn_to_byte (char const *hex_text);
141 static void unexpected_eof (boundary, char const *);
142 static void unexpected_newline (boundary, char const *);
143 
144 %}
145 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
146 %x SC_STRING SC_CHARACTER
147 %x SC_AFTER_IDENTIFIER
148 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
149 %x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
150 
151 letter	  [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
152 id	  {letter}({letter}|[0-9])*
153 directive %{letter}({letter}|[0-9]|-)*
154 int	  [0-9]+
155 
156 /* POSIX says that a tag must be both an id and a C union member, but
157    historically almost any character is allowed in a tag.  We disallow
158    NUL and newline, as this simplifies our implementation.  */
159 tag	 [^\0\n>]+
160 
161 /* Zero or more instances of backslash-newline.  Following GCC, allow
162    white space between the backslash and the newline.  */
163 splice	 (\\[ \f\t\v]*\n)*
164 
165 %%
166 %{
167   /* Nesting level of the current code in braces.  */
168   int braces_level IF_LINT (= 0);
169 
170   /* Parent context state, when applicable.  */
171   int context_state IF_LINT (= 0);
172 
173   /* Token type to return, when applicable.  */
174   int token_type IF_LINT (= 0);
175 
176   /* Location of most recent identifier, when applicable.  */
177   location id_loc IF_LINT (= empty_location);
178 
179   /* Where containing code started, when applicable.  Its initial
180      value is relevant only when yylex is invoked in the SC_EPILOGUE
181      start condition.  */
182   boundary code_start = scanner_cursor;
183 
184   /* Where containing comment or string or character literal started,
185      when applicable.  */
186   boundary token_start IF_LINT (= scanner_cursor);
187 %}
188 
189 
190   /*-----------------------.
191   | Scanning white space.  |
192   `-----------------------*/
193 
194 <INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
195 {
196   /* Comments and white space.  */
197   ","	       warn_at (*loc, _("stray `,' treated as white space"));
198   [ \f\n\t\v]  |
199   "//".*       ;
200   "/*" {
201     token_start = loc->start;
202     context_state = YY_START;
203     BEGIN SC_YACC_COMMENT;
204   }
205 
206   /* #line directives are not documented, and may be withdrawn or
207      modified in future versions of Bison.  */
208   ^"#line "{int}" \"".*"\"\n" {
209     handle_syncline (yytext + sizeof "#line " - 1, *loc);
210   }
211 }
212 
213 
214   /*----------------------------.
215   | Scanning Bison directives.  |
216   `----------------------------*/
217 <INITIAL>
218 {
219   "%binary"               return PERCENT_NONASSOC;
220   "%debug"                return PERCENT_DEBUG;
221   "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
222   "%define"               return PERCENT_DEFINE;
223   "%defines"              return PERCENT_DEFINES;
224   "%destructor"		  token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
225   "%dprec"		  return PERCENT_DPREC;
226   "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
227   "%expect"               return PERCENT_EXPECT;
228   "%expect"[-_]"rr"	  return PERCENT_EXPECT_RR;
229   "%file-prefix"          return PERCENT_FILE_PREFIX;
230   "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
231   "%initial-action"       token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
232   "%glr-parser"           return PERCENT_GLR_PARSER;
233   "%left"                 return PERCENT_LEFT;
234   "%lex-param"		  token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
235   "%locations"            return PERCENT_LOCATIONS;
236   "%merge"		  return PERCENT_MERGE;
237   "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
238   "%no"[-_]"default"[-_]"prec"	return PERCENT_NO_DEFAULT_PREC;
239   "%no"[-_]"lines"        return PERCENT_NO_LINES;
240   "%nonassoc"             return PERCENT_NONASSOC;
241   "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
242   "%nterm"                return PERCENT_NTERM;
243   "%output"               return PERCENT_OUTPUT;
244   "%parse-param"	  token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
245   "%prec"                 rule_length--; return PERCENT_PREC;
246   "%printer"              token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
247   "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
248   "%require"              return PERCENT_REQUIRE;
249   "%right"                return PERCENT_RIGHT;
250   "%skeleton"             return PERCENT_SKELETON;
251   "%start"                return PERCENT_START;
252   "%term"                 return PERCENT_TOKEN;
253   "%token"                return PERCENT_TOKEN;
254   "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
255   "%type"                 return PERCENT_TYPE;
256   "%union"		  token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
257   "%verbose"              return PERCENT_VERBOSE;
258   "%yacc"                 return PERCENT_YACC;
259 
260   {directive} {
261     complain_at (*loc, _("invalid directive: %s"), quote (yytext));
262   }
263 
264   "="                     return EQUAL;
265   "|"                     rule_length = 0; return PIPE;
266   ";"                     return SEMICOLON;
267 
268   {id} {
269     val->symbol = symbol_get (yytext, *loc);
270     id_loc = *loc;
271     increment_rule_length (*loc);
272     BEGIN SC_AFTER_IDENTIFIER;
273   }
274 
275   {int} {
276     val->integer = scan_integer (yytext, 10, *loc);
277     return INT;
278   }
279   0[xX][0-9abcdefABCDEF]+ {
280     val->integer = scan_integer (yytext, 16, *loc);
281     return INT;
282   }
283 
284   /* Characters.  We don't check there is only one.  */
285   "'"	      STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
286 
287   /* Strings. */
288   "\""	      token_start = loc->start; BEGIN SC_ESCAPED_STRING;
289 
290   /* Prologue. */
291   "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;
292 
293   /* Code in between braces.  */
294   "{" {
295     if (current_rule && current_rule->action)
296       grammar_midrule_action ();
297     STRING_GROW;
298     token_type = BRACED_CODE;
299     braces_level = 0;
300     code_start = loc->start;
301     BEGIN SC_BRACED_CODE;
302   }
303 
304   /* A type. */
305   "<"{tag}">" {
306     obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
307     STRING_FINISH;
308     val->uniqstr = uniqstr_new (last_string);
309     STRING_FREE;
310     return TYPE;
311   }
312 
313   "%%" {
314     static int percent_percent_count;
315     if (++percent_percent_count == 2)
316       BEGIN SC_EPILOGUE;
317     return PERCENT_PERCENT;
318   }
319 
320   . {
321     complain_at (*loc, _("invalid character: %s"), quote (yytext));
322   }
323 
324   <<EOF>> {
325     loc->start = loc->end = scanner_cursor;
326     yyterminate ();
327   }
328 }
329 
330 
331   /*-----------------------------------------------------------------.
332   | Scanning after an identifier, checking whether a colon is next.  |
333   `-----------------------------------------------------------------*/
334 
335 <SC_AFTER_IDENTIFIER>
336 {
337   ":" {
338     rule_length = 0;
339     *loc = id_loc;
340     BEGIN INITIAL;
341     return ID_COLON;
342   }
343   . {
344     scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
345     yyless (0);
346     *loc = id_loc;
347     BEGIN INITIAL;
348     return ID;
349   }
350   <<EOF>> {
351     *loc = id_loc;
352     BEGIN INITIAL;
353     return ID;
354   }
355 }
356 
357 
358   /*---------------------------------------------------------------.
359   | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
360   `---------------------------------------------------------------*/
361 
362 <SC_YACC_COMMENT>
363 {
364   "*/"     BEGIN context_state;
365   .|\n	   ;
366   <<EOF>>  unexpected_eof (token_start, "*/"); BEGIN context_state;
367 }
368 
369 
370   /*------------------------------------------------------------.
371   | Scanning a C comment.  The initial `/ *' is already eaten.  |
372   `------------------------------------------------------------*/
373 
374 <SC_COMMENT>
375 {
376   "*"{splice}"/"  STRING_GROW; BEGIN context_state;
377   <<EOF>>	  unexpected_eof (token_start, "*/"); BEGIN context_state;
378 }
379 
380 
381   /*--------------------------------------------------------------.
382   | Scanning a line comment.  The initial `//' is already eaten.  |
383   `--------------------------------------------------------------*/
384 
385 <SC_LINE_COMMENT>
386 {
387   "\n"		 STRING_GROW; BEGIN context_state;
388   {splice}	 STRING_GROW;
389   <<EOF>>	 BEGIN context_state;
390 }
391 
392 
393   /*------------------------------------------------.
394   | Scanning a Bison string, including its escapes. |
395   | The initial quote is already eaten.             |
396   `------------------------------------------------*/
397 
398 <SC_ESCAPED_STRING>
399 {
400   "\"" {
401     STRING_FINISH;
402     loc->start = token_start;
403     val->chars = last_string;
404     increment_rule_length (*loc);
405     BEGIN INITIAL;
406     return STRING;
407   }
408   \n		unexpected_newline (token_start, "\"");	BEGIN INITIAL;
409   <<EOF>>	unexpected_eof (token_start, "\"");	BEGIN INITIAL;
410 }
411 
412   /*----------------------------------------------------------.
413   | Scanning a Bison character literal, decoding its escapes. |
414   | The initial quote is already eaten.			      |
415   `----------------------------------------------------------*/
416 
417 <SC_ESCAPED_CHARACTER>
418 {
419   "'" {
420     unsigned char last_string_1;
421     STRING_GROW;
422     STRING_FINISH;
423     loc->start = token_start;
424     val->symbol = symbol_get (quotearg_style (escape_quoting_style,
425 					      last_string),
426 			      *loc);
427     symbol_class_set (val->symbol, token_sym, *loc, false);
428     last_string_1 = last_string[1];
429     symbol_user_token_number_set (val->symbol, last_string_1, *loc);
430     STRING_FREE;
431     increment_rule_length (*loc);
432     BEGIN INITIAL;
433     return ID;
434   }
435   \n		unexpected_newline (token_start, "'");	BEGIN INITIAL;
436   <<EOF>>	unexpected_eof (token_start, "'");	BEGIN INITIAL;
437 }
438 
439 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
440 {
441   \0	    complain_at (*loc, _("invalid null character"));
442 }
443 
444 
445   /*----------------------------.
446   | Decode escaped characters.  |
447   `----------------------------*/
448 
449 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
450 {
451   \\[0-7]{1,3} {
452     unsigned long int c = strtoul (yytext + 1, NULL, 8);
453     if (UCHAR_MAX < c)
454       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
455     else if (! c)
456       complain_at (*loc, _("invalid null character: %s"), quote (yytext));
457     else
458       obstack_1grow (&obstack_for_string, c);
459   }
460 
461   \\x[0-9abcdefABCDEF]+ {
462     verify (UCHAR_MAX < ULONG_MAX);
463     unsigned long int c = strtoul (yytext + 2, NULL, 16);
464     if (UCHAR_MAX < c)
465       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
466     else if (! c)
467       complain_at (*loc, _("invalid null character: %s"), quote (yytext));
468     else
469       obstack_1grow (&obstack_for_string, c);
470   }
471 
472   \\a	obstack_1grow (&obstack_for_string, '\a');
473   \\b	obstack_1grow (&obstack_for_string, '\b');
474   \\f	obstack_1grow (&obstack_for_string, '\f');
475   \\n	obstack_1grow (&obstack_for_string, '\n');
476   \\r	obstack_1grow (&obstack_for_string, '\r');
477   \\t	obstack_1grow (&obstack_for_string, '\t');
478   \\v	obstack_1grow (&obstack_for_string, '\v');
479 
480   /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
481   \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);
482 
483   \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
484     int c = convert_ucn_to_byte (yytext);
485     if (c < 0)
486       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
487     else if (! c)
488       complain_at (*loc, _("invalid null character: %s"), quote (yytext));
489     else
490       obstack_1grow (&obstack_for_string, c);
491   }
492   \\(.|\n)	{
493     complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
494     STRING_GROW;
495   }
496 }
497 
498   /*--------------------------------------------.
499   | Scanning user-code characters and strings.  |
500   `--------------------------------------------*/
501 
502 <SC_CHARACTER,SC_STRING>
503 {
504   {splice}|\\{splice}[^\n$@\[\]]	STRING_GROW;
505 }
506 
507 <SC_CHARACTER>
508 {
509   "'"		STRING_GROW; BEGIN context_state;
510   \n		unexpected_newline (token_start, "'"); BEGIN context_state;
511   <<EOF>>	unexpected_eof (token_start, "'"); BEGIN context_state;
512 }
513 
514 <SC_STRING>
515 {
516   "\""		STRING_GROW; BEGIN context_state;
517   \n		unexpected_newline (token_start, "\""); BEGIN context_state;
518   <<EOF>>	unexpected_eof (token_start, "\""); BEGIN context_state;
519 }
520 
521 
522   /*---------------------------------------------------.
523   | Strings, comments etc. can be found in user code.  |
524   `---------------------------------------------------*/
525 
526 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
527 {
528   "'" {
529     STRING_GROW;
530     context_state = YY_START;
531     token_start = loc->start;
532     BEGIN SC_CHARACTER;
533   }
534   "\"" {
535     STRING_GROW;
536     context_state = YY_START;
537     token_start = loc->start;
538     BEGIN SC_STRING;
539   }
540   "/"{splice}"*" {
541     STRING_GROW;
542     context_state = YY_START;
543     token_start = loc->start;
544     BEGIN SC_COMMENT;
545   }
546   "/"{splice}"/" {
547     STRING_GROW;
548     context_state = YY_START;
549     BEGIN SC_LINE_COMMENT;
550   }
551 }
552 
553 
554   /*---------------------------------------------------------------.
555   | Scanning after %union etc., possibly followed by white space.  |
556   | For %union only, allow arbitrary C code to appear before the   |
557   | following brace, as an extension to POSIX.			   |
558   `---------------------------------------------------------------*/
559 
560 <SC_PRE_CODE>
561 {
562   . {
563     bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
564     scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
565     yyless (0);
566 
567     if (valid)
568       {
569 	braces_level = -1;
570 	code_start = loc->start;
571 	BEGIN SC_BRACED_CODE;
572       }
573     else
574       {
575 	complain_at (*loc, _("missing `{' in %s"),
576 		     token_name (token_type));
577 	obstack_sgrow (&obstack_for_string, "{}");
578 	STRING_FINISH;
579 	val->chars = last_string;
580 	BEGIN INITIAL;
581 	return token_type;
582       }
583   }
584 
585   <<EOF>>  unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
586 }
587 
588 
589   /*---------------------------------------------------------------.
590   | Scanning some code in braces (%union and actions). The initial |
591   | "{" is already eaten.                                          |
592   `---------------------------------------------------------------*/
593 
594 <SC_BRACED_CODE>
595 {
596   "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
597   "%"{splice}">"      STRING_GROW; braces_level--;
598   "}" {
599     bool outer_brace = --braces_level < 0;
600 
601     /* As an undocumented Bison extension, append `;' before the last
602        brace in braced code, so that the user code can omit trailing
603        `;'.  But do not append `;' if emulating Yacc, since Yacc does
604        not append one.
605 
606        FIXME: Bison should warn if a semicolon seems to be necessary
607        here, and should omit the semicolon if it seems unnecessary
608        (e.g., after ';', '{', or '}', each followed by comments or
609        white space).  Such a warning shouldn't depend on --yacc; it
610        should depend on a new --pedantic option, which would cause
611        Bison to warn if it detects an extension to POSIX.  --pedantic
612        should also diagnose other Bison extensions like %yacc.
613        Perhaps there should also be a GCC-style --pedantic-errors
614        option, so that such warnings are diagnosed as errors.  */
615     if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
616       obstack_1grow (&obstack_for_string, ';');
617 
618     obstack_1grow (&obstack_for_string, '}');
619 
620     if (outer_brace)
621       {
622 	STRING_FINISH;
623 	loc->start = code_start;
624 	val->chars = last_string;
625 	increment_rule_length (*loc);
626 	last_braced_code_loc = *loc;
627 	BEGIN INITIAL;
628 	return token_type;
629       }
630   }
631 
632   /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
633      (as `<' `<%').  */
634   "<"{splice}"<"  STRING_GROW;
635 
636   "$"("<"{tag}">")?(-?[0-9]+|"$")  handle_dollar (token_type, yytext, *loc);
637   "@"(-?[0-9]+|"$")		   handle_at (token_type, yytext, *loc);
638 
639   "$"  {
640     warn_at (*loc, _("stray `$'"));
641     obstack_sgrow (&obstack_for_string, "$][");
642   }
643   "@"  {
644     warn_at (*loc, _("stray `@'"));
645     obstack_sgrow (&obstack_for_string, "@@");
646   }
647 
648   <<EOF>>  unexpected_eof (code_start, "}"); BEGIN INITIAL;
649 }
650 
651 
652   /*--------------------------------------------------------------.
653   | Scanning some prologue: from "%{" (already scanned) to "%}".  |
654   `--------------------------------------------------------------*/
655 
656 <SC_PROLOGUE>
657 {
658   "%}" {
659     STRING_FINISH;
660     loc->start = code_start;
661     val->chars = last_string;
662     BEGIN INITIAL;
663     return PROLOGUE;
664   }
665 
666   <<EOF>>  unexpected_eof (code_start, "%}"); BEGIN INITIAL;
667 }
668 
669 
670   /*---------------------------------------------------------------.
671   | Scanning the epilogue (everything after the second "%%", which |
672   | has already been eaten).                                       |
673   `---------------------------------------------------------------*/
674 
675 <SC_EPILOGUE>
676 {
677   <<EOF>> {
678     STRING_FINISH;
679     loc->start = code_start;
680     val->chars = last_string;
681     BEGIN INITIAL;
682     return EPILOGUE;
683   }
684 }
685 
686 
687   /*-----------------------------------------.
688   | Escape M4 quoting characters in C code.  |
689   `-----------------------------------------*/
690 
691 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
692 {
693   \$	obstack_sgrow (&obstack_for_string, "$][");
694   \@	obstack_sgrow (&obstack_for_string, "@@");
695   \[	obstack_sgrow (&obstack_for_string, "@{");
696   \]	obstack_sgrow (&obstack_for_string, "@}");
697 }
698 
699 
700   /*-----------------------------------------------------.
701   | By default, grow the string obstack with the input.  |
702   `-----------------------------------------------------*/
703 
704 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.	|
705 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	STRING_GROW;
706 
707 %%
708 
709 /* The maximum number of semantic values to the left of a handle
710    (those referenced by $0, $-1, etc.) that are required by the
711    semantic actions of this grammar. */
712 int max_left_semantic_context = 0;
713 
/* Return COLUMN advanced by the width of a piece of text.  When BUF
   is null, BUFSIZE itself is the width to add (and must then be less
   than INT_MAX); otherwise the width is mbsnwidth (BUF, BUFSIZE, 0).
   The result saturates at INT_MAX when the sum overflows, or when an
   overflow might occur but cannot be detected.  COLUMN is assumed to
   be nonnegative.  */

static inline int
add_column_width (int column, char const *buf, size_t bufsize)
{
  /* How many columns are left before INT_MAX is reached.  */
  unsigned int room = INT_MAX - column;
  size_t extra = bufsize;

  if (buf != NULL)
    {
      /* Refuse very large buffers up front rather than measuring them.  */
      if (bufsize >= INT_MAX / 2)
        return INT_MAX;
      extra = mbsnwidth (buf, bufsize, 0);
    }

  if (room < extra)
    return INT_MAX;
  return column + extra;
}
736 
737 /* Set *LOC and adjust scanner cursor to account for token TOKEN of
738    size SIZE.  */
739 
740 static void
adjust_location(location * loc,char const * token,size_t size)741 adjust_location (location *loc, char const *token, size_t size)
742 {
743   int line = scanner_cursor.line;
744   int column = scanner_cursor.column;
745   char const *p0 = token;
746   char const *p = token;
747   char const *lim = token + size;
748 
749   loc->start = scanner_cursor;
750 
751   for (p = token; p < lim; p++)
752     switch (*p)
753       {
754       case '\n':
755 	line += line < INT_MAX;
756 	column = 1;
757 	p0 = p + 1;
758 	break;
759 
760       case '\t':
761 	column = add_column_width (column, p0, p - p0);
762 	column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
763 	p0 = p + 1;
764 	break;
765 
766       default:
767 	break;
768       }
769 
770   scanner_cursor.line = line;
771   scanner_cursor.column = column = add_column_width (column, p0, p - p0);
772 
773   loc->end = scanner_cursor;
774 
775   if (line == INT_MAX && loc->start.line != INT_MAX)
776     warn_at (*loc, _("line number overflow"));
777   if (column == INT_MAX && loc->start.column != INT_MAX)
778     warn_at (*loc, _("column number overflow"));
779 }
780 
781 
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored in BUF.  Carriage returns are removed from the input:
   both "\r\n" and an isolated '\r' are delivered as a single '\n'.
   When a '\r' is the last byte read, one more character is peeked
   from FP so that a following '\n' can be swallowed.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  /* Nothing before the first '\r' needs to move, so start there.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  char const *src = dst;
  char const *end = buf + nread;

  if (dst == NULL)
    return nread;

  while (src < end)
    {
      char c = *src++;

      if (c != '\r')
        {
          *dst++ = c;
          continue;
        }

      /* Found an '\r'.  Emit '\n' in its place and drop any '\n'
         that immediately follows.  */
      *dst++ = '\n';
      if (src < end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' ended the buffer: look at the next input
             character, and push it back unless it is the '\n' to
             be ignored.  */
          int ch = getc (fp);
          if (ch != '\n')
            ungetc (ch, fp);
        }
    }

  return dst - buf;
}
827 
828 
829 /*------------------------------------------------------------------.
830 | TEXT is pointing to a wannabee semantic value (i.e., a `$').      |
831 |                                                                   |
832 | Possible inputs: $[<TYPENAME>]($|integer)                         |
833 |                                                                   |
834 | Output to OBSTACK_FOR_STRING a reference to this semantic value.  |
835 `------------------------------------------------------------------*/
836 
837 static inline bool
handle_action_dollar(char * text,location loc)838 handle_action_dollar (char *text, location loc)
839 {
840   const char *type_name = NULL;
841   char *cp = text + 1;
842 
843   if (! current_rule)
844     return false;
845 
846   /* Get the type name if explicit. */
847   if (*cp == '<')
848     {
849       type_name = ++cp;
850       while (*cp != '>')
851 	++cp;
852       *cp = '\0';
853       ++cp;
854     }
855 
856   if (*cp == '$')
857     {
858       if (!type_name)
859 	type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
860       if (!type_name && typed)
861 	complain_at (loc, _("$$ of `%s' has no declared type"),
862 		     current_rule->sym->tag);
863       if (!type_name)
864 	type_name = "";
865       obstack_fgrow1 (&obstack_for_string,
866 		      "]b4_lhs_value([%s])[", type_name);
867       current_rule->used = true;
868     }
869   else
870     {
871       long int num = strtol (cp, NULL, 10);
872 
873       if (1 - INT_MAX + rule_length <= num && num <= rule_length)
874 	{
875 	  int n = num;
876 	  if (max_left_semantic_context < 1 - n)
877 	    max_left_semantic_context = 1 - n;
878 	  if (!type_name && 0 < n)
879 	    type_name = symbol_list_n_type_name_get (current_rule, loc, n);
880 	  if (!type_name && typed)
881 	    complain_at (loc, _("$%d of `%s' has no declared type"),
882 			 n, current_rule->sym->tag);
883 	  if (!type_name)
884 	    type_name = "";
885 	  obstack_fgrow3 (&obstack_for_string,
886 			  "]b4_rhs_value(%d, %d, [%s])[",
887 			  rule_length, n, type_name);
888 	  symbol_list_n_used_set (current_rule, n, true);
889 	}
890       else
891 	complain_at (loc, _("integer out of range: %s"), quote (text));
892     }
893 
894   return true;
895 }
896 
897 
898 /*----------------------------------------------------------------.
899 | Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
900 | (are we in an action?).                                         |
901 `----------------------------------------------------------------*/
902 
903 static void
handle_dollar(int token_type,char * text,location loc)904 handle_dollar (int token_type, char *text, location loc)
905 {
906   switch (token_type)
907     {
908     case BRACED_CODE:
909       if (handle_action_dollar (text, loc))
910 	return;
911       break;
912 
913     case PERCENT_DESTRUCTOR:
914     case PERCENT_INITIAL_ACTION:
915     case PERCENT_PRINTER:
916       if (text[1] == '$')
917 	{
918 	  obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
919 	  return;
920 	}
921       break;
922 
923     default:
924       break;
925     }
926 
927   complain_at (loc, _("invalid value: %s"), quote (text));
928 }
929 
930 
931 /*------------------------------------------------------.
932 | TEXT is a location token (i.e., a `@...').  Output to |
933 | OBSTACK_FOR_STRING a reference to this location.      |
934 `------------------------------------------------------*/
935 
936 static inline bool
handle_action_at(char * text,location loc)937 handle_action_at (char *text, location loc)
938 {
939   char *cp = text + 1;
940   locations_flag = true;
941 
942   if (! current_rule)
943     return false;
944 
945   if (*cp == '$')
946     obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
947   else
948     {
949       long int num = strtol (cp, NULL, 10);
950 
951       if (1 - INT_MAX + rule_length <= num && num <= rule_length)
952 	{
953 	  int n = num;
954 	  obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
955 			  rule_length, n);
956 	}
957       else
958 	complain_at (loc, _("integer out of range: %s"), quote (text));
959     }
960 
961   return true;
962 }
963 
964 
965 /*----------------------------------------------------------------.
966 | Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
967 | (are we in an action?).                                         |
968 `----------------------------------------------------------------*/
969 
970 static void
handle_at(int token_type,char * text,location loc)971 handle_at (int token_type, char *text, location loc)
972 {
973   switch (token_type)
974     {
975     case BRACED_CODE:
976       handle_action_at (text, loc);
977       return;
978 
979     case PERCENT_INITIAL_ACTION:
980     case PERCENT_DESTRUCTOR:
981     case PERCENT_PRINTER:
982       if (text[1] == '$')
983 	{
984 	  obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
985 	  return;
986 	}
987       break;
988 
989     default:
990       break;
991     }
992 
993   complain_at (loc, _("invalid value: %s"), quote (text));
994 }
995 
996 
997 /*------------------------------------------------------.
998 | Scan NUMBER for a base-BASE integer at location LOC.  |
999 `------------------------------------------------------*/
1000 
1001 static unsigned long int
scan_integer(char const * number,int base,location loc)1002 scan_integer (char const *number, int base, location loc)
1003 {
1004   verify (INT_MAX < ULONG_MAX);
1005   unsigned long int num = strtoul (number, NULL, base);
1006 
1007   if (INT_MAX < num)
1008     {
1009       complain_at (loc, _("integer out of range: %s"), quote (number));
1010       num = INT_MAX;
1011     }
1012 
1013   return num;
1014 }
1015 
1016 
/*-----------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character  |
| and return it.  Return -1 if UCN does not correspond to a        |
| single-byte character.  The hexadecimal digits are read starting |
| at the third character of UCN.                                   |
`-----------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every byte value must be representable in the int result.  */
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int value = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < value)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Translate VALUE through a table, indexed by
       ASCII code, of the C basic execution character set, which is
       guaranteed to exist on all Standard C platforms.  The table
       also includes '$', '@', and '`', which are not in the basic
       execution character set but which are unibyte characters on
       all the platforms that we know about.  Entries of -1 mark
       codes with no counterpart.  */
    static signed char const basic_charset[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no counterpart either.  */
    if (value < sizeof basic_charset)
      value = basic_charset[value];
    else
      value = -1;
  }
#endif

  return value;
}
1072 
1073 
1074 /*----------------------------------------------------------------.
1075 | Handle `#line INT "FILE"'.  ARGS has already skipped `#line '.  |
1076 `----------------------------------------------------------------*/
1077 
1078 static void
handle_syncline(char * args,location loc)1079 handle_syncline (char *args, location loc)
1080 {
1081   char *after_num;
1082   unsigned long int lineno = strtoul (args, &after_num, 10);
1083   char *file = strchr (after_num, '"') + 1;
1084   *strchr (file, '"') = '\0';
1085   if (INT_MAX <= lineno)
1086     {
1087       warn_at (loc, _("line number overflow"));
1088       lineno = INT_MAX;
1089     }
1090   scanner_cursor.file = current_file = uniqstr_new (file);
1091   scanner_cursor.line = lineno;
1092   scanner_cursor.column = 1;
1093 }
1094 
1095 
1096 /*---------------------------------.
1097 | Report a rule that is too long.  |
1098 `---------------------------------*/
1099 
1100 static void
rule_length_overflow(location loc)1101 rule_length_overflow (location loc)
1102 {
1103   fatal_at (loc, _("rule is too long"));
1104 }
1105 
1106 
1107 /*----------------------------------------------------------------.
1108 | For a token or comment starting at START, report message MSGID, |
1109 | which should say that an end marker was found before		  |
1110 | the expected TOKEN_END.					  |
1111 `----------------------------------------------------------------*/
1112 
1113 static void
unexpected_end(boundary start,char const * msgid,char const * token_end)1114 unexpected_end (boundary start, char const *msgid, char const *token_end)
1115 {
1116   location loc;
1117   loc.start = start;
1118   loc.end = scanner_cursor;
1119   complain_at (loc, _(msgid), token_end);
1120 }
1121 
1122 
1123 /*------------------------------------------------------------------------.
1124 | Report an unexpected EOF in a token or comment starting at START.       |
1125 | An end of file was encountered and the expected TOKEN_END was missing.  |
1126 `------------------------------------------------------------------------*/
1127 
1128 static void
unexpected_eof(boundary start,char const * token_end)1129 unexpected_eof (boundary start, char const *token_end)
1130 {
1131   unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1132 }
1133 
1134 
1135 /*----------------------------------------.
1136 | Likewise, but for unexpected newlines.  |
1137 `----------------------------------------*/
1138 
1139 static void
unexpected_newline(boundary start,char const * token_end)1140 unexpected_newline (boundary start, char const *token_end)
1141 {
1142   unexpected_end (start, N_("missing `%s' at end of line"), token_end);
1143 }
1144 
1145 
1146 /*-------------------------.
1147 | Initialize the scanner.  |
1148 `-------------------------*/
1149 
1150 void
scanner_initialize(void)1151 scanner_initialize (void)
1152 {
1153   obstack_init (&obstack_for_string);
1154 }
1155 
1156 
1157 /*-----------------------------------------------.
1158 | Free all the memory allocated to the scanner.  |
1159 `-----------------------------------------------*/
1160 
1161 void
scanner_free(void)1162 scanner_free (void)
1163 {
1164   obstack_free (&obstack_for_string, 0);
1165   /* Reclaim Flex's buffers.  */
1166   yy_delete_buffer (YY_CURRENT_BUFFER);
1167 }
1168