1 /* parse.y - parser for flex input */
2
/* General tokens delivered by the scanner. */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP

/* Tokens used by the "options" rules of section 1. */
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS

/* One token per character class expression handled by the ccl_expr rule. */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
       CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
8
9 %{
10 /*-
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Vern Paxson.
16 *
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
20 *
21 * Redistribution and use in source and binary forms with or without
22 * modification are permitted provided that: (1) source distributions retain
23 * this entire copyright notice and comment, and (2) distributions including
24 * binaries display the following acknowledgement: ``This product includes
25 * software developed by the University of California, Berkeley and its
26 * contributors'' in the documentation or other materials provided with the
27 * distribution and in all advertising materials mentioning features or use
28 * of this software. Neither the name of the University nor the names of
29 * its contributors may be used to endorse or promote products derived from
30 * this software without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
34 */
35
36 /* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
37
38
39 /* Some versions of bison are broken in that they use alloca() but don't
40 * declare it properly. The following is the patented (just kidding!)
41 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
42 */
43 #ifdef YYBISON
44 /* AIX requires this to be the first thing in the file. What a piece. */
45 # ifdef _AIX
46 #pragma alloca
47 # endif
48 #endif
49
50 #include "flexdef.h"
51
52 /* The remainder of the alloca() cruft has to come after including flexdef.h,
53 * so HAVE_ALLOCA_H is (possibly) defined.
54 */
55 #ifdef YYBISON
56 # ifdef __GNUC__
57 # ifndef alloca
58 # define alloca __builtin_alloca
59 # endif
60 # else
61 # if HAVE_ALLOCA_H
62 # include <alloca.h>
63 # else
64 # ifdef __hpux
65 void *alloca ();
66 # else
67 # ifdef __TURBOC__
68 # include <malloc.h>
69 # else
70 char *alloca ();
71 # endif
72 # endif
73 # endif
74 # endif
75 #endif
76
77 /* Bletch, ^^^^ that was ugly! */
78
79
80 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
81 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
82
83 int *scon_stk;
84 int scon_stk_ptr;
85
86 static int madeany = false; /* whether we've made the '.' character class */
87 int previous_continued_action; /* whether the previous rule's action was '|' */
88
/* Expand a POSIX character class expression: every value below csize that
 * is ASCII and satisfies func is added to the class currccl.  The expansion
 * is a bare brace block because it is used without a trailing semicolon.
 */
#define CCL_EXPR(func) \
	{ \
	int ch = 0; \
	while ( ch < csize ) \
		{ \
		if ( isascii(ch) && func(ch) ) \
			{ \
			ccladd( currccl, ch ); \
			} \
		++ch; \
		} \
	}

/* isblank() is defined by POSIX but is not part of ANSI C, so a local
 * replacement is used.
 */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')

/* Some machines (DEC Alphas, for example) default the token type to "long"
 * rather than "int", which clashes with the declaration of yylval in
 * flexdef.h.  Every yacc seen so far guards its YYSTYPE definition with
 * "#ifndef YYSTYPE", so defining it here should force the type to "int".
 */
#define YYSTYPE int
108
109 %}
110
111 %%
goal        : initlex sect1 sect1end sect2 initforrule
                {
                    /* The whole input has been parsed: append the default rule. */
                    int default_nfa;

                    /* Its pattern is a fresh character class, negated. */
                    pat = cclinit();
                    cclnegate( pat );

                    default_nfa = mkstate( -pat );

                    /* Record which rule is the default one so that no
                     * "can't match" warning is generated for it.
                     */
                    default_rule = num_rules;

                    finish_rule( default_nfa, false, 0, 0 );

                    /* Attach the default rule to every start condition. */
                    for ( i = 1; i <= lastsc; ++i )
                    {
                        scset[i] = mkbranch( scset[i], default_nfa );
                    }

                    add_action( spprdflt
                        ? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
                        : "ECHO" );

                    add_action( ";\n\tYY_BREAK\n" );
                }
            ;
140
initlex     : /* empty */
                {
                    /* Set up for processing rules by installing the
                     * default start condition, "INITIAL".
                     */
                    scinstal( "INITIAL", false );
                }
            ;
148
/* Section 1: any mix of start condition declarations and option lines. */
sect1       : sect1 startconddecl namelist1
            | sect1 options
            | /* empty */
            | error
                {
                    synerr( "unknown error processing section 1" );
                }
            ;
155
sect1end    : SECTEND
                {
                    check_options();

                    /* One slot per start condition; entries are stored
                     * starting at index 1, so the stack begins empty at 0.
                     */
                    scon_stk_ptr = 0;
                    scon_stk = allocate_integer_array( lastsc + 1 );
                }
            ;
163
/* Records in xcluflg which kind of declaration precedes the name list. */
startconddecl : SCDECL
                {
                    xcluflg = false;
                }

            | XSCDECL
                {
                    xcluflg = true;
                }
            ;
170
/* Each name in the list is installed as a start condition, using the
 * xcluflg value set by the preceding declaration keyword.
 */
namelist1   : namelist1 NAME
                {
                    scinstal( nmstr, xcluflg );
                }

            | NAME
                {
                    scinstal( nmstr, xcluflg );
                }

            | error
                {
                    synerr( "bad start condition list" );
                }
            ;
180
options     : OPTION_OP optionlist
            ;

optionlist  : optionlist option
            | /* empty */
            ;

/* Each recognized option stores a copy of the current name string. */
option      : OPT_OUTFILE '=' NAME
                {
                    outfilename = copy_string( nmstr );
                    did_outfilename = 1;
                }

            | OPT_PREFIX '=' NAME
                {
                    prefix = copy_string( nmstr );
                }

            | OPT_YYCLASS '=' NAME
                {
                    yyclass = copy_string( nmstr );
                }
            ;
198
/* Section 2: a sequence of rules and of "<scon>{ ... }" groups.  In both
 * cases the value of scon is the stack pointer saved before the start
 * conditions were pushed, and it is restored afterwards.
 */
sect2       : sect2 scon initforrule flexrule '\n'
                {
                    scon_stk_ptr = $2;
                }

            | sect2 scon '{' sect2 '}'
                {
                    scon_stk_ptr = $2;
                }

            | /* empty */
            ;
205
initforrule : /* empty */
                {
                    /* Reset the per-rule state before parsing one rule. */
                    varlength = false;
                    variable_trail_rule = false;
                    trlcontxt = false;

                    rulelen = 0;
                    headcnt = 0;
                    trailcnt = 0;

                    current_state_type = STATE_NORMAL;
                    previous_continued_action = continued_action;
                    in_rule = true;

                    new_rule();
                }
            ;
218
flexrule    : '^' rule
                {
                    /* A rule introduced by '^' is attached to the scbol[]
                     * machines.
                     */
                    pat = $2;
                    finish_rule( pat, variable_trail_rule, headcnt, trailcnt );

                    if ( scon_stk_ptr <= 0 )
                    {
                        /* No start conditions on the stack: add the rule to
                         * every start condition not marked in scxclu[].
                         */
                        for ( i = 1; i <= lastsc; ++i )
                        {
                            if ( ! scxclu[i] )
                                scbol[i] = mkbranch( scbol[i], pat );
                        }
                    }
                    else
                    {
                        /* Add the rule to each stacked start condition. */
                        for ( i = 1; i <= scon_stk_ptr; ++i )
                        {
                            scbol[scon_stk[i]] =
                                mkbranch( scbol[scon_stk[i]], pat );
                        }
                    }

                    if ( ! bol_needed )
                    {
                        /* First '^' rule seen. */
                        bol_needed = true;

                        if ( performance_report > 1 )
                            pinpoint_message(
            "'^' operator results in sub-optimal performance" );
                    }
                }

            | rule
                {
                    /* An unanchored rule is attached to the scset[] machines. */
                    pat = $1;
                    finish_rule( pat, variable_trail_rule, headcnt, trailcnt );

                    if ( scon_stk_ptr <= 0 )
                    {
                        for ( i = 1; i <= lastsc; ++i )
                        {
                            if ( ! scxclu[i] )
                                scset[i] = mkbranch( scset[i], pat );
                        }
                    }
                    else
                    {
                        for ( i = 1; i <= scon_stk_ptr; ++i )
                        {
                            scset[scon_stk[i]] =
                                mkbranch( scset[scon_stk[i]], pat );
                        }
                    }
                }

            | EOF_OP
                {
                    if ( scon_stk_ptr <= 0 )
                    {
                        /* With no start conditions given, this EOF rule
                         * applies to every start condition that does not
                         * already have an EOF action.
                         */
                        for ( i = 1; i <= lastsc; ++i )
                        {
                            if ( ! sceof[i] )
                                scon_stk[++scon_stk_ptr] = i;
                        }
                    }

                    if ( scon_stk_ptr == 0 )
                        warn(
            "all start conditions already have <<EOF>> rules" );
                    else
                        build_eof_action();
                }

            | error
                {
                    synerr( "unrecognized rule" );
                }
            ;
305
/* Empty rule whose value is the current start condition stack pointer. */
scon_stk_ptr : /* empty */
                {
                    $$ = scon_stk_ptr;
                }
            ;
309
/* In every alternative the value of scon is the stack pointer as it was
 * before any start conditions were pushed.
 */
scon        : '<' scon_stk_ptr namelist2 '>'
                {
                    $$ = $2;
                }

            | '<' '*' '>'
                {
                    $$ = scon_stk_ptr;

                    /* Push every start condition that is not already
                     * somewhere on the stack.
                     */
                    for ( i = 1; i <= lastsc; ++i )
                    {
                        int pos = 1;

                        while ( pos <= scon_stk_ptr && scon_stk[pos] != i )
                            ++pos;

                        if ( pos > scon_stk_ptr )
                            scon_stk[++scon_stk_ptr] = i;
                    }
                }

            | /* empty */
                {
                    $$ = scon_stk_ptr;
                }
            ;
333
/* Comma-separated start condition names inside "<...>". */
namelist2   : namelist2 ',' sconname

            | sconname

            | error
                {
                    synerr( "bad start condition list" );
                }
            ;
341
sconname    : NAME
                {
                    scnum = sclookup( nmstr );

                    if ( scnum == 0 )
                    {
                        format_pinpoint_message(
                            "undeclared start condition %s", nmstr );
                    }
                    else
                    {
                        /* Search the stack for this start condition. */
                        i = 1;
                        while ( i <= scon_stk_ptr && scon_stk[i] != scnum )
                            ++i;

                        if ( i <= scon_stk_ptr )
                            /* Already present: warn, do not push it again. */
                            format_warn( "<%s> specified twice",
                                scname[scnum] );
                        else
                            scon_stk[++scon_stk_ptr] = scnum;
                    }
                }
            ;
364
rule        : re2 re
                {
                    /* $1 is the head (reduced by re2), $2 the trailing part. */
                    if ( transchar[lastst[$2]] != SYM_EPSILON )
                    {
                        /* Supply the final transition right now, so that it
                         * gets marked as a trailing context state.
                         */
                        $2 = link_machines( $2, mkstate( SYM_EPSILON ) );
                    }

                    mark_beginning_as_normal( $2 );
                    current_state_type = STATE_NORMAL;

                    if ( previous_continued_action )
                    {
                        /* This has to be treated as variable trailing
                         * context, so that the backup is done before the
                         * action switch statement rather than inside the
                         * action.  Were it done in the action, the rules
                         * "falling into" this one's action would
                         * erroneously perform the backup as well.
                         */
                        if ( ! (varlength && headcnt == 0) )
                            warn(
        "trailing context made variable due to preceding '|' action" );

                        /* Mark as variable. */
                        varlength = true;
                        headcnt = 0;
                    }

                    if ( lex_compat || (varlength && headcnt == 0) )
                    {
                        /* Variable trailing context rule: mark the first
                         * part as the accepting "head" part.
                         *
                         * This is not done at the start of the production
                         * because current_state_type was then still set up
                         * for a trail rule, and add_accept() can create a
                         * new state.
                         */
                        add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
                        variable_trail_rule = true;
                    }
                    else
                    {
                        trailcnt = rulelen;
                    }

                    $$ = link_machines( $1, $2 );
                }

            | re2 re '$'
                {
                    synerr( "trailing context used twice" );
                }

            | re '$'
                {
                    headcnt = 0;
                    trailcnt = rulelen = 1;
                    varlength = false;

                    current_state_type = STATE_TRAILING_CONTEXT;

                    if ( trlcontxt )
                    {
                        synerr( "trailing context used twice" );
                        $$ = mkstate( SYM_EPSILON );
                    }
                    else if ( previous_continued_action )
                    {
                        /* Same reasoning as in the "re2 re" alternative:
                         * a preceding '|' action forces variable trailing
                         * context.
                         */
                        warn(
        "trailing context made variable due to preceding '|' action" );

                        varlength = true;
                    }

                    if ( lex_compat || varlength )
                    {
                        /* As in "re2 re": mark $1 as the accepting head. */
                        add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
                        variable_trail_rule = true;
                    }

                    trlcontxt = true;

                    /* Append an epsilon state followed by a '\n' state. */
                    eps = mkstate( SYM_EPSILON );
                    $$ = link_machines( $1,
                        link_machines( eps, mkstate( '\n' ) ) );
                }

            | re
                {
                    $$ = $1;

                    if ( trlcontxt )
                    {
                        if ( lex_compat || (varlength && headcnt == 0) )
                        {
                            /* Head and trail are both variable-length. */
                            variable_trail_rule = true;
                        }
                        else
                        {
                            trailcnt = rulelen;
                        }
                    }
                }
            ;
482
483
re          : re '|' series
                {
                    /* Alternation: combine both sides with mkor() and flag
                     * the rule as variable-length.
                     */
                    $$ = mkor( $1, $3 );
                    varlength = true;
                }

            | series
                {
                    $$ = $1;
                }
            ;
493
494
re2         : re '/'
                {
                    /* This is a rule of its own so that the reduction takes
                     * place before the trailing series is parsed.
                     */
                    if ( ! trlcontxt )
                        trlcontxt = true;
                    else
                        synerr( "trailing context used twice" );

                    if ( ! varlength )
                        headcnt = rulelen;
                    else
                        /* Variable-length head: reset the flag in the hope
                         * that the trailing context is fixed-length.
                         */
                        varlength = false;

                    rulelen = 0;

                    current_state_type = STATE_TRAILING_CONTEXT;
                    $$ = $1;
                }
            ;
521
series      : series singleton
                {
                    /* Adjacent patterns are concatenated here. */
                    $$ = link_machines( $1, $2 );
                }

            | singleton
                {
                    $$ = $1;
                }
            ;
533
singleton   : singleton '*'
                {
                    varlength = true;
                    $$ = mkclos( $1 );
                }

            | singleton '+'
                {
                    varlength = true;
                    $$ = mkposcl( $1 );
                }

            | singleton '?'
                {
                    varlength = true;
                    $$ = mkopt( $1 );
                }

            | singleton '{' NUMBER ',' NUMBER '}'
                {
                    /* $3 is the lower bound, $5 the upper bound. */
                    varlength = true;

                    if ( $3 > $5 || $3 < 0 )
                    {
                        synerr( "bad iteration values" );
                        $$ = $1;
                    }
                    else if ( $3 == 0 && $5 <= 0 )
                    {
                        synerr( "bad iteration values" );
                        $$ = $1;
                    }
                    else if ( $3 == 0 )
                    {
                        /* Lower bound of zero: 1..$5 repetitions, wrapped
                         * by mkopt().
                         */
                        $$ = mkopt( mkrep( $1, 1, $5 ) );
                    }
                    else
                    {
                        $$ = mkrep( $1, $3, $5 );
                    }
                }

            | singleton '{' NUMBER ',' '}'
                {
                    varlength = true;

                    if ( $3 > 0 )
                    {
                        $$ = mkrep( $1, $3, INFINITY );
                    }
                    else
                    {
                        synerr( "iteration value must be positive" );
                        $$ = $1;
                    }
                }

            | singleton '{' NUMBER '}'
                {
                    /* The singleton may be something like "(foo)", whose
                     * length is unknown here, so the rule is simply
                     * treated as variable-length.
                     */
                    varlength = true;

                    if ( $3 > 0 )
                    {
                        $$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
                    }
                    else
                    {
                        synerr( "iteration value must be positive" );
                        $$ = $1;
                    }
                }

            | '.'
                {
                    if ( ! madeany )
                    {
                        /* First use of '.': build its character class by
                         * adding '\n' to a new class and negating it.
                         */
                        anyccl = cclinit();
                        ccladd( anyccl, '\n' );
                        cclnegate( anyccl );

                        if ( useecs )
                            mkeccl( ccltbl + cclmap[anyccl],
                                ccllen[anyccl], nextecm,
                                ecgroup, csize, csize );

                        madeany = true;
                    }

                    ++rulelen;

                    $$ = mkstate( -anyccl );
                }

            | fullccl
                {
                    if ( ! cclsorted )
                    {
                        /* Sort the characters for fast searching.  A shell
                         * sort is used because the list could be large.
                         */
                        cshell( ccltbl + cclmap[$1], ccllen[$1], true );
                    }

                    if ( useecs )
                    {
                        mkeccl( ccltbl + cclmap[$1], ccllen[$1],
                            nextecm, ecgroup, csize, csize );
                    }

                    ++rulelen;

                    $$ = mkstate( -$1 );
                }

            | PREVCCL
                {
                    ++rulelen;

                    $$ = mkstate( -$1 );
                }

            | '"' string '"'
                {
                    $$ = $2;
                }

            | '(' re ')'
                {
                    $$ = $2;
                }

            | CHAR
                {
                    ++rulelen;

                    /* With caseins set, upper-case letters go through
                     * clower() first.
                     */
                    if ( caseins && $1 >= 'A' && $1 <= 'Z' )
                    {
                        $1 = clower( $1 );
                    }

                    $$ = mkstate( $1 );
                }
            ;
677
fullccl     : '[' ccl ']'
                {
                    $$ = $2;
                }

            | '[' '^' ccl ']'
                {
                    /* Negated class: negate it, then return it. */
                    cclnegate( $3 );
                    $$ = $3;
                }
            ;
687
ccl         : ccl CHAR '-' CHAR
                {
                    /* Character range $2-$4.  With caseins set, upper-case
                     * endpoints go through clower() first.
                     */
                    if ( caseins )
                    {
                        if ( $2 >= 'A' && $2 <= 'Z' )
                            $2 = clower( $2 );

                        if ( $4 >= 'A' && $4 <= 'Z' )
                            $4 = clower( $4 );
                    }

                    if ( $2 <= $4 )
                    {
                        for ( i = $2; i <= $4; ++i )
                        {
                            ccladd( $1, i );
                        }

                        /* Track whether this ccl is still in alphabetical
                         * order.
                         */
                        cclsorted = cclsorted && ($2 > lastchar);
                        lastchar = $4;
                    }
                    else
                    {
                        synerr( "negative range in character class" );
                    }

                    $$ = $1;
                }

            | ccl CHAR
                {
                    if ( caseins && $2 >= 'A' && $2 <= 'Z' )
                    {
                        $2 = clower( $2 );
                    }

                    ccladd( $1, $2 );
                    cclsorted = cclsorted && ($2 > lastchar);
                    lastchar = $2;
                    $$ = $1;
                }

            | ccl ccl_expr
                {
                    /* Keeping cclsorted accurate here would be too hard. */
                    cclsorted = false;
                    $$ = $1;
                }

            | /* empty */
                {
                    /* Start a new character class. */
                    cclsorted = true;
                    lastchar = 0;
                    $$ = cclinit();
                    currccl = $$;
                }
            ;
741
/* Each character class expression expands, through CCL_EXPR, into the
 * class currently being built.
 */
ccl_expr    : CCE_ALNUM
                { CCL_EXPR(isalnum) }
            | CCE_ALPHA
                { CCL_EXPR(isalpha) }
            | CCE_BLANK
                { CCL_EXPR(IS_BLANK) }
            | CCE_CNTRL
                { CCL_EXPR(iscntrl) }
            | CCE_DIGIT
                { CCL_EXPR(isdigit) }
            | CCE_GRAPH
                { CCL_EXPR(isgraph) }
            | CCE_LOWER
                { CCL_EXPR(islower) }
            | CCE_PRINT
                { CCL_EXPR(isprint) }
            | CCE_PUNCT
                { CCL_EXPR(ispunct) }
            | CCE_SPACE
                { CCL_EXPR(isspace) }
            | CCE_UPPER
                {
                    /* With caseins set, the lower-case predicate is used. */
                    if ( caseins )
                        CCL_EXPR(islower)
                    else
                        CCL_EXPR(isupper)
                }
            | CCE_XDIGIT
                { CCL_EXPR(isxdigit) }
            ;
760
string      : string CHAR
                {
                    /* With caseins set, upper-case letters go through
                     * clower() first.
                     */
                    if ( caseins && $2 >= 'A' && $2 <= 'Z' )
                    {
                        $2 = clower( $2 );
                    }

                    ++rulelen;

                    $$ = link_machines( $1, mkstate( $2 ) );
                }

            | /* empty */
                {
                    /* An empty string starts as a single epsilon state. */
                    $$ = mkstate( SYM_EPSILON );
                }
            ;
774
775 %%
776
777
778 /* build_eof_action - build the "<<EOF>>" action for the active start
779 * conditions
780 */
781
782 void build_eof_action()
783 {
784 register int i;
785 char action_text[MAXLINE];
786
787 for ( i = 1; i <= scon_stk_ptr; ++i )
788 {
789 if ( sceof[scon_stk[i]] )
790 format_pinpoint_message(
791 "multiple <<EOF>> rules for start condition %s",
792 scname[scon_stk[i]] );
793
794 else
795 {
796 sceof[scon_stk[i]] = true;
797 sprintf( action_text, "case YY_STATE_EOF(%s):\n",
798 scname[scon_stk[i]] );
799 add_action( action_text );
800 }
801 }
802
803 line_directive_out( (FILE *) 0, 1 );
804
805 /* This isn't a normal rule after all - don't count it as
806 * such, so we don't have any holes in the rule numbering
807 * (which make generating "rule can never match" warnings
808 * more difficult.
809 */
810 --num_rules;
811 ++num_eof_rules;
812 }
813
814
815 /* format_synerr - write out formatted syntax error */
816
format_synerr(msg,arg)817 void format_synerr( msg, arg )
818 char msg[], arg[];
819 {
820 char errmsg[MAXLINE];
821
822 (void) sprintf( errmsg, msg, arg );
823 synerr( errmsg );
824 }
825
826
827 /* synerr - report a syntax error */
828
synerr(str)829 void synerr( str )
830 char str[];
831 {
832 syntaxerror = true;
833 pinpoint_message( str );
834 }
835
836
837 /* format_warn - write out formatted warning */
838
format_warn(msg,arg)839 void format_warn( msg, arg )
840 char msg[], arg[];
841 {
842 char warn_msg[MAXLINE];
843
844 (void) sprintf( warn_msg, msg, arg );
845 warn( warn_msg );
846 }
847
848
849 /* warn - report a warning, unless -w was given */
850
warn(str)851 void warn( str )
852 char str[];
853 {
854 line_warning( str, linenum );
855 }
856
857 /* format_pinpoint_message - write out a message formatted with one string,
858 * pinpointing its location
859 */
860
format_pinpoint_message(msg,arg)861 void format_pinpoint_message( msg, arg )
862 char msg[], arg[];
863 {
864 char errmsg[MAXLINE];
865
866 (void) sprintf( errmsg, msg, arg );
867 pinpoint_message( errmsg );
868 }
869
870
871 /* pinpoint_message - write out a message, pinpointing its location */
872
pinpoint_message(str)873 void pinpoint_message( str )
874 char str[];
875 {
876 line_pinpoint( str, linenum );
877 }
878
879
880 /* line_warning - report a warning at a given line, unless -w was given */
881
line_warning(str,line)882 void line_warning( str, line )
883 char str[];
884 int line;
885 {
886 char warning[MAXLINE];
887
888 if ( ! nowarn )
889 {
890 sprintf( warning, "warning, %s", str );
891 line_pinpoint( warning, line );
892 }
893 }
894
895
896 /* line_pinpoint - write out a message, pinpointing it at the given line */
897
line_pinpoint(str,line)898 void line_pinpoint( str, line )
899 char str[];
900 int line;
901 {
902 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
903 }
904
905
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror( char msg[] )
	{
	/* Deliberately empty: the message is discarded. */
	(void) msg;
	}
914