1 /*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
3 *
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
6
7 /*
8 * scan.c - the jam yacc scanner
9 *
10 */
11
12 #include "jam.h"
13 #include "scan.h"
14 #include "output.h"
15
16 #include "constants.h"
17 #include "jamgram.hpp"
18
19
20 struct keyword
21 {
22 const char * word;
23 int type;
24 } keywords[] =
25 {
26 #include "jamgramtab.h"
27 { 0, 0 }
28 };
29
30 typedef struct include include;
31 struct include
32 {
33 include * next; /* next serial include file */
34 char * string; /* pointer into current line */
35 char * * strings; /* for yyfparse() -- text to parse */
36 LISTITER pos; /* for yysparse() -- text to parse */
37 LIST * list; /* for yysparse() -- text to parse */
38 FILE * file; /* for yyfparse() -- file being read */
39 OBJECT * fname; /* for yyfparse() -- file name */
40 int line; /* line counter for error messages */
41 char buf[ 512 ]; /* for yyfparse() -- line buffer */
42 };
43
44 static include * incp = 0; /* current file; head of chain */
45
46 static int scanmode = SCAN_NORMAL;
47 static int anyerrors = 0;
48
49
50 static char * symdump( YYSTYPE * );
51
52 #define BIGGEST_TOKEN 10240 /* no single token can be larger */
53
54
55 /*
56 * Set parser mode: normal, string, or keyword.
57 */
58
yymode(int n)59 int yymode( int n )
60 {
61 int result = scanmode;
62 scanmode = n;
63 return result;
64 }
65
66
yyerror(char const * s)67 void yyerror( char const * s )
68 {
69 /* We use yylval instead of incp to access the error location information as
70 * the incp pointer will already be reset to 0 in case the error occurred at
71 * EOF.
72 *
73 * The two may differ only if ran into an unexpected EOF or we get an error
74 * while reading a lexical token spanning multiple lines, e.g. a multi-line
75 * string literal or action body, in which case yylval location information
76 * will hold the information about where the token started while incp will
77 * hold the information about where reading it broke.
78 */
79 out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
80 symdump( &yylval ) );
81 ++anyerrors;
82 }
83
84
yyanyerrors()85 int yyanyerrors()
86 {
87 return anyerrors != 0;
88 }
89
90
yyfparse(OBJECT * s)91 void yyfparse( OBJECT * s )
92 {
93 include * i = (include *)BJAM_MALLOC( sizeof( *i ) );
94
95 /* Push this onto the incp chain. */
96 i->string = (char*)"";
97 i->strings = 0;
98 i->file = 0;
99 i->fname = object_copy( s );
100 i->line = 0;
101 i->next = incp;
102 incp = i;
103 }
104
105
yysparse(OBJECT * name,const char ** lines)106 void yysparse( OBJECT * name, const char * * lines )
107 {
108 yyfparse( name );
109 incp->strings = (char * *)lines;
110 }
111
112
113 /*
114 * yyfdone() - cleanup after we're done parsing a file.
115 */
yyfdone(void)116 void yyfdone( void )
117 {
118 include * const i = incp;
119 incp = i->next;
120
121 /* Close file, free name. */
122 if(i->file && (i->file != stdin))
123 fclose(i->file);
124 object_free(i->fname);
125 BJAM_FREE((char *)i);
126 }
127
128
129 /*
130 * yyline() - read new line and return first character.
131 *
132 * Fabricates a continuous stream of characters across include files, returning
133 * EOF at the bitter end.
134 */
135
yyline()136 int yyline()
137 {
138 include * const i = incp;
139
140 if ( !incp )
141 return EOF;
142
143 /* Once we start reading from the input stream, we reset the include
144 * insertion point so that the next include file becomes the head of the
145 * list.
146 */
147
148 /* If there is more data in this line, return it. */
149 if ( *i->string )
150 return *i->string++;
151
152 /* If we are reading from an internal string list, go to the next string. */
153 if ( i->strings )
154 {
155 if ( *i->strings )
156 {
157 ++i->line;
158 i->string = *(i->strings++);
159 return *i->string++;
160 }
161 }
162 else
163 {
164 /* If necessary, open the file. */
165 if ( !i->file )
166 {
167 FILE * f = stdin;
168 if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
169 perror( object_str( i->fname ) );
170 i->file = f;
171 }
172
173 /* If there is another line in this file, start it. */
174 if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
175 {
176 ++i->line;
177 i->string = i->buf;
178 return *i->string++;
179 }
180 }
181
182 /* This include is done. Return EOF so yyparse() returns to
183 * parse_file().
184 */
185
186 return EOF;
187 }
188
189 /* This allows us to get an extra character of lookahead.
190 * There are a few places where we need to look ahead two
191 * characters and yyprev only guarantees a single character
192 * of putback.
193 */
yypeek()194 int yypeek()
195 {
196 if ( *incp->string )
197 {
198 return *incp->string;
199 }
200 else if ( incp->strings )
201 {
202 if ( *incp->strings )
203 return **incp->strings;
204 }
205 else if ( incp->file )
206 {
207 /* Don't bother opening the file. yypeek is
208 * only used in special cases and never at the
209 * beginning of a file.
210 */
211 int ch = fgetc( incp->file );
212 if ( ch != EOF )
213 ungetc( ch, incp->file );
214 return ch;
215 }
216 return EOF;
217 }
218
219 /*
220 * yylex() - set yylval to current token; return its type.
221 *
222 * Macros to move things along:
223 *
224 * yychar() - return and advance character; invalid after EOF.
225 * yyprev() - back up one character; invalid before yychar().
226 *
227 * yychar() returns a continuous stream of characters, until it hits the EOF of
228 * the current include file.
229 */
230
/* Return the next character and advance, fetching a new line through
 * yyline() when the current one is used up. The character is converted
 * through unsigned char so that high-bit bytes are non-negative and cannot
 * compare equal to EOF.
 */
#define yychar() ( *incp->string ? (unsigned char)*incp->string++ : yyline() )

/* Step back over the character most recently returned by yychar(). */
#define yyprev() ( incp->string-- )

/* When non-zero, the keyword macros below end the current token with
 * 'break'; when zero, they only record a warning or request whitespace.
 */
static int use_new_scanner = 0;

/* Both macros may only be used inside the token loop of yylex(): they
 * expand to 'break' / 'continue' and refer to its local variables.
 */
#define yystartkeyword() if(use_new_scanner) break; else token_warning()
#define yyendkeyword() if(use_new_scanner) break; else if ( 1 ) { expect_whitespace = 1; continue; } else (void)0
238
do_token_warning()239 void do_token_warning()
240 {
241 out_printf( "%s:%d: %s %s\n", object_str( yylval.file ), yylval.line, "Unescaped special character in",
242 symdump( &yylval ) );
243 }
244
245 #define token_warning() has_token_warning = 1
246
yylex()247 int yylex()
248 {
249 int c;
250 char buf[ BIGGEST_TOKEN ];
251 char * b = buf;
252
253 if ( !incp )
254 goto eof;
255
256 /* Get first character (whitespace or of token). */
257 c = yychar();
258
259 if ( scanmode == SCAN_STRING )
260 {
261 /* If scanning for a string (action's {}'s), look for the closing brace.
262 * We handle matching braces, if they match.
263 */
264
265 int nest = 1;
266
267 while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
268 {
269 if ( c == '{' )
270 ++nest;
271
272 if ( ( c == '}' ) && !--nest )
273 break;
274
275 *b++ = c;
276
277 c = yychar();
278
279 /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
280 if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
281 --b;
282 }
283
284 /* We ate the ending brace -- regurgitate it. */
285 if ( c != EOF )
286 yyprev();
287
288 /* Check for obvious errors. */
289 if ( b == buf + sizeof( buf ) )
290 {
291 yyerror( "action block too big" );
292 goto eof;
293 }
294
295 if ( nest )
296 {
297 yyerror( "unmatched {} in action block" );
298 goto eof;
299 }
300
301 *b = 0;
302 yylval.type = STRING;
303 yylval.string = object_new( buf );
304 yylval.file = incp->fname;
305 yylval.line = incp->line;
306 }
307 else
308 {
309 char * b = buf;
310 struct keyword * k;
311 int inquote = 0;
312 int notkeyword;
313 int hastoken = 0;
314 int hasquote = 0;
315 int ingrist = 0;
316 int invarexpand = 0;
317 int expect_whitespace = 0;
318 int has_token_warning = 0;
319
320 /* Eat white space. */
321 for ( ; ; )
322 {
323 /* Skip past white space. */
324 while ( ( c != EOF ) && isspace( c ) )
325 c = yychar();
326
327 /* Not a comment? */
328 if ( c != '#' )
329 break;
330
331 c = yychar();
332 if ( ( c != EOF ) && c == '|' )
333 {
334 /* Swallow up block comment. */
335 int c0 = yychar();
336 int c1 = yychar();
337 while ( ! ( c0 == '|' && c1 == '#' ) && ( c0 != EOF && c1 != EOF ) )
338 {
339 c0 = c1;
340 c1 = yychar();
341 }
342 c = yychar();
343 }
344 else
345 {
346 /* Swallow up comment line. */
347 while ( ( c != EOF ) && ( c != '\n' ) ) c = yychar();
348 }
349 }
350
351 /* c now points to the first character of a token. */
352 if ( c == EOF )
353 goto eof;
354
355 yylval.file = incp->fname;
356 yylval.line = incp->line;
357
358 /* While scanning the word, disqualify it for (expensive) keyword lookup
359 * when we can: $anything, "anything", \anything
360 */
361 notkeyword = c == '$';
362
363 /* Look for white space to delimit word. "'s get stripped but preserve
364 * white space. \ protects next character.
365 */
366 while
367 (
368 ( c != EOF ) &&
369 ( b < buf + sizeof( buf ) ) &&
370 ( inquote || invarexpand || !isspace( c ) )
371 )
372 {
373 if ( expect_whitespace || ( isspace( c ) && ! inquote ) )
374 {
375 token_warning();
376 expect_whitespace = 0;
377 }
378 if ( !inquote && !invarexpand )
379 {
380 if ( scanmode == SCAN_COND || scanmode == SCAN_CONDB )
381 {
382 if ( hastoken && ( c == '=' || c == '<' || c == '>' || c == '!' || c == '(' || c == ')' || c == '&' || c == '|' ) )
383 {
384 /* Don't treat > as special if we started with a grist. */
385 if ( ! ( scanmode == SCAN_CONDB && ingrist == 1 && c == '>' ) )
386 {
387 yystartkeyword();
388 }
389 }
390 else if ( c == '=' || c == '(' || c == ')' )
391 {
392 *b++ = c;
393 c = yychar();
394 yyendkeyword();
395 }
396 else if ( c == '!' || ( scanmode == SCAN_COND && ( c == '<' || c == '>' ) ) )
397 {
398 *b++ = c;
399 if ( ( c = yychar() ) == '=' )
400 {
401 *b++ = c;
402 c = yychar();
403 }
404 yyendkeyword();
405 }
406 else if ( c == '&' || c == '|' )
407 {
408 *b++ = c;
409 if ( yychar() == c )
410 {
411 *b++ = c;
412 c = yychar();
413 }
414 yyendkeyword();
415 }
416 }
417 else if ( scanmode == SCAN_PARAMS )
418 {
419 if ( c == '*' || c == '+' || c == '?' || c == '(' || c == ')' )
420 {
421 if ( !hastoken )
422 {
423 *b++ = c;
424 c = yychar();
425 yyendkeyword();
426 }
427 else
428 {
429 yystartkeyword();
430 }
431 }
432 }
433 else if ( scanmode == SCAN_XASSIGN && ! hastoken )
434 {
435 if ( c == '=' )
436 {
437 *b++ = c;
438 c = yychar();
439 yyendkeyword();
440 }
441 else if ( c == '+' || c == '?' )
442 {
443 if ( yypeek() == '=' )
444 {
445 *b++ = c;
446 *b++ = yychar();
447 c = yychar();
448 yyendkeyword();
449 }
450 }
451 }
452 else if ( scanmode == SCAN_NORMAL || scanmode == SCAN_ASSIGN )
453 {
454 if ( c == '=' )
455 {
456 if ( !hastoken )
457 {
458 *b++ = c;
459 c = yychar();
460 yyendkeyword();
461 }
462 else
463 {
464 yystartkeyword();
465 }
466 }
467 else if ( c == '+' || c == '?' )
468 {
469 if ( yypeek() == '=' )
470 {
471 if ( hastoken )
472 {
473 yystartkeyword();
474 }
475 else
476 {
477 *b++ = c;
478 *b++ = yychar();
479 c = yychar();
480 yyendkeyword();
481 }
482 }
483 }
484 }
485 if ( scanmode != SCAN_CASE && ( c == ';' || c == '{' || c == '}' ||
486 ( scanmode != SCAN_PARAMS && ( c == '[' || c == ']' ) ) ) )
487 {
488 if ( ! hastoken )
489 {
490 *b++ = c;
491 c = yychar();
492 yyendkeyword();
493 }
494 else
495 {
496 yystartkeyword();
497 }
498 }
499 else if ( c == ':' )
500 {
501 if ( ! hastoken )
502 {
503 *b++ = c;
504 c = yychar();
505 yyendkeyword();
506 break;
507 }
508 else if ( hasquote )
509 {
510 /* Special rules for ':' do not apply after we quote anything. */
511 yystartkeyword();
512 }
513 else if ( ingrist == 0 )
514 {
515 int next = yychar();
516 int is_win_path = 0;
517 int is_conditional = 0;
518 if ( next == '\\' )
519 {
520 if( yypeek() == '\\' )
521 {
522 is_win_path = 1;
523 }
524 }
525 else if ( next == '/' )
526 {
527 is_win_path = 1;
528 }
529 yyprev();
530 if ( is_win_path )
531 {
532 /* Accept windows paths iff they are at the start or immediately follow a grist. */
533 if ( b > buf && isalpha( b[ -1 ] ) && ( b == buf + 1 || b[ -2 ] == '>' ) )
534 {
535 is_win_path = 1;
536 }
537 else
538 {
539 is_win_path = 0;
540 }
541 }
542 if ( next == '<' )
543 {
544 /* Accept conditionals only for tokens that start with "<" or "!<" */
545 if ( ( (b > buf) && (buf[ 0 ] == '<') ) ||
546 ( (b > (buf + 1)) && (buf[ 0 ] == '!') && (buf[ 1 ] == '<') ))
547 {
548 is_conditional = 1;
549 }
550 }
551 if ( !is_conditional && !is_win_path )
552 {
553 yystartkeyword();
554 }
555 }
556 }
557 }
558 hastoken = 1;
559 if ( c == '"' )
560 {
561 /* begin or end " */
562 inquote = !inquote;
563 hasquote = 1;
564 notkeyword = 1;
565 }
566 else if ( c != '\\' )
567 {
568 if ( !invarexpand && c == '<' )
569 {
570 if ( ingrist == 0 ) ingrist = 1;
571 else ingrist = -1;
572 }
573 else if ( !invarexpand && c == '>' )
574 {
575 if ( ingrist == 1 ) ingrist = 0;
576 else ingrist = -1;
577 }
578 else if ( c == '$' )
579 {
580 if ( ( c = yychar() ) == EOF )
581 {
582 *b++ = '$';
583 break;
584 }
585 else if ( c == '(' )
586 {
587 /* inside $(), we only care about quotes */
588 *b++ = '$';
589 c = '(';
590 ++invarexpand;
591 }
592 else
593 {
594 c = '$';
595 yyprev();
596 }
597 }
598 else if ( c == '@' )
599 {
600 if ( ( c = yychar() ) == EOF )
601 {
602 *b++ = '@';
603 break;
604 }
605 else if ( c == '(' )
606 {
607 /* inside @(), we only care about quotes */
608 *b++ = '@';
609 c = '(';
610 ++invarexpand;
611 }
612 else
613 {
614 c = '@';
615 yyprev();
616 }
617 }
618 else if ( invarexpand && c == '(' )
619 {
620 ++invarexpand;
621 }
622 else if ( invarexpand && c == ')' )
623 {
624 --invarexpand;
625 }
626 /* normal char */
627 *b++ = c;
628 }
629 else if ( ( c = yychar() ) != EOF )
630 {
631 /* \c */
632 if (c == 'n')
633 c = '\n';
634 else if (c == 'r')
635 c = '\r';
636 else if (c == 't')
637 c = '\t';
638 *b++ = c;
639 notkeyword = 1;
640 }
641 else
642 {
643 /* \EOF */
644 break;
645 }
646
647 c = yychar();
648 }
649
650 /* Automatically switch modes after reading the token. */
651 if ( scanmode == SCAN_CONDB )
652 scanmode = SCAN_COND;
653
654 /* Check obvious errors. */
655 if ( b == buf + sizeof( buf ) )
656 {
657 yyerror( "string too big" );
658 goto eof;
659 }
660
661 if ( inquote )
662 {
663 yyerror( "unmatched \" in string" );
664 goto eof;
665 }
666
667 /* We looked ahead a character - back up. */
668 if ( c != EOF )
669 yyprev();
670
671 /* Scan token table. Do not scan if it is obviously not a keyword or if
672 * it is an alphabetic when were looking for punctuation.
673 */
674
675 *b = 0;
676 yylval.type = ARG;
677
678 if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT || scanmode == SCAN_PARAMS || scanmode == SCAN_ASSIGN ) ) )
679 for ( k = keywords; k->word; ++k )
680 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
681 {
682 yylval.type = k->type;
683 yylval.keyword = k->word; /* used by symdump */
684 break;
685 }
686
687 if ( yylval.type == ARG )
688 yylval.string = object_new( buf );
689
690 if ( scanmode == SCAN_NORMAL && yylval.type == ARG )
691 scanmode = SCAN_XASSIGN;
692
693 if ( has_token_warning )
694 do_token_warning();
695 }
696
697 if ( DEBUG_SCAN )
698 out_printf( "scan %s\n", symdump( &yylval ) );
699
700 return yylval.type;
701
702 eof:
703 /* We do not reset yylval.file & yylval.line here so unexpected EOF error
704 * messages would include correct error location information.
705 */
706 yylval.type = EOF;
707 return yylval.type;
708 }
709
710
symdump(YYSTYPE * s)711 static char * symdump( YYSTYPE * s )
712 {
713 static char buf[ BIGGEST_TOKEN + 20 ];
714 switch ( s->type )
715 {
716 case EOF : sprintf( buf, "EOF" ); break;
717 case 0 : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break;
718 case ARG : sprintf( buf, "argument %s" , object_str( s->string ) ); break;
719 case STRING: sprintf( buf, "string \"%s\"" , object_str( s->string ) ); break;
720 default : sprintf( buf, "keyword %s" , s->keyword ); break;
721 }
722 return buf;
723 }
724
725
726 /*
727 * Get information about the current file and line, for those epsilon
728 * transitions that produce a parse.
729 */
730
yyinput_last_read_token(OBJECT ** name,int * line)731 void yyinput_last_read_token( OBJECT * * name, int * line )
732 {
733 /* TODO: Consider whether and when we might want to report where the last
734 * read token ended, e.g. EOF errors inside string literals.
735 */
736 *name = yylval.file;
737 *line = yylval.line;
738 }
739