1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 %{
26 #include <stdio.h>
27 #include <string.h>
28 #include "awk.h"
29
30 void checkdup(Node *list, Cell *item);
/*
 * yywrap: always returns 1.  By lex convention a non-zero result means
 * "no further input"; nothing in this file calls it.
 */
int yywrap(void)
{
	return 1;
}
32
33 Node *beginloc = 0;
34 Node *endloc = 0;
35 int infunc = 0; /* = 1 if in arglist or body of func */
36 int inloop = 0; /* = 1 if in while, for, do */
37 char *curfname = 0; /* current function name */
38 Node *arglist = 0; /* list of args for current function */
39 %}
40
/*
 * Semantic value carried by every token and nonterminal; the <tag> in the
 * %token / %type declarations selects which member is used.
 */
%union {
	Node *p;	/* <p>: parse-tree node */
	Cell *cp;	/* <cp>: cell, e.g. for VAR, NUMBER, STRING tokens */
	int i;	/* <i>: integer value, e.g. an operator or token code */
	char *s;	/* <s>: string, used for REGEXPR / reg_expr */
}
47
/*
 * Token declarations.  The <tag> names the %union member that holds each
 * token's semantic value.  The order of these lines matters: FIRSTTOKEN
 * has to stay the first token declared.
 */
%token <i> FIRSTTOKEN /* must be first */
%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token <i> ARRAY
%token <i> MATCH NOTMATCH MATCHOP
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
%token <p> ELSE INTEST CONDEXPR
%token <i> POSTINCR PREINCR POSTDECR PREDECR
%token <cp> VAR IVAR VARNF CALL NUMBER STRING
%token <s> REGEXPR
64
/*
 * Semantic value types of the nonterminals: <p> for those that build
 * parse-tree nodes, <s> for the regular-expression text, <i> for those
 * that only pass an integer token value through.
 */
%type <p> pas pattern ppattern plist pplist patlist prarg term re
%type <p> pa_pat pa_stat pa_stats
%type <s> reg_expr
%type <p> simple_stmt opt_simple_stmt stmt stmtlist
%type <p> var varname funcname varlist
%type <p> for if else while
%type <i> do st
%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type <i> subop print
74
/*
 * Operator precedence and associativity.  Per yacc rules, later lines
 * bind more tightly than earlier ones.  CAT and UMINUS appear in the
 * rules only as %prec names and as node types built by the actions.
 * LASTTOKEN has to stay the last token declared.
 */
%right ASGNOP
%right '?'
%right ':'
%left BOR
%left AND
%left GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left REGEXPR VAR VARNF IVAR WHILE '('
%left CAT
%left '+' '-'
%left '*' '/' '%'
%left NOT UMINUS
%right POWER
%right DECR INCR
%left INDIRECT
%token LASTTOKEN /* must be last */
94
95 %%
96
/*
 * program: first rule of the grammar.  When no error has been flagged,
 * the PROGRAM node built from beginloc, the pattern-action list and
 * endloc is stored in winner.  On a syntax error the lookahead is
 * discarded, bracecheck() is called and "bailing out" is reported.
 */
program:
	  pas
		{
			if (!errorflag)
				winner = (Node *) stat3(PROGRAM, beginloc, $1, endloc);
		}
	| error
		{
			yyclearin;
			bracecheck();
			SYNTAX("bailing out");
		}
	;
102
/* and: the AND token followed by any number of NL tokens. */
and:
	  AND
	| and NL
	;
106
/* bor: the BOR token followed by any number of NL tokens. */
bor:
	  BOR
	| bor NL
	;
110
/* comma: a ',' followed by any number of NL tokens. */
comma:
	  ','
	| comma NL
	;
114
/* do: the DO keyword followed by any number of NL tokens. */
do:
	  DO
	| do NL
	;
118
/* else: the ELSE keyword followed by any number of NL tokens. */
else:
	  ELSE
	| else NL
	;
122
/*
 * for: the three forms of the for statement.
 *   for (init; cond; incr) stmt   -> FOR node, condition wrapped by notnull()
 *   for (init; ; incr) stmt       -> FOR node with a NIL condition
 *   for (v in arr) stmt           -> IN node
 * inloop is raised by a mid-rule action before the body is parsed and
 * lowered once the body has been reduced.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen
		{ ++inloop; }
	  stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, notnull($6), $9, $12);
		}
	| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen
		{ ++inloop; }
	  stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, NIL, $7, $10);
		}
	| FOR '(' varname IN varname rparen
		{ ++inloop; }
	  stmt
		{
			--inloop;
			$$ = stat3(IN, $3, makearr($5), $8);
		}
	;
131
/*
 * funcname: the name in a function definition, lexed either as VAR or
 * as CALL.  setfname() checks and records it.  $$ is not assigned, so it
 * keeps the token's value; pa_stat casts it back to Cell *.
 */
funcname:
	  VAR
		{ setfname($1); }
	| CALL
		{ setfname($1); }
	;
136
/* if: "if (pattern)" head; yields the condition passed through notnull(). */
if:
	  IF '(' pattern rparen
		{ $$ = notnull($3); }
	;
140
/* lbrace: a '{' followed by any number of NL tokens. */
lbrace:
	  '{'
	| lbrace NL
	;
144
/* nl: one or more NL tokens. */
nl:
	  NL
	| nl NL
	;
148
/* opt_nl: zero or more NL tokens; the empty case yields 0. */
opt_nl:
	  /* empty */
		{ $$ = 0; }
	| nl
	;
153
/* opt_pst: optional run of newline / ';' separators; empty yields 0. */
opt_pst:
	  /* empty */
		{ $$ = 0; }
	| pst
	;
158
159
/* opt_simple_stmt: a simple_stmt or nothing; the empty case yields 0. */
opt_simple_stmt:
	  /* empty */
		{ $$ = 0; }
	| simple_stmt
	;
164
/*
 * pas: the whole list of pattern-action statements, with optional
 * separators before and after.  Separators alone yield 0.
 */
pas:
	  opt_pst
		{ $$ = 0; }
	| opt_pst pa_stats opt_pst
		{ $$ = $2; }
	;
169
/* pa_pat: the pattern of a pattern-action statement, passed through notnull(). */
pa_pat:
	  pattern
		{ $$ = notnull($1); }
	;
173
/*
 * pa_stat: one top-level item of the program.
 *   - pattern alone, or a pattern range "p1, p2" alone: the action is a
 *     PRINT node whose argument is rectonode()
 *   - pattern or range with a braced statement list
 *   - a braced statement list without a pattern (PASTAT with NIL pattern)
 *   - XBEGIN / XEND blocks: appended to beginloc / endloc, value 0
 *   - function definition: handed to defn(), value 0
 */
pa_stat:
	  pa_pat
		{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' pa_pat
		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' pa_pat lbrace stmtlist '}'
		{ $$ = pa2stat($1, $3, $5); }
	| lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{
			beginloc = linkum(beginloc, $3);
			$$ = 0;
		}
	| XEND lbrace stmtlist '}'
		{
			endloc = linkum(endloc, $3);
			$$ = 0;
		}
	| FUNC funcname '(' varlist rparen
		{ ++infunc; }	/* from here on we are inside the function body */
	  lbrace stmtlist '}'
		{
			--infunc;
			curfname = 0;
			/* $2 holds the Cell * that funcname passed through */
			defn((Cell *) $2, $4, $8);
			$$ = 0;
		}
	;
187
/* pa_stats: one or more pa_stat items, chained together with linkum(). */
pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat
		{ $$ = linkum($1, $3); }
	;
192
/* patlist: one or more comma-separated patterns, chained with linkum(). */
patlist:
	  pattern
	| patlist comma pattern
		{ $$ = linkum($1, $3); }
	;
197
/*
 * ppattern: the expression form used in unparenthesized print argument
 * lists (via pplist / prarg).  It mirrors `pattern` but has no
 * alternatives for the comparison operators or for "| getline".
 */
ppattern:
	  var ASGNOP ppattern
		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr
		{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{
			/* a constant right-hand side is compiled with makedfa() now */
			if (!constnode($3))
				$$ = op3($2, (Node *)1, $1, $3);
			else
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		}
	| ppattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
218
/*
 * pattern: the general expression.  It covers assignment, ?:, the
 * logical and comparison operators, regular-expression matching,
 * "in" tests, "cmd | getline", concatenation, and finally a bare
 * re or term.
 */
pattern:
	  var ASGNOP pattern
		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }

	/* comparisons: the token value itself is the node type */
	| pattern EQ pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern
		{ $$ = op2($2, $1, $3); }

	/* matching */
	| pattern MATCHOP reg_expr
		{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{
			/* a constant right-hand side is compiled with makedfa() now */
			if (!constnode($3))
				$$ = op3($2, (Node *)1, $1, $3);
			else
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		}

	/* array membership */
	| pattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }

	/* cmd | getline: rejected when the `safe` flag is set */
	| pattern '|' GETLINE var
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, $4, itonp($2), $1);
			}
		}
	| pattern '|' GETLINE
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, (Node*)0, itonp($2), $1);
			}
		}

	/* concatenation and the base cases */
	| pattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
251
/* plist: two or more comma-separated patterns, chained with linkum(). */
plist:
	  pattern comma pattern
		{ $$ = linkum($1, $3); }
	| plist comma pattern
		{ $$ = linkum($1, $3); }
	;
256
/* pplist: one or more comma-separated ppatterns, chained with linkum(). */
pplist:
	  ppattern
	| pplist comma ppattern
		{ $$ = linkum($1, $3); }
	;
261
/*
 * prarg: the argument list of print / printf.  With no arguments the
 * value is rectonode(); otherwise it is a bare pplist or a
 * parenthesized plist.
 */
prarg:
	  /* empty */
		{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'
		{ $$ = $2; }
	;
267
/* print: either output keyword; the token value is passed through. */
print:
	  PRINT
	| PRINTF
	;
271
/* pst: one or more separators, each either NL or ';'. */
pst:
	  NL
	| ';'
	| pst NL
	| pst ';'
	;
275
/* rbrace: a '}' followed by any number of NL tokens. */
rbrace:
	  '}'
	| rbrace NL
	;
279
/*
 * re: a regular expression used on its own as an expression.  It becomes
 * a MATCH node applied to rectonode(); NOT in front wraps it in a NOT node.
 */
re:
	  reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re
		{ $$ = op1(NOT, notnull($2)); }
	;
285
/*
 * reg_expr: a /.../ literal.  startreg() is called from a mid-rule action
 * right after the opening '/'; the value is the REGEXPR token's string.
 */
reg_expr:
	  '/'
		{ startreg(); }
	  REGEXPR '/'
		{ $$ = $3; }
	;
289
/* rparen: a ')' followed by any number of NL tokens. */
rparen:
	  ')'
	| rparen NL
	;
293
/*
 * simple_stmt: print/printf (optionally redirected with |, >> or >),
 * delete, or an expression used as a statement.
 *
 * Output redirection is rejected when the `safe` flag is set.  Every path
 * that only reports an error assigns NIL to $$ explicitly.  Without that,
 * $$ keeps yacc's implicit copy of $1, which here is the <i>-typed
 * print token (or the error token's leftover value), and the enclosing
 * stmt / stmtlist rules would pass it on as if it were a Node *.
 */
simple_stmt:
	  print prarg '|' term
		{
			if (safe) {
				SYNTAX("print | is unsafe");
				$$ = NIL;
			} else
				$$ = stat3($1, $2, itonp($3), $4);
		}
	| print prarg APPEND term
		{
			if (safe) {
				SYNTAX("print >> is unsafe");
				$$ = NIL;
			} else
				$$ = stat3($1, $2, itonp($3), $4);
		}
	| print prarg GT term
		{
			if (safe) {
				SYNTAX("print > is unsafe");
				$$ = NIL;
			} else
				$$ = stat3($1, $2, itonp($3), $4);
		}
	| print prarg
		{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']'
		{ $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname
		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern
		{ $$ = exptostat($1); }
	| error
		{
			yyclearin;
			SYNTAX("illegal statement");
			$$ = NIL;	/* no statement node for the bad input */
		}
	;
310
/* st: statement terminator: newlines, or ';' with optional newlines. */
st:
	  nl
	| ';' opt_nl
	;
315
/*
 * stmt: a single statement.  break/continue are reported when inloop is
 * zero, next/nextfile when infunc is non-zero; the node is built either
 * way.  The do and while forms raise inloop around their body.
 */
stmt:
	  BREAK st
		{
			if (inloop == 0)
				SYNTAX("break illegal outside of loops");
			$$ = stat1(BREAK, NIL);
		}
	| CONTINUE st
		{
			if (inloop == 0)
				SYNTAX("continue illegal outside of loops");
			$$ = stat1(CONTINUE, NIL);
		}
	| do
		{ ++inloop; }
	  stmt
		{ inloop--; }
	  WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st
		{ $$ = stat1(EXIT, $2); }
	| EXIT st
		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt
		{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt
		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace
		{ $$ = $2; }
	| NEXT st
		{
			if (infunc != 0)
				SYNTAX("next is illegal inside a function");
			$$ = stat1(NEXT, NIL);
		}
	| NEXTFILE st
		{
			if (infunc != 0)
				SYNTAX("nextfile is illegal inside a function");
			$$ = stat1(NEXTFILE, NIL);
		}
	| RETURN pattern st
		{ $$ = stat1(RETURN, $2); }
	| RETURN st
		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while
		{ ++inloop; }
	  stmt
		{
			--inloop;
			$$ = stat2(WHILE, $1, $3);
		}
	| ';' opt_nl
		{ $$ = 0; }	/* empty statement */
	;
341
/* stmtlist: one or more statements, chained together with linkum(). */
stmtlist:
	  stmt
	| stmtlist stmt
		{ $$ = linkum($1, $2); }
	;
346
/* subop: SUB or GSUB; the token value becomes the node type in term. */
subop:
	  SUB
	| GSUB
	;
350
/*
 * term: arithmetic, unary operators, function-like builtins, getline,
 * constants and variables.  Where a builtin accepts a regular expression,
 * a reg_expr literal or a constant argument (constnode()) is compiled
 * with makedfa() here; any other expression is kept as a node and the
 * first operand of the op node is (Node *)1 instead of NIL.
 */
term:
	/* binary arithmetic; "term / ASGNOP term" builds a DIVEQ node */
	  term '/' ASGNOP term
		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term
		{ $$ = op2(ADD, $1, $3); }
	| term '-' term
		{ $$ = op2(MINUS, $1, $3); }
	| term '*' term
		{ $$ = op2(MULT, $1, $3); }
	| term '/' term
		{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term
		{ $$ = op2(MOD, $1, $3); }
	| term POWER term
		{ $$ = op2(POWER, $1, $3); }

	/* unary operators; unary plus yields its operand unchanged */
	| '-' term %prec UMINUS
		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS
		{ $$ = $2; }
	| NOT term %prec UMINUS
		{ $$ = op1(NOT, notnull($2)); }

	/* builtins and user function calls */
	| BLTIN '(' ')'
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'
		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term
		{ $$ = op1(CLOSE, $2); }

	/* increment / decrement */
	| DECR var
		{ $$ = op1(PREDECR, $2); }
	| INCR var
		{ $$ = op1(PREINCR, $2); }
	| var DECR
		{ $$ = op1(POSTDECR, $1); }
	| var INCR
		{ $$ = op1(POSTINCR, $1); }

	/* getline, with optional variable and optional "< file" */
	| GETLINE var LT term
		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term
		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var
		{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE
		{ $$ = op3(GETLINE, NIL, NIL, NIL); }

	/* index(); a regular expression as second argument is an error */
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{
			SYNTAX("index() doesn't permit regular expressions");
			$$ = op2(INDEX, $3, (Node*)$5);
		}

	| '(' pattern ')'
		{ $$ = $2; }

	/* match() */
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{
			if (!constnode($5))
				$$ = op3(MATCHFCN, (Node *)1, $3, $5);
			else
				$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		}

	| NUMBER
		{ $$ = celltonode($1, CCON); }

	/* split(): last operand records the separator kind (STRING or REGEXPR) */
	| SPLIT '(' pattern comma varname comma pattern ')'
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }

	| SPRINTF '(' patlist ')'
		{ $$ = op1($1, $3); }
	| STRING
		{ $$ = celltonode($1, CCON); }

	/* sub() / gsub(): two-argument forms use rectonode() as the target */
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{
			if (!constnode($3))
				$$ = op4($1, (Node *)1, $3, $5, rectonode());
			else
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		}
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{
			if (!constnode($3))
				$$ = op4($1, (Node *)1, $3, $5, $7);
			else
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		}

	/* substr(), with and without a length */
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }

	| var
	;
418
/*
 * var: anything that names a value: a plain variable name, a subscripted
 * array element (ARRAY node), an IVAR token, or INDIRECT applied to a term.
 */
var:
	  varname
	| varname '[' patlist ']'
		{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR
		{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term
		{ $$ = op1(INDIRECT, $2); }
	;
425
/*
 * varlist: the (possibly empty) parameter list of a function definition.
 * Each reduction also stores the list built so far in arglist; a name
 * that is already in the list is reported by checkdup().
 */
varlist:
	  /* nothing */
		{
			$$ = 0;
			arglist = $$;
		}
	| VAR
		{
			$$ = celltonode($1,CVAR);
			arglist = $$;
		}
	| varlist comma VAR
		{
			checkdup($1, $3);
			$$ = linkum($1,celltonode($3,CVAR));
			arglist = $$;
		}
	;
433
/* varname: a VAR token, an ARG token, or a VARNF token, each turned into a node. */
varname:
	  VAR
		{ $$ = celltonode($1, CVAR); }
	| ARG
		{ $$ = op1(ARG, itonp($1)); }
	| VARNF
		{ $$ = op1(VARNF, (Node *) $1); }
	;
439
440
/* while: "while (pattern)" head; yields the condition passed through notnull(). */
while:
	  WHILE '(' pattern rparen
		{ $$ = notnull($3); }
	;
444
445 %%
446
447 void setfname(Cell *p)
448 {
449 if (isarr(p))
450 SYNTAX("%s is an array, not a function", p->nval);
451 else if (isfcn(p))
452 SYNTAX("you can't define function %s more than once", p->nval);
453 curfname = p->nval;
454 }
455
456 int constnode(Node *p)
457 {
458 return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
459 }
460
461 char *strnode(Node *p)
462 {
463 return ((Cell *)(p->narg[0]))->sval;
464 }
465
466 Node *notnull(Node *n)
467 {
468 switch (n->nobj) {
469 case LE: case LT: case EQ: case NE: case GT: case GE:
470 case BOR: case AND: case NOT:
471 return n;
472 default:
473 return op2(NE, n, nullnode);
474 }
475 }
476
477 void checkdup(Node *vl, Cell *cp) /* check if name already in list */
478 {
479 char *s = cp->nval;
480 for ( ; vl; vl = vl->nnext) {
481 if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
482 SYNTAX("duplicate argument %s", s);
483 break;
484 }
485 }
486 }
487