1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "awkgram.tab.h"
31
32 extern YYSTYPE yylval;
33 extern bool infunc;
34
int	lineno = 1;	/* source line counter; the first line is 1 */
int	bracecnt = 0;	/* incremented on '{', decremented on '}' */
int	brackcnt = 0;	/* incremented on '[', decremented on ']' */
int	parencnt = 0;	/* incremented on '(', decremented on ')' */
39
/* One row of the reserved-word table that word() searches. */
typedef struct Keyword {
	const char	*word;	/* spelling of the reserved word */
	int		sub;	/* value word() stores in yylval.i */
	int		type;	/* token code word() returns */
} Keyword;
45
46 const Keyword keywords[] = { /* keep sorted: binary searched */
47 { "BEGIN", XBEGIN, XBEGIN },
48 { "END", XEND, XEND },
49 { "NF", VARNF, VARNF },
50 { "atan2", FATAN, BLTIN },
51 { "break", BREAK, BREAK },
52 { "close", CLOSE, CLOSE },
53 { "continue", CONTINUE, CONTINUE },
54 { "cos", FCOS, BLTIN },
55 { "delete", DELETE, DELETE },
56 { "do", DO, DO },
57 { "else", ELSE, ELSE },
58 { "exit", EXIT, EXIT },
59 { "exp", FEXP, BLTIN },
60 { "fflush", FFLUSH, BLTIN },
61 { "for", FOR, FOR },
62 { "func", FUNC, FUNC },
63 { "function", FUNC, FUNC },
64 { "getline", GETLINE, GETLINE },
65 { "gsub", GSUB, GSUB },
66 { "if", IF, IF },
67 { "in", IN, IN },
68 { "index", INDEX, INDEX },
69 { "int", FINT, BLTIN },
70 { "length", FLENGTH, BLTIN },
71 { "log", FLOG, BLTIN },
72 { "match", MATCHFCN, MATCHFCN },
73 { "next", NEXT, NEXT },
74 { "nextfile", NEXTFILE, NEXTFILE },
75 { "print", PRINT, PRINT },
76 { "printf", PRINTF, PRINTF },
77 { "rand", FRAND, BLTIN },
78 { "return", RETURN, RETURN },
79 { "sin", FSIN, BLTIN },
80 { "split", SPLIT, SPLIT },
81 { "sprintf", SPRINTF, SPRINTF },
82 { "sqrt", FSQRT, BLTIN },
83 { "srand", FSRAND, BLTIN },
84 { "sub", SUB, SUB },
85 { "substr", SUBSTR, SUBSTR },
86 { "system", FSYSTEM, BLTIN },
87 { "tolower", FTOLOWER, BLTIN },
88 { "toupper", FTOUPPER, BLTIN },
89 { "while", WHILE, WHILE },
90 };
91
/*
 * RET(x) - return token x from the enclosing function, first printing its
 * name when dbg is set.  Wrapped in do { } while (0) so that "RET(x);" is a
 * single statement in every context (including an unbraced if/else), and
 * x is evaluated exactly once.
 */
#define RET(x) do { \
	int ret_token_ = (x); \
	if (dbg) \
		printf("lex %s\n", tokname(ret_token_)); \
	return ret_token_; \
} while (0)
93
/*
 * peek - report the next input character without consuming it.
 * The character is read with input() and immediately handed back with
 * unput(), so every side effect of that pair applies (unput() decrements
 * lineno when the character is a newline).
 */
static int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
100
gettok(char ** pbuf,int * psz)101 static int gettok(char **pbuf, int *psz) /* get next input token */
102 {
103 int c, retc;
104 char *buf = *pbuf;
105 int sz = *psz;
106 char *bp = buf;
107
108 c = input();
109 if (c == 0)
110 return 0;
111 buf[0] = c;
112 buf[1] = 0;
113 if (!isalnum(c) && c != '.' && c != '_')
114 return c;
115
116 *bp++ = c;
117 if (isalpha(c) || c == '_') { /* it's a varname */
118 for ( ; (c = input()) != 0; ) {
119 if (bp-buf >= sz)
120 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
121 FATAL( "out of space for name %.10s...", buf );
122 if (isalnum(c) || c == '_')
123 *bp++ = c;
124 else {
125 *bp = 0;
126 unput(c);
127 break;
128 }
129 }
130 *bp = 0;
131 retc = 'a'; /* alphanumeric */
132 } else { /* maybe it's a number, but could be . */
133 char *rem;
134 /* read input until can't be a number */
135 for ( ; (c = input()) != 0; ) {
136 if (bp-buf >= sz)
137 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
138 FATAL( "out of space for number %.10s...", buf );
139 if (isdigit(c) || c == 'e' || c == 'E'
140 || c == '.' || c == '+' || c == '-')
141 *bp++ = c;
142 else {
143 unput(c);
144 break;
145 }
146 }
147 *bp = 0;
148 strtod(buf, &rem); /* parse the number */
149 if (rem == buf) { /* it wasn't a valid number at all */
150 buf[1] = 0; /* return one character as token */
151 retc = (uschar)buf[0]; /* character is its own type */
152 unputstr(rem+1); /* put rest back for later */
153 } else { /* some prefix was a number */
154 unputstr(rem); /* put rest back for later */
155 rem[0] = 0; /* truncate buf after number part */
156 retc = '0'; /* type is number */
157 }
158 }
159 *pbuf = buf;
160 *psz = sz;
161 return retc;
162 }
163
/* helpers that yylex() calls before their definitions appear */
int	word(char *);
int	string(void);
int	regexpr(void);

/* one-shot flags examined at the top of yylex() */
bool	sc = false;	/* true => return a } right now */
bool	reg = false;	/* true => return a REGEXPR now */
169
yylex(void)170 int yylex(void)
171 {
172 int c;
173 static char *buf = NULL;
174 static int bufsize = 5; /* BUG: setting this small causes core dump! */
175
176 if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
177 FATAL( "out of space in yylex" );
178 if (sc) {
179 sc = false;
180 RET('}');
181 }
182 if (reg) {
183 reg = false;
184 return regexpr();
185 }
186 for (;;) {
187 c = gettok(&buf, &bufsize);
188 if (c == 0)
189 return 0;
190 if (isalpha(c) || c == '_')
191 return word(buf);
192 if (isdigit(c)) {
193 char *cp = tostring(buf);
194 double result;
195
196 if (is_number(cp, & result))
197 yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
198 else
199 yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
200 free(cp);
201 /* should this also have STR set? */
202 RET(NUMBER);
203 }
204
205 yylval.i = c;
206 switch (c) {
207 case '\n': /* {EOL} */
208 lineno++;
209 RET(NL);
210 case '\r': /* assume \n is coming */
211 case ' ': /* {WS}+ */
212 case '\t':
213 break;
214 case '#': /* #.* strip comments */
215 while ((c = input()) != '\n' && c != 0)
216 ;
217 unput(c);
218 /*
219 * Next line is a hack, itcompensates for
220 * unput's treatment of \n.
221 */
222 lineno++;
223 break;
224 case ';':
225 RET(';');
226 case '\\':
227 if (peek() == '\n') {
228 input();
229 lineno++;
230 } else if (peek() == '\r') {
231 input(); input(); /* \n */
232 lineno++;
233 } else {
234 RET(c);
235 }
236 break;
237 case '&':
238 if (peek() == '&') {
239 input(); RET(AND);
240 } else
241 RET('&');
242 case '|':
243 if (peek() == '|') {
244 input(); RET(BOR);
245 } else
246 RET('|');
247 case '!':
248 if (peek() == '=') {
249 input(); yylval.i = NE; RET(NE);
250 } else if (peek() == '~') {
251 input(); yylval.i = NOTMATCH; RET(MATCHOP);
252 } else
253 RET(NOT);
254 case '~':
255 yylval.i = MATCH;
256 RET(MATCHOP);
257 case '<':
258 if (peek() == '=') {
259 input(); yylval.i = LE; RET(LE);
260 } else {
261 yylval.i = LT; RET(LT);
262 }
263 case '=':
264 if (peek() == '=') {
265 input(); yylval.i = EQ; RET(EQ);
266 } else {
267 yylval.i = ASSIGN; RET(ASGNOP);
268 }
269 case '>':
270 if (peek() == '=') {
271 input(); yylval.i = GE; RET(GE);
272 } else if (peek() == '>') {
273 input(); yylval.i = APPEND; RET(APPEND);
274 } else {
275 yylval.i = GT; RET(GT);
276 }
277 case '+':
278 if (peek() == '+') {
279 input(); yylval.i = INCR; RET(INCR);
280 } else if (peek() == '=') {
281 input(); yylval.i = ADDEQ; RET(ASGNOP);
282 } else
283 RET('+');
284 case '-':
285 if (peek() == '-') {
286 input(); yylval.i = DECR; RET(DECR);
287 } else if (peek() == '=') {
288 input(); yylval.i = SUBEQ; RET(ASGNOP);
289 } else
290 RET('-');
291 case '*':
292 if (peek() == '=') { /* *= */
293 input(); yylval.i = MULTEQ; RET(ASGNOP);
294 } else if (peek() == '*') { /* ** or **= */
295 input(); /* eat 2nd * */
296 if (peek() == '=') {
297 input(); yylval.i = POWEQ; RET(ASGNOP);
298 } else {
299 RET(POWER);
300 }
301 } else
302 RET('*');
303 case '/':
304 RET('/');
305 case '%':
306 if (peek() == '=') {
307 input(); yylval.i = MODEQ; RET(ASGNOP);
308 } else
309 RET('%');
310 case '^':
311 if (peek() == '=') {
312 input(); yylval.i = POWEQ; RET(ASGNOP);
313 } else
314 RET(POWER);
315
316 case '$':
317 /* BUG: awkward, if not wrong */
318 c = gettok(&buf, &bufsize);
319 if (isalpha(c)) {
320 if (strcmp(buf, "NF") == 0) { /* very special */
321 unputstr("(NF)");
322 RET(INDIRECT);
323 }
324 c = peek();
325 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
326 unputstr(buf);
327 RET(INDIRECT);
328 }
329 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
330 RET(IVAR);
331 } else if (c == 0) { /* */
332 SYNTAX( "unexpected end of input after $" );
333 RET(';');
334 } else {
335 unputstr(buf);
336 RET(INDIRECT);
337 }
338
339 case '}':
340 if (--bracecnt < 0)
341 SYNTAX( "extra }" );
342 sc = true;
343 RET(';');
344 case ']':
345 if (--brackcnt < 0)
346 SYNTAX( "extra ]" );
347 RET(']');
348 case ')':
349 if (--parencnt < 0)
350 SYNTAX( "extra )" );
351 RET(')');
352 case '{':
353 bracecnt++;
354 RET('{');
355 case '[':
356 brackcnt++;
357 RET('[');
358 case '(':
359 parencnt++;
360 RET('(');
361
362 case '"':
363 return string(); /* BUG: should be like tran.c ? */
364
365 default:
366 RET(c);
367 }
368 }
369 }
370
string(void)371 int string(void)
372 {
373 int c, n;
374 char *s, *bp;
375 static char *buf = NULL;
376 static int bufsz = 500;
377
378 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
379 FATAL("out of space for strings");
380 for (bp = buf; (c = input()) != '"'; ) {
381 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
382 FATAL("out of space for string %.10s...", buf);
383 switch (c) {
384 case '\n':
385 case '\r':
386 case 0:
387 *bp = '\0';
388 SYNTAX( "non-terminated string %.10s...", buf );
389 if (c == 0) /* hopeless */
390 FATAL( "giving up" );
391 lineno++;
392 break;
393 case '\\':
394 c = input();
395 switch (c) {
396 case '\n': break;
397 case '"': *bp++ = '"'; break;
398 case 'n': *bp++ = '\n'; break;
399 case 't': *bp++ = '\t'; break;
400 case 'f': *bp++ = '\f'; break;
401 case 'r': *bp++ = '\r'; break;
402 case 'b': *bp++ = '\b'; break;
403 case 'v': *bp++ = '\v'; break;
404 case 'a': *bp++ = '\a'; break;
405 case '\\': *bp++ = '\\'; break;
406
407 case '0': case '1': case '2': /* octal: \d \dd \ddd */
408 case '3': case '4': case '5': case '6': case '7':
409 n = c - '0';
410 if ((c = peek()) >= '0' && c < '8') {
411 n = 8 * n + input() - '0';
412 if ((c = peek()) >= '0' && c < '8')
413 n = 8 * n + input() - '0';
414 }
415 *bp++ = n;
416 break;
417
418 case 'x': /* hex \x0-9a-fA-F + */
419 {
420 int i;
421
422 n = 0;
423 for (i = 1; i <= 2; i++) {
424 c = input();
425 if (c == 0)
426 break;
427 if (isxdigit(c)) {
428 c = tolower(c);
429 n *= 16;
430 if (isdigit(c))
431 n += (c - '0');
432 else
433 n += 10 + (c - 'a');
434 } else
435 break;
436 }
437 if (n)
438 *bp++ = n;
439 else
440 unput(c);
441 break;
442 }
443
444 default:
445 *bp++ = c;
446 break;
447 }
448 break;
449 default:
450 *bp++ = c;
451 break;
452 }
453 }
454 *bp = 0;
455 s = tostring(buf);
456 *bp++ = ' '; *bp++ = '\0';
457 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
458 free(s);
459 RET(STRING);
460 }
461
462
binsearch(char * w,const Keyword * kp,int n)463 static int binsearch(char *w, const Keyword *kp, int n)
464 {
465 int cond, low, mid, high;
466
467 low = 0;
468 high = n - 1;
469 while (low <= high) {
470 mid = (low + high) / 2;
471 if ((cond = strcmp(w, kp[mid].word)) < 0)
472 high = mid - 1;
473 else if (cond > 0)
474 low = mid + 1;
475 else
476 return mid;
477 }
478 return -1;
479 }
480
word(char * w)481 int word(char *w)
482 {
483 const Keyword *kp;
484 int c, n;
485
486 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
487 if (n != -1) { /* found in table */
488 kp = keywords + n;
489 yylval.i = kp->sub;
490 switch (kp->type) { /* special handling */
491 case BLTIN:
492 if (kp->sub == FSYSTEM && safe)
493 SYNTAX( "system is unsafe" );
494 RET(kp->type);
495 case FUNC:
496 if (infunc)
497 SYNTAX( "illegal nested function" );
498 RET(kp->type);
499 case RETURN:
500 if (!infunc)
501 SYNTAX( "return not in function" );
502 RET(kp->type);
503 case VARNF:
504 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
505 RET(VARNF);
506 default:
507 RET(kp->type);
508 }
509 }
510 c = peek(); /* look for '(' */
511 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
512 yylval.i = n;
513 RET(ARG);
514 } else {
515 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
516 if (c == '(') {
517 RET(CALL);
518 } else {
519 RET(VAR);
520 }
521 }
522 }
523
startreg(void)524 void startreg(void) /* next call to yylex will return a regular expression */
525 {
526 reg = true;
527 }
528
regexpr(void)529 int regexpr(void)
530 {
531 int c;
532 static char *buf = NULL;
533 static int bufsz = 500;
534 char *bp;
535
536 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
537 FATAL("out of space for reg expr");
538 bp = buf;
539 for ( ; (c = input()) != '/' && c != 0; ) {
540 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
541 FATAL("out of space for reg expr %.10s...", buf);
542 if (c == '\n') {
543 *bp = '\0';
544 SYNTAX( "newline in regular expression %.10s...", buf );
545 unput('\n');
546 break;
547 } else if (c == '\\') {
548 *bp++ = '\\';
549 *bp++ = input();
550 } else {
551 *bp++ = c;
552 }
553 }
554 *bp = 0;
555 if (c == 0)
556 SYNTAX("non-terminated regular expression %.10s...", buf);
557 yylval.s = buf;
558 unput('/');
559 RET(REGEXPR);
560 }
561
562 /* low-level lexical stuff, sort of inherited from lex */
563
char	ebuf[300];		/* circular record of characters handed out by input() */
char	*ep = ebuf;		/* next slot to fill in ebuf */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed-back character */
FILE	*yyin = NULL;
569
input(void)570 int input(void) /* get next lexical input character */
571 {
572 int c;
573 extern char *lexprog;
574
575 if (yysptr > yysbuf)
576 c = (uschar)*--yysptr;
577 else if (lexprog != NULL) { /* awk '...' */
578 if ((c = (uschar)*lexprog) != 0)
579 lexprog++;
580 } else /* awk -f ... */
581 c = pgetc();
582 if (c == EOF)
583 c = 0;
584 if (ep >= ebuf + sizeof ebuf)
585 ep = ebuf;
586 *ep = c;
587 if (c != 0) {
588 ep++;
589 }
590 return (c);
591 }
592
unput(int c)593 void unput(int c) /* put lexical character back on input */
594 {
595 if (c == '\n')
596 lineno--;
597 if (yysptr >= yysbuf + sizeof(yysbuf))
598 FATAL("pushed back too much: %.20s...", yysbuf);
599 *yysptr++ = c;
600 if (--ep < ebuf)
601 ep = ebuf + sizeof(ebuf) - 1;
602 }
603
/*
 * unputstr - push the whole string s back onto the input.
 * Characters are pushed last-to-first so that they are read back in their
 * original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)
{
	size_t i;

	/*
	 * Count down in size_t: no unsigned wrap-around for the empty
	 * string and no narrowing of strlen()'s result to int.
	 */
	for (i = strlen(s); i > 0; i--)
		unput(s[i - 1]);
}
611