• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "ytab.h"
31 
32 extern YYSTYPE	yylval;
33 extern bool	infunc;
34 
35 int	lineno	= 1;
36 int	bracecnt = 0;
37 int	brackcnt  = 0;
38 int	parencnt = 0;
39 
40 typedef struct Keyword {
41 	const char *word;
42 	int	sub;
43 	int	type;
44 } Keyword;
45 
46 const Keyword keywords[] = {	/* keep sorted: binary searched */
47 	{ "BEGIN",	XBEGIN,		XBEGIN },
48 	{ "END",	XEND,		XEND },
49 	{ "NF",		VARNF,		VARNF },
50 	{ "atan2",	FATAN,		BLTIN },
51 	{ "break",	BREAK,		BREAK },
52 	{ "close",	CLOSE,		CLOSE },
53 	{ "continue",	CONTINUE,	CONTINUE },
54 	{ "cos",	FCOS,		BLTIN },
55 	{ "delete",	DELETE,		DELETE },
56 	{ "do",		DO,		DO },
57 	{ "else",	ELSE,		ELSE },
58 	{ "exit",	EXIT,		EXIT },
59 	{ "exp",	FEXP,		BLTIN },
60 	{ "fflush",	FFLUSH,		BLTIN },
61 	{ "for",	FOR,		FOR },
62 	{ "func",	FUNC,		FUNC },
63 	{ "function",	FUNC,		FUNC },
64 	{ "getline",	GETLINE,	GETLINE },
65 	{ "gsub",	GSUB,		GSUB },
66 	{ "if",		IF,		IF },
67 	{ "in",		IN,		IN },
68 	{ "index",	INDEX,		INDEX },
69 	{ "int",	FINT,		BLTIN },
70 	{ "length",	FLENGTH,	BLTIN },
71 	{ "log",	FLOG,		BLTIN },
72 	{ "match",	MATCHFCN,	MATCHFCN },
73 	{ "next",	NEXT,		NEXT },
74 	{ "nextfile",	NEXTFILE,	NEXTFILE },
75 	{ "print",	PRINT,		PRINT },
76 	{ "printf",	PRINTF,		PRINTF },
77 	{ "rand",	FRAND,		BLTIN },
78 	{ "return",	RETURN,		RETURN },
79 	{ "sin",	FSIN,		BLTIN },
80 	{ "split",	SPLIT,		SPLIT },
81 	{ "sprintf",	SPRINTF,	SPRINTF },
82 	{ "sqrt",	FSQRT,		BLTIN },
83 	{ "srand",	FSRAND,		BLTIN },
84 	{ "sub",	SUB,		SUB },
85 	{ "substr",	SUBSTR,		SUBSTR },
86 	{ "system",	FSYSTEM,	BLTIN },
87 	{ "tolower",	FTOLOWER,	BLTIN },
88 	{ "toupper",	FTOUPPER,	BLTIN },
89 	{ "while",	WHILE,		WHILE },
90 };
91 
/*
 * Return token x from the enclosing function; when dbg is set, first print
 * its name as given by tokname().  x may be evaluated twice, so pass only
 * side-effect-free expressions.  The do/while(0) wrapper makes `RET(x);`
 * one statement, so it is safe in an unbraced if/else.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
93 
/* Report the next input character without consuming it. */
static int peek(void)
{
	int next;

	next = input();
	unput(next);	/* push it straight back so the next input() returns it again */
	return next;
}
100 
/*
 * gettok - read the next raw token from the input into *pbuf.
 *
 * pbuf, psz: the caller's buffer and its size.  The buffer may be grown
 * through adjbuf(); both are updated on return when a name or number was
 * scanned.  NOTE(review): the buffer is assumed to hold at least 2 bytes
 * on entry (yylex() starts it at 5).
 *
 * Returns 0 at end of input, 'a' for a name, '0' for a number; any other
 * character is returned as itself with buf holding it as a 1-char string.
 */
static int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating NUL,
			 * so the final "*bp = 0" is in bounds even when the
			 * loop is left because input ran out.
			 */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same room-for-char-plus-NUL rule as above */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the rest back BEFORE truncating: rem+1 is buf+1,
			 * so truncating first would push back an empty string
			 * and silently drop those characters.
			 */
			unputstr(rem+1);
			buf[1] = 0;	/* return one character as token */
			retc = (unsigned char)buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
163 
int	word(char *w);
int	string(void);
int	regexpr(void);

/* one-shot flags, tested and cleared at the top of yylex() */
bool	sc	= false;	/* true => return a } right now */
bool	reg	= false;	/* true => return a REGEXPR now */
169 
yylex(void)170 int yylex(void)
171 {
172 	int c;
173 	static char *buf = NULL;
174 	static int bufsize = 5; /* BUG: setting this small causes core dump! */
175 
176 	if (buf == NULL && (buf = malloc(bufsize)) == NULL)
177 		FATAL( "out of space in yylex" );
178 	if (sc) {
179 		sc = false;
180 		RET('}');
181 	}
182 	if (reg) {
183 		reg = false;
184 		return regexpr();
185 	}
186 	for (;;) {
187 		c = gettok(&buf, &bufsize);
188 		if (c == 0)
189 			return 0;
190 		if (isalpha(c) || c == '_')
191 			return word(buf);
192 		if (isdigit(c)) {
193 			char *cp = tostring(buf);
194 			yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
195 			free(cp);
196 			/* should this also have STR set? */
197 			RET(NUMBER);
198 		}
199 
200 		yylval.i = c;
201 		switch (c) {
202 		case '\n':	/* {EOL} */
203 			lineno++;
204 			RET(NL);
205 		case '\r':	/* assume \n is coming */
206 		case ' ':	/* {WS}+ */
207 		case '\t':
208 			break;
209 		case '#':	/* #.* strip comments */
210 			while ((c = input()) != '\n' && c != 0)
211 				;
212 			unput(c);
213 			/*
214 			 * Next line is a hack, itcompensates for
215 			 * unput's treatment of \n.
216 			 */
217 			lineno++;
218 			break;
219 		case ';':
220 			RET(';');
221 		case '\\':
222 			if (peek() == '\n') {
223 				input();
224 				lineno++;
225 			} else if (peek() == '\r') {
226 				input(); input();	/* \n */
227 				lineno++;
228 			} else {
229 				RET(c);
230 			}
231 			break;
232 		case '&':
233 			if (peek() == '&') {
234 				input(); RET(AND);
235 			} else
236 				RET('&');
237 		case '|':
238 			if (peek() == '|') {
239 				input(); RET(BOR);
240 			} else
241 				RET('|');
242 		case '!':
243 			if (peek() == '=') {
244 				input(); yylval.i = NE; RET(NE);
245 			} else if (peek() == '~') {
246 				input(); yylval.i = NOTMATCH; RET(MATCHOP);
247 			} else
248 				RET(NOT);
249 		case '~':
250 			yylval.i = MATCH;
251 			RET(MATCHOP);
252 		case '<':
253 			if (peek() == '=') {
254 				input(); yylval.i = LE; RET(LE);
255 			} else {
256 				yylval.i = LT; RET(LT);
257 			}
258 		case '=':
259 			if (peek() == '=') {
260 				input(); yylval.i = EQ; RET(EQ);
261 			} else {
262 				yylval.i = ASSIGN; RET(ASGNOP);
263 			}
264 		case '>':
265 			if (peek() == '=') {
266 				input(); yylval.i = GE; RET(GE);
267 			} else if (peek() == '>') {
268 				input(); yylval.i = APPEND; RET(APPEND);
269 			} else {
270 				yylval.i = GT; RET(GT);
271 			}
272 		case '+':
273 			if (peek() == '+') {
274 				input(); yylval.i = INCR; RET(INCR);
275 			} else if (peek() == '=') {
276 				input(); yylval.i = ADDEQ; RET(ASGNOP);
277 			} else
278 				RET('+');
279 		case '-':
280 			if (peek() == '-') {
281 				input(); yylval.i = DECR; RET(DECR);
282 			} else if (peek() == '=') {
283 				input(); yylval.i = SUBEQ; RET(ASGNOP);
284 			} else
285 				RET('-');
286 		case '*':
287 			if (peek() == '=') {	/* *= */
288 				input(); yylval.i = MULTEQ; RET(ASGNOP);
289 			} else if (peek() == '*') {	/* ** or **= */
290 				input();	/* eat 2nd * */
291 				if (peek() == '=') {
292 					input(); yylval.i = POWEQ; RET(ASGNOP);
293 				} else {
294 					RET(POWER);
295 				}
296 			} else
297 				RET('*');
298 		case '/':
299 			RET('/');
300 		case '%':
301 			if (peek() == '=') {
302 				input(); yylval.i = MODEQ; RET(ASGNOP);
303 			} else
304 				RET('%');
305 		case '^':
306 			if (peek() == '=') {
307 				input(); yylval.i = POWEQ; RET(ASGNOP);
308 			} else
309 				RET(POWER);
310 
311 		case '$':
312 			/* BUG: awkward, if not wrong */
313 			c = gettok(&buf, &bufsize);
314 			if (isalpha(c)) {
315 				if (strcmp(buf, "NF") == 0) {	/* very special */
316 					unputstr("(NF)");
317 					RET(INDIRECT);
318 				}
319 				c = peek();
320 				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
321 					unputstr(buf);
322 					RET(INDIRECT);
323 				}
324 				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
325 				RET(IVAR);
326 			} else if (c == 0) {	/*  */
327 				SYNTAX( "unexpected end of input after $" );
328 				RET(';');
329 			} else {
330 				unputstr(buf);
331 				RET(INDIRECT);
332 			}
333 
334 		case '}':
335 			if (--bracecnt < 0)
336 				SYNTAX( "extra }" );
337 			sc = true;
338 			RET(';');
339 		case ']':
340 			if (--brackcnt < 0)
341 				SYNTAX( "extra ]" );
342 			RET(']');
343 		case ')':
344 			if (--parencnt < 0)
345 				SYNTAX( "extra )" );
346 			RET(')');
347 		case '{':
348 			bracecnt++;
349 			RET('{');
350 		case '[':
351 			brackcnt++;
352 			RET('[');
353 		case '(':
354 			parencnt++;
355 			RET('(');
356 
357 		case '"':
358 			return string();	/* BUG: should be like tran.c ? */
359 
360 		default:
361 			RET(c);
362 		}
363 	}
364 }
365 
string(void)366 int string(void)
367 {
368 	int c, n;
369 	char *s, *bp;
370 	static char *buf = NULL;
371 	static int bufsz = 500;
372 
373 	if (buf == NULL && (buf = malloc(bufsz)) == NULL)
374 		FATAL("out of space for strings");
375 	for (bp = buf; (c = input()) != '"'; ) {
376 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
377 			FATAL("out of space for string %.10s...", buf);
378 		switch (c) {
379 		case '\n':
380 		case '\r':
381 		case 0:
382 			*bp = '\0';
383 			SYNTAX( "non-terminated string %.10s...", buf );
384 			if (c == 0)	/* hopeless */
385 				FATAL( "giving up" );
386 			lineno++;
387 			break;
388 		case '\\':
389 			c = input();
390 			switch (c) {
391 			case '"': *bp++ = '"'; break;
392 			case 'n': *bp++ = '\n'; break;
393 			case 't': *bp++ = '\t'; break;
394 			case 'f': *bp++ = '\f'; break;
395 			case 'r': *bp++ = '\r'; break;
396 			case 'b': *bp++ = '\b'; break;
397 			case 'v': *bp++ = '\v'; break;
398 			case 'a': *bp++ = '\a'; break;
399 			case '\\': *bp++ = '\\'; break;
400 
401 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
402 			case '3': case '4': case '5': case '6': case '7':
403 				n = c - '0';
404 				if ((c = peek()) >= '0' && c < '8') {
405 					n = 8 * n + input() - '0';
406 					if ((c = peek()) >= '0' && c < '8')
407 						n = 8 * n + input() - '0';
408 				}
409 				*bp++ = n;
410 				break;
411 
412 			case 'x':	/* hex  \x0-9a-fA-F + */
413 			    {	char xbuf[100], *px;
414 				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
415 					if (isdigit(c)
416 					 || (c >= 'a' && c <= 'f')
417 					 || (c >= 'A' && c <= 'F'))
418 						*px++ = c;
419 					else
420 						break;
421 				}
422 				*px = 0;
423 				unput(c);
424 	  			sscanf(xbuf, "%x", (unsigned int *) &n);
425 				*bp++ = n;
426 				break;
427 			    }
428 
429 			default:
430 				*bp++ = c;
431 				break;
432 			}
433 			break;
434 		default:
435 			*bp++ = c;
436 			break;
437 		}
438 	}
439 	*bp = 0;
440 	s = tostring(buf);
441 	*bp++ = ' '; *bp++ = '\0';
442 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
443 	free(s);
444 	RET(STRING);
445 }
446 
447 
binsearch(char * w,const Keyword * kp,int n)448 static int binsearch(char *w, const Keyword *kp, int n)
449 {
450 	int cond, low, mid, high;
451 
452 	low = 0;
453 	high = n - 1;
454 	while (low <= high) {
455 		mid = (low + high) / 2;
456 		if ((cond = strcmp(w, kp[mid].word)) < 0)
457 			high = mid - 1;
458 		else if (cond > 0)
459 			low = mid + 1;
460 		else
461 			return mid;
462 	}
463 	return -1;
464 }
465 
word(char * w)466 int word(char *w)
467 {
468 	const Keyword *kp;
469 	int c, n;
470 
471 	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
472 	if (n != -1) {	/* found in table */
473 		kp = keywords + n;
474 		yylval.i = kp->sub;
475 		switch (kp->type) {	/* special handling */
476 		case BLTIN:
477 			if (kp->sub == FSYSTEM && safe)
478 				SYNTAX( "system is unsafe" );
479 			RET(kp->type);
480 		case FUNC:
481 			if (infunc)
482 				SYNTAX( "illegal nested function" );
483 			RET(kp->type);
484 		case RETURN:
485 			if (!infunc)
486 				SYNTAX( "return not in function" );
487 			RET(kp->type);
488 		case VARNF:
489 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
490 			RET(VARNF);
491 		default:
492 			RET(kp->type);
493 		}
494 	}
495 	c = peek();	/* look for '(' */
496 	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
497 		yylval.i = n;
498 		RET(ARG);
499 	} else {
500 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
501 		if (c == '(') {
502 			RET(CALL);
503 		} else {
504 			RET(VAR);
505 		}
506 	}
507 }
508 
startreg(void)509 void startreg(void)	/* next call to yylex will return a regular expression */
510 {
511 	reg = true;
512 }
513 
regexpr(void)514 int regexpr(void)
515 {
516 	int c;
517 	static char *buf = NULL;
518 	static int bufsz = 500;
519 	char *bp;
520 
521 	if (buf == NULL && (buf = malloc(bufsz)) == NULL)
522 		FATAL("out of space for rex expr");
523 	bp = buf;
524 	for ( ; (c = input()) != '/' && c != 0; ) {
525 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
526 			FATAL("out of space for reg expr %.10s...", buf);
527 		if (c == '\n') {
528 			*bp = '\0';
529 			SYNTAX( "newline in regular expression %.10s...", buf );
530 			unput('\n');
531 			break;
532 		} else if (c == '\\') {
533 			*bp++ = '\\';
534 			*bp++ = input();
535 		} else {
536 			*bp++ = c;
537 		}
538 	}
539 	*bp = 0;
540 	if (c == 0)
541 		SYNTAX("non-terminated regular expression %.10s...", buf);
542 	yylval.s = tostring(buf);
543 	unput('/');
544 	RET(REGEXPR);
545 }
546 
/* low-level lexical stuff, sort of inherited from lex */

/* pushback stack: unput() pushes at yysptr, input() pops from it first */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;

/* circular record of the characters handed out by input() */
char	ebuf[300];
char	*ep = ebuf;	/* slot the next character will be recorded in */

FILE	*yyin = NULL;
554 
input(void)555 int input(void)	/* get next lexical input character */
556 {
557 	int c;
558 	extern char *lexprog;
559 
560 	if (yysptr > yysbuf)
561 		c = (uschar)*--yysptr;
562 	else if (lexprog != NULL) {	/* awk '...' */
563 		if ((c = (uschar)*lexprog) != 0)
564 			lexprog++;
565 	} else				/* awk -f ... */
566 		c = pgetc();
567 	if (c == EOF)
568 		c = 0;
569 	if (ep >= ebuf + sizeof ebuf)
570 		ep = ebuf;
571 	*ep = c;
572 	if (c != 0) {
573 		ep++;
574 	}
575 	return (c);
576 }
577 
unput(int c)578 void unput(int c)	/* put lexical character back on input */
579 {
580 	if (c == '\n')
581 		lineno--;
582 	if (yysptr >= yysbuf + sizeof(yysbuf))
583 		FATAL("pushed back too much: %.20s...", yysbuf);
584 	*yysptr++ = c;
585 	if (--ep < ebuf)
586 		ep = ebuf + sizeof(ebuf) - 1;
587 }
588 
/*
 * unputstr - push the whole string s back on the input.  Characters are
 * pushed last-to-first so that input() hands them out in their original
 * order.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
596