/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "awkgram.tab.h"
31 
32 extern YYSTYPE	yylval;
33 extern bool	infunc;
34 
35 int	lineno	= 1;
36 int	bracecnt = 0;
37 int	brackcnt  = 0;
38 int	parencnt = 0;
39 
40 typedef struct Keyword {
41 	const char *word;
42 	int	sub;
43 	int	type;
44 } Keyword;
45 
46 const Keyword keywords[] = {	/* keep sorted: binary searched */
47 	{ "BEGIN",	XBEGIN,		XBEGIN },
48 	{ "END",	XEND,		XEND },
49 	{ "NF",		VARNF,		VARNF },
50 	{ "atan2",	FATAN,		BLTIN },
51 	{ "break",	BREAK,		BREAK },
52 	{ "close",	CLOSE,		CLOSE },
53 	{ "continue",	CONTINUE,	CONTINUE },
54 	{ "cos",	FCOS,		BLTIN },
55 	{ "delete",	DELETE,		DELETE },
56 	{ "do",		DO,		DO },
57 	{ "else",	ELSE,		ELSE },
58 	{ "exit",	EXIT,		EXIT },
59 	{ "exp",	FEXP,		BLTIN },
60 	{ "fflush",	FFLUSH,		BLTIN },
61 	{ "for",	FOR,		FOR },
62 	{ "func",	FUNC,		FUNC },
63 	{ "function",	FUNC,		FUNC },
64 	{ "getline",	GETLINE,	GETLINE },
65 	{ "gsub",	GSUB,		GSUB },
66 	{ "if",		IF,		IF },
67 	{ "in",		IN,		IN },
68 	{ "index",	INDEX,		INDEX },
69 	{ "int",	FINT,		BLTIN },
70 	{ "length",	FLENGTH,	BLTIN },
71 	{ "log",	FLOG,		BLTIN },
72 	{ "match",	MATCHFCN,	MATCHFCN },
73 	{ "next",	NEXT,		NEXT },
74 	{ "nextfile",	NEXTFILE,	NEXTFILE },
75 	{ "print",	PRINT,		PRINT },
76 	{ "printf",	PRINTF,		PRINTF },
77 	{ "rand",	FRAND,		BLTIN },
78 	{ "return",	RETURN,		RETURN },
79 	{ "sin",	FSIN,		BLTIN },
80 	{ "split",	SPLIT,		SPLIT },
81 	{ "sprintf",	SPRINTF,	SPRINTF },
82 	{ "sqrt",	FSQRT,		BLTIN },
83 	{ "srand",	FSRAND,		BLTIN },
84 	{ "sub",	SUB,		SUB },
85 	{ "substr",	SUBSTR,		SUBSTR },
86 	{ "system",	FSYSTEM,	BLTIN },
87 	{ "tolower",	FTOLOWER,	BLTIN },
88 	{ "toupper",	FTOUPPER,	BLTIN },
89 	{ "while",	WHILE,		WHILE },
90 };
91 
/*
 * RET(x): return token x from the enclosing function, printing its name
 * first when dbg is set.
 * The do/while(0) wrapper makes "RET(x);" a single statement, so it is
 * safe as an unbraced if/else arm.  x is expanded twice and therefore
 * must not have side effects.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)

/*
 * Report the next input character without consuming it: the character
 * is read with input() and immediately handed back with unput().
 */
static int peek(void)
{
	const int next = input();

	unput(next);
	return next;
}

gettok(char ** pbuf,int * psz)101 static int gettok(char **pbuf, int *psz)	/* get next input token */
102 {
103 	int c, retc;
104 	char *buf = *pbuf;
105 	int sz = *psz;
106 	char *bp = buf;
107 
108 	c = input();
109 	if (c == 0)
110 		return 0;
111 	buf[0] = c;
112 	buf[1] = 0;
113 	if (!isalnum(c) && c != '.' && c != '_')
114 		return c;
115 
116 	*bp++ = c;
117 	if (isalpha(c) || c == '_') {	/* it's a varname */
118 		for ( ; (c = input()) != 0; ) {
119 			if (bp-buf >= sz)
120 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
121 					FATAL( "out of space for name %.10s...", buf );
122 			if (isalnum(c) || c == '_')
123 				*bp++ = c;
124 			else {
125 				*bp = 0;
126 				unput(c);
127 				break;
128 			}
129 		}
130 		*bp = 0;
131 		retc = 'a';	/* alphanumeric */
132 	} else {	/* maybe it's a number, but could be . */
133 		char *rem;
134 		/* read input until can't be a number */
135 		for ( ; (c = input()) != 0; ) {
136 			if (bp-buf >= sz)
137 				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
138 					FATAL( "out of space for number %.10s...", buf );
139 			if (isdigit(c) || c == 'e' || c == 'E'
140 			  || c == '.' || c == '+' || c == '-')
141 				*bp++ = c;
142 			else {
143 				unput(c);
144 				break;
145 			}
146 		}
147 		*bp = 0;
148 		strtod(buf, &rem);	/* parse the number */
149 		if (rem == buf) {	/* it wasn't a valid number at all */
150 			buf[1] = 0;	/* return one character as token */
151 			retc = (uschar)buf[0];	/* character is its own type */
152 			unputstr(rem+1); /* put rest back for later */
153 		} else {	/* some prefix was a number */
154 			unputstr(rem);	/* put rest back for later */
155 			rem[0] = 0;	/* truncate buf after number part */
156 			retc = '0';	/* type is number */
157 		}
158 	}
159 	*pbuf = buf;
160 	*psz = sz;
161 	return retc;
162 }
163 
/* Lexer helpers defined further down in this file. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot flags tested at the top of yylex(). */
bool	sc	= false;	/* true => return a } right now */
bool	reg	= false;	/* true => return a REGEXPR now */

yylex(void)170 int yylex(void)
171 {
172 	int c;
173 	static char *buf = NULL;
174 	static int bufsize = 5; /* BUG: setting this small causes core dump! */
175 
176 	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
177 		FATAL( "out of space in yylex" );
178 	if (sc) {
179 		sc = false;
180 		RET('}');
181 	}
182 	if (reg) {
183 		reg = false;
184 		return regexpr();
185 	}
186 	for (;;) {
187 		c = gettok(&buf, &bufsize);
188 		if (c == 0)
189 			return 0;
190 		if (isalpha(c) || c == '_')
191 			return word(buf);
192 		if (isdigit(c)) {
193 			char *cp = tostring(buf);
194 			double result;
195 
196 			if (is_number(cp, & result))
197 				yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
198 			else
199 				yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
200 			free(cp);
201 			/* should this also have STR set? */
202 			RET(NUMBER);
203 		}
204 
205 		yylval.i = c;
206 		switch (c) {
207 		case '\n':	/* {EOL} */
208 			lineno++;
209 			RET(NL);
210 		case '\r':	/* assume \n is coming */
211 		case ' ':	/* {WS}+ */
212 		case '\t':
213 			break;
214 		case '#':	/* #.* strip comments */
215 			while ((c = input()) != '\n' && c != 0)
216 				;
217 			unput(c);
218 			/*
219 			 * Next line is a hack, itcompensates for
220 			 * unput's treatment of \n.
221 			 */
222 			lineno++;
223 			break;
224 		case ';':
225 			RET(';');
226 		case '\\':
227 			if (peek() == '\n') {
228 				input();
229 				lineno++;
230 			} else if (peek() == '\r') {
231 				input(); input();	/* \n */
232 				lineno++;
233 			} else {
234 				RET(c);
235 			}
236 			break;
237 		case '&':
238 			if (peek() == '&') {
239 				input(); RET(AND);
240 			} else
241 				RET('&');
242 		case '|':
243 			if (peek() == '|') {
244 				input(); RET(BOR);
245 			} else
246 				RET('|');
247 		case '!':
248 			if (peek() == '=') {
249 				input(); yylval.i = NE; RET(NE);
250 			} else if (peek() == '~') {
251 				input(); yylval.i = NOTMATCH; RET(MATCHOP);
252 			} else
253 				RET(NOT);
254 		case '~':
255 			yylval.i = MATCH;
256 			RET(MATCHOP);
257 		case '<':
258 			if (peek() == '=') {
259 				input(); yylval.i = LE; RET(LE);
260 			} else {
261 				yylval.i = LT; RET(LT);
262 			}
263 		case '=':
264 			if (peek() == '=') {
265 				input(); yylval.i = EQ; RET(EQ);
266 			} else {
267 				yylval.i = ASSIGN; RET(ASGNOP);
268 			}
269 		case '>':
270 			if (peek() == '=') {
271 				input(); yylval.i = GE; RET(GE);
272 			} else if (peek() == '>') {
273 				input(); yylval.i = APPEND; RET(APPEND);
274 			} else {
275 				yylval.i = GT; RET(GT);
276 			}
277 		case '+':
278 			if (peek() == '+') {
279 				input(); yylval.i = INCR; RET(INCR);
280 			} else if (peek() == '=') {
281 				input(); yylval.i = ADDEQ; RET(ASGNOP);
282 			} else
283 				RET('+');
284 		case '-':
285 			if (peek() == '-') {
286 				input(); yylval.i = DECR; RET(DECR);
287 			} else if (peek() == '=') {
288 				input(); yylval.i = SUBEQ; RET(ASGNOP);
289 			} else
290 				RET('-');
291 		case '*':
292 			if (peek() == '=') {	/* *= */
293 				input(); yylval.i = MULTEQ; RET(ASGNOP);
294 			} else if (peek() == '*') {	/* ** or **= */
295 				input();	/* eat 2nd * */
296 				if (peek() == '=') {
297 					input(); yylval.i = POWEQ; RET(ASGNOP);
298 				} else {
299 					RET(POWER);
300 				}
301 			} else
302 				RET('*');
303 		case '/':
304 			RET('/');
305 		case '%':
306 			if (peek() == '=') {
307 				input(); yylval.i = MODEQ; RET(ASGNOP);
308 			} else
309 				RET('%');
310 		case '^':
311 			if (peek() == '=') {
312 				input(); yylval.i = POWEQ; RET(ASGNOP);
313 			} else
314 				RET(POWER);
315 
316 		case '$':
317 			/* BUG: awkward, if not wrong */
318 			c = gettok(&buf, &bufsize);
319 			if (isalpha(c)) {
320 				if (strcmp(buf, "NF") == 0) {	/* very special */
321 					unputstr("(NF)");
322 					RET(INDIRECT);
323 				}
324 				c = peek();
325 				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
326 					unputstr(buf);
327 					RET(INDIRECT);
328 				}
329 				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
330 				RET(IVAR);
331 			} else if (c == 0) {	/*  */
332 				SYNTAX( "unexpected end of input after $" );
333 				RET(';');
334 			} else {
335 				unputstr(buf);
336 				RET(INDIRECT);
337 			}
338 
339 		case '}':
340 			if (--bracecnt < 0)
341 				SYNTAX( "extra }" );
342 			sc = true;
343 			RET(';');
344 		case ']':
345 			if (--brackcnt < 0)
346 				SYNTAX( "extra ]" );
347 			RET(']');
348 		case ')':
349 			if (--parencnt < 0)
350 				SYNTAX( "extra )" );
351 			RET(')');
352 		case '{':
353 			bracecnt++;
354 			RET('{');
355 		case '[':
356 			brackcnt++;
357 			RET('[');
358 		case '(':
359 			parencnt++;
360 			RET('(');
361 
362 		case '"':
363 			return string();	/* BUG: should be like tran.c ? */
364 
365 		default:
366 			RET(c);
367 		}
368 	}
369 }
370 
string(void)371 int string(void)
372 {
373 	int c, n;
374 	char *s, *bp;
375 	static char *buf = NULL;
376 	static int bufsz = 500;
377 
378 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
379 		FATAL("out of space for strings");
380 	for (bp = buf; (c = input()) != '"'; ) {
381 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
382 			FATAL("out of space for string %.10s...", buf);
383 		switch (c) {
384 		case '\n':
385 		case '\r':
386 		case 0:
387 			*bp = '\0';
388 			SYNTAX( "non-terminated string %.10s...", buf );
389 			if (c == 0)	/* hopeless */
390 				FATAL( "giving up" );
391 			lineno++;
392 			break;
393 		case '\\':
394 			c = input();
395 			switch (c) {
396 			case '\n': break;
397 			case '"': *bp++ = '"'; break;
398 			case 'n': *bp++ = '\n'; break;
399 			case 't': *bp++ = '\t'; break;
400 			case 'f': *bp++ = '\f'; break;
401 			case 'r': *bp++ = '\r'; break;
402 			case 'b': *bp++ = '\b'; break;
403 			case 'v': *bp++ = '\v'; break;
404 			case 'a': *bp++ = '\a'; break;
405 			case '\\': *bp++ = '\\'; break;
406 
407 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
408 			case '3': case '4': case '5': case '6': case '7':
409 				n = c - '0';
410 				if ((c = peek()) >= '0' && c < '8') {
411 					n = 8 * n + input() - '0';
412 					if ((c = peek()) >= '0' && c < '8')
413 						n = 8 * n + input() - '0';
414 				}
415 				*bp++ = n;
416 				break;
417 
418 			case 'x':	/* hex  \x0-9a-fA-F + */
419 			    {
420 				int i;
421 
422 				n = 0;
423 				for (i = 1; i <= 2; i++) {
424 					c = input();
425 					if (c == 0)
426 						break;
427 					if (isxdigit(c)) {
428 						c = tolower(c);
429 						n *= 16;
430 						if (isdigit(c))
431 							n += (c - '0');
432 						else
433 							n += 10 + (c - 'a');
434 					} else
435 						break;
436 				}
437 				if (n)
438 					*bp++ = n;
439 				else
440 					unput(c);
441 				break;
442 			    }
443 
444 			default:
445 				*bp++ = c;
446 				break;
447 			}
448 			break;
449 		default:
450 			*bp++ = c;
451 			break;
452 		}
453 	}
454 	*bp = 0;
455 	s = tostring(buf);
456 	*bp++ = ' '; *bp++ = '\0';
457 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
458 	free(s);
459 	RET(STRING);
460 }
461 
462 
binsearch(char * w,const Keyword * kp,int n)463 static int binsearch(char *w, const Keyword *kp, int n)
464 {
465 	int cond, low, mid, high;
466 
467 	low = 0;
468 	high = n - 1;
469 	while (low <= high) {
470 		mid = (low + high) / 2;
471 		if ((cond = strcmp(w, kp[mid].word)) < 0)
472 			high = mid - 1;
473 		else if (cond > 0)
474 			low = mid + 1;
475 		else
476 			return mid;
477 	}
478 	return -1;
479 }
480 
word(char * w)481 int word(char *w)
482 {
483 	const Keyword *kp;
484 	int c, n;
485 
486 	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
487 	if (n != -1) {	/* found in table */
488 		kp = keywords + n;
489 		yylval.i = kp->sub;
490 		switch (kp->type) {	/* special handling */
491 		case BLTIN:
492 			if (kp->sub == FSYSTEM && safe)
493 				SYNTAX( "system is unsafe" );
494 			RET(kp->type);
495 		case FUNC:
496 			if (infunc)
497 				SYNTAX( "illegal nested function" );
498 			RET(kp->type);
499 		case RETURN:
500 			if (!infunc)
501 				SYNTAX( "return not in function" );
502 			RET(kp->type);
503 		case VARNF:
504 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
505 			RET(VARNF);
506 		default:
507 			RET(kp->type);
508 		}
509 	}
510 	c = peek();	/* look for '(' */
511 	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
512 		yylval.i = n;
513 		RET(ARG);
514 	} else {
515 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
516 		if (c == '(') {
517 			RET(CALL);
518 		} else {
519 			RET(VAR);
520 		}
521 	}
522 }
523 
startreg(void)524 void startreg(void)	/* next call to yylex will return a regular expression */
525 {
526 	reg = true;
527 }
528 
regexpr(void)529 int regexpr(void)
530 {
531 	int c;
532 	static char *buf = NULL;
533 	static int bufsz = 500;
534 	char *bp;
535 
536 	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
537 		FATAL("out of space for reg expr");
538 	bp = buf;
539 	for ( ; (c = input()) != '/' && c != 0; ) {
540 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
541 			FATAL("out of space for reg expr %.10s...", buf);
542 		if (c == '\n') {
543 			*bp = '\0';
544 			SYNTAX( "newline in regular expression %.10s...", buf );
545 			unput('\n');
546 			break;
547 		} else if (c == '\\') {
548 			*bp++ = '\\';
549 			*bp++ = input();
550 		} else {
551 			*bp++ = c;
552 		}
553 	}
554 	*bp = 0;
555 	if (c == 0)
556 		SYNTAX("non-terminated regular expression %.10s...", buf);
557 	yylval.s = buf;
558 	unput('/');
559 	RET(REGEXPR);
560 }
561 
/* low-level lexical stuff, sort of inherited from lex */

/* Circular record of the characters input() has returned. */
char	ebuf[300];
char	*ep = ebuf;		/* where input() stores the next character */

/* Stack of characters pushed back by unput(). */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed character */

FILE	*yyin = NULL;		/* defined here; not referenced by the functions in this file */

input(void)570 int input(void)	/* get next lexical input character */
571 {
572 	int c;
573 	extern char *lexprog;
574 
575 	if (yysptr > yysbuf)
576 		c = (uschar)*--yysptr;
577 	else if (lexprog != NULL) {	/* awk '...' */
578 		if ((c = (uschar)*lexprog) != 0)
579 			lexprog++;
580 	} else				/* awk -f ... */
581 		c = pgetc();
582 	if (c == EOF)
583 		c = 0;
584 	if (ep >= ebuf + sizeof ebuf)
585 		ep = ebuf;
586 	*ep = c;
587 	if (c != 0) {
588 		ep++;
589 	}
590 	return (c);
591 }
592 
unput(int c)593 void unput(int c)	/* put lexical character back on input */
594 {
595 	if (c == '\n')
596 		lineno--;
597 	if (yysptr >= yysbuf + sizeof(yysbuf))
598 		FATAL("pushed back too much: %.20s...", yysbuf);
599 	*yysptr++ = c;
600 	if (--ep < ebuf)
601 		ep = ebuf + sizeof(ebuf) - 1;
602 }
603 
/*
 * unputstr - push the string s back onto the input so that subsequent
 * input() calls return its characters in their original order.
 * An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/*
	 * Push last character first.  Counting down in size_t avoids
	 * storing strlen(s)-1 in an int, which relied on unsigned wrap
	 * and an implementation-defined conversion for the empty string.
	 */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}