1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #define DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33
34 #define FULLTAB 2 /* rehash when table gets this x full */
35 #define GROWTAB 4 /* grow table by this factor */
36
37 Array *symtab; /* main symbol table */
38
39 char **FS; /* initial field sep */
40 char **RS; /* initial record sep */
41 char **OFS; /* output field sep */
42 char **ORS; /* output record sep */
43 char **OFMT; /* output format for numbers */
44 char **CONVFMT; /* format for conversions in getsval */
45 Awkfloat *NF; /* number of fields in current record */
46 Awkfloat *NR; /* number of current record */
47 Awkfloat *FNR; /* number of current record in current file */
48 char **FILENAME; /* current filename argument */
49 Awkfloat *ARGC; /* number of arguments from command line */
50 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH; /* length of same */
53
54 Cell *fsloc; /* FS */
55 Cell *nrloc; /* NR */
56 Cell *nfloc; /* NF */
57 Cell *fnrloc; /* FNR */
58 Cell *ofsloc; /* OFS */
59 Cell *orsloc; /* ORS */
60 Cell *rsloc; /* RS */
61 Array *ARGVtab; /* symbol table containing ARGV[...] */
62 Array *ENVtab; /* symbol table containing ENVIRON[...] */
63 Cell *rstartloc; /* RSTART */
64 Cell *rlengthloc; /* RLENGTH */
65 Cell *subseploc; /* SUBSEP */
66 Cell *symtabloc; /* SYMTAB */
67
68 Cell *nullloc; /* a guaranteed empty cell */
69 Node *nullnode; /* zero&null, converted into a node for comparisons */
70 Cell *literal0;
71
72 extern Cell **fldtab;
73
74 static void
setfree(Cell * vp)75 setfree(Cell *vp)
76 {
77 if (&vp->sval == FS || &vp->sval == RS ||
78 &vp->sval == OFS || &vp->sval == ORS ||
79 &vp->sval == OFMT || &vp->sval == CONVFMT ||
80 &vp->sval == FILENAME || &vp->sval == SUBSEP)
81 vp->tval |= DONTFREE;
82 else
83 vp->tval &= ~DONTFREE;
84 }
85
syminit(void)86 void syminit(void) /* initialize symbol table with builtin vars */
87 {
88 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
89 /* this is used for if(x)... tests: */
90 nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
91 nullnode = celltonode(nullloc, CCON);
92
93 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
94 FS = &fsloc->sval;
95 rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
96 RS = &rsloc->sval;
97 ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
98 OFS = &ofsloc->sval;
99 orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
100 ORS = &orsloc->sval;
101 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
102 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
103 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
104 nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
105 NF = &nfloc->fval;
106 nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
107 NR = &nrloc->fval;
108 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
109 FNR = &fnrloc->fval;
110 subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
111 SUBSEP = &subseploc->sval;
112 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
113 RSTART = &rstartloc->fval;
114 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
115 RLENGTH = &rlengthloc->fval;
116 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
117 symtabloc->sval = (char *) symtab;
118 }
119
arginit(int ac,char ** av)120 void arginit(int ac, char **av) /* set up ARGV and ARGC */
121 {
122 Cell *cp;
123 int i;
124 char temp[50];
125
126 ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
127 cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
128 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
129 cp->sval = (char *) ARGVtab;
130 for (i = 0; i < ac; i++) {
131 sprintf(temp, "%d", i);
132 if (is_number(*av))
133 setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
134 else
135 setsymtab(temp, *av, 0.0, STR, ARGVtab);
136 av++;
137 }
138 }
139
envinit(char ** envp)140 void envinit(char **envp) /* set up ENVIRON variable */
141 {
142 Cell *cp;
143 char *p;
144
145 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
146 ENVtab = makesymtab(NSYMTAB);
147 cp->sval = (char *) ENVtab;
148 for ( ; *envp; envp++) {
149 if ((p = strchr(*envp, '=')) == NULL)
150 continue;
151 if( p == *envp ) /* no left hand side name in env string */
152 continue;
153 *p++ = 0; /* split into two strings at = */
154 if (is_number(p))
155 setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
156 else
157 setsymtab(*envp, p, 0.0, STR, ENVtab);
158 p[-1] = '='; /* restore in case env is passed down to a shell */
159 }
160 }
161
makesymtab(int n)162 Array *makesymtab(int n) /* make a new symbol table */
163 {
164 Array *ap;
165 Cell **tp;
166
167 ap = (Array *) malloc(sizeof(Array));
168 tp = (Cell **) calloc(n, sizeof(Cell *));
169 if (ap == NULL || tp == NULL)
170 FATAL("out of space in makesymtab");
171 ap->nelem = 0;
172 ap->size = n;
173 ap->tab = tp;
174 return(ap);
175 }
176
/*
 * freesymtab: release the table held by array cell ap, together with
 * every element in it.  Does nothing when ap is not an array or holds
 * no table.  The cell ap itself is left alone.
 */
void freesymtab(Cell *ap)
{
	Array *table;
	int slot;

	if (!isarr(ap))
		return;
	table = (Array *) ap->sval;
	if (table == NULL)
		return;

	for (slot = 0; slot < table->size; slot++) {
		Cell *cell = table->tab[slot];

		while (cell != NULL) {
			Cell *following;

			xfree(cell->nval);
			if (freeable(cell))
				xfree(cell->sval);
			following = cell->cnext;	/* fetch before the cell is freed */
			free(cell);
			table->nelem--;
			cell = following;
		}
		table->tab[slot] = 0;
	}
	if (table->nelem != 0)
		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
	free(table->tab);
	free(table);
}
204
/*
 * freeelem: remove the element named s from array ap (i.e., ap["s"]).
 * Does nothing if there is no such element.
 */
void freeelem(Cell *ap, const char *s)
{
	Array *table = (Array *) ap->sval;
	Cell **link = &table->tab[hash(s, table->size)];
	Cell *victim;

	/* link always addresses the pointer that leads to the current cell */
	while ((victim = *link) != NULL) {
		if (strcmp(s, victim->nval) != 0) {
			link = &victim->cnext;
			continue;
		}
		*link = victim->cnext;		/* unhook from the chain */
		if (freeable(victim))
			xfree(victim->sval);
		free(victim->nval);
		free(victim);
		table->nelem--;
		return;
	}
}
227
setsymtab(const char * n,const char * s,Awkfloat f,unsigned t,Array * tp)228 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
229 {
230 int h;
231 Cell *p;
232
233 if (n != NULL && (p = lookup(n, tp)) != NULL) {
234 dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
235 (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
236 return(p);
237 }
238 p = (Cell *) malloc(sizeof(Cell));
239 if (p == NULL)
240 FATAL("out of space for symbol table at %s", n);
241 p->nval = tostring(n);
242 p->sval = s ? tostring(s) : tostring("");
243 p->fval = f;
244 p->tval = t;
245 p->csub = CUNK;
246 p->ctype = OCELL;
247 tp->nelem++;
248 if (tp->nelem > FULLTAB * tp->size)
249 rehash(tp);
250 h = hash(n, tp->size);
251 p->cnext = tp->tab[h];
252 tp->tab[h] = p;
253 dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
254 (void*)p, p->nval, p->sval, p->fval, p->tval) );
255 return(p);
256 }
257
/*
 * hash: form the hash value for string s, reduced modulo n.
 * The running value is multiplied by 31 and the next character added,
 * in unsigned arithmetic.  n must be non-zero.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *cp = s;

	while (*cp != '\0') {
		acc = 31 * acc + *cp;
		cp++;
	}
	return acc % n;
}
266
/*
 * rehash: move every element of tp into a bucket array GROWTAB times
 * larger.  If the larger array cannot be allocated the table is left
 * exactly as it was.
 */
void rehash(Array *tp)
{
	int newsize = GROWTAB * tp->size;
	Cell **bigger = (Cell **) calloc(newsize, sizeof(*bigger));
	int slot;

	if (bigger == NULL)	/* can't do it, but can keep running. */
		return;		/* someone else will run out later. */

	for (slot = 0; slot < tp->size; slot++) {
		Cell *cell = tp->tab[slot];

		while (cell != NULL) {
			Cell *rest = cell->cnext;
			int dest = hash(cell->nval, newsize);

			cell->cnext = bigger[dest];	/* push onto the new chain */
			bigger[dest] = cell;
			cell = rest;
		}
	}
	free(tp->tab);
	tp->size = newsize;
	tp->tab = bigger;
}
288
lookup(const char * s,Array * tp)289 Cell *lookup(const char *s, Array *tp) /* look for s in tp */
290 {
291 Cell *p;
292 int h;
293
294 h = hash(s, tp->size);
295 for (p = tp->tab[h]; p != NULL; p = p->cnext)
296 if (strcmp(s, p->nval) == 0)
297 return(p); /* found it */
298 return(NULL); /* not found */
299 }
300
/*
 * setfval: set the numeric value of a Cell and return the value stored.
 *
 * Before the value is stored, the record/field bookkeeping is updated
 * according to what vp is: a field, NF, the record itself, or OFS.
 * Any previous string value is freed (when freeable) and the cell is
 * marked as number-only with no recorded conversion format.
 */
Awkfloat setfval(Cell *vp, Awkfloat f)
{
	int idx;

	f += 0.0;		/* normalise negative zero to positive zero */
	if (!(vp->tval & (NUM | STR)))
		funnyvar(vp, "assign to");

	if (isfld(vp)) {
		donerec = 0;	/* mark $0 invalid */
		idx = atoi(vp->nval);
		if (idx > *NF)
			newfld(idx);
		dprintf( ("setting field %d to %g\n", idx, f) );
	} else if (&vp->fval == NF) {
		donerec = 0;	/* mark $0 invalid */
		setlastfld(f);
		dprintf( ("setting NF to %g\n", f) );
	} else if (isrec(vp)) {
		donefld = 0;	/* mark $1... invalid */
		donerec = 1;
	} else if (vp == ofsloc) {
		if (donerec == 0)
			recbld();
	}

	if (freeable(vp))
		xfree(vp->sval);	/* free any previous string */
	/* string and conversion marks off, number mark on */
	vp->tval = (vp->tval & ~(STR|CONVC|CONVO)) | NUM;
	vp->fmt = NULL;
	if (f == 0)	/* true for either sign of zero; store the positive one */
		f = 0;
	dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
	vp->fval = f;
	return vp->fval;
}
335
/*
 * funnyvar: complain about a Cell that is neither a number nor a string.
 *
 * rw names the attempted operation (the callers in this file pass
 * "assign to" or "read value of").  An array name or a function is
 * reported through FATAL; anything else only produces a WARNING.
 *
 * The pointer is cast for %p, which requires a void *, and the name and
 * string are passed through NN() so a NULL never reaches %s.
 */
void funnyvar(Cell *vp, const char *rw)
{
	if (isarr(vp))
		FATAL("can't %s %s; it's an array name.", rw, NN(vp->nval));
	if (vp->tval & FCN)
		FATAL("can't %s %s; it's a function.", rw, NN(vp->nval));
	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
		(void *)vp, NN(vp->nval), NN(vp->sval), vp->fval, vp->tval);
}
345
/*
 * setsval: set the string value of a Cell and return the stored string.
 *
 * The record/field bookkeeping is updated according to what vp is (a
 * field, the record itself, or OFS).  The cell then receives a private
 * copy of s (a NULL s is stored as "") and is marked string-only with
 * no recorded conversion format.  When vp is NF, its numeric value is
 * re-read from the new string and passed to setlastfld().
 */
char *setsval(Cell *vp, const char *s)
{
	char *copy;
	int idx;
	Awkfloat nfval;

	dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
	if (!(vp->tval & (NUM | STR)))
		funnyvar(vp, "assign to");

	if (isfld(vp)) {
		donerec = 0;	/* mark $0 invalid */
		idx = atoi(vp->nval);
		if (idx > *NF)
			newfld(idx);
		dprintf( ("setting field %d to %s (%p)\n", idx, s, (void *) s) );
	} else if (isrec(vp)) {
		donefld = 0;	/* mark $1... invalid */
		donerec = 1;
	} else if (vp == ofsloc) {
		if (donerec == 0)
			recbld();
	}

	/* copy before freeing: s may be vp's own string (self-assignment) */
	copy = tostring(s != NULL ? s : "");
	if (freeable(vp))
		xfree(vp->sval);
	/* number and conversion marks off, string mark on */
	vp->tval = (vp->tval & ~(NUM|CONVC|CONVO)) | STR;
	vp->fmt = NULL;
	setfree(vp);
	dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
		(void*)vp, NN(vp->nval), copy, (void *) copy, vp->tval, donerec, donefld) );
	vp->sval = copy;

	if (&vp->fval == NF) {
		donerec = 0;	/* mark $0 invalid */
		nfval = getfval(vp);
		setlastfld(nfval);
		dprintf( ("setting NF to %g\n", nfval) );
	}

	return vp->sval;
}
388
getfval(Cell * vp)389 Awkfloat getfval(Cell *vp) /* get float val of a Cell */
390 {
391 if ((vp->tval & (NUM | STR)) == 0)
392 funnyvar(vp, "read value of");
393 if (isfld(vp) && donefld == 0)
394 fldbld();
395 else if (isrec(vp) && donerec == 0)
396 recbld();
397 if (!isnum(vp)) { /* not a number */
398 vp->fval = atof(vp->sval); /* best guess */
399 if (is_number(vp->sval) && !(vp->tval&CON))
400 vp->tval |= NUM; /* make NUM only sparingly */
401 }
402 dprintf( ("getfval %p: %s = %g, t=%o\n",
403 (void*)vp, NN(vp->nval), vp->fval, vp->tval) );
404 return(vp->fval);
405 }
406
/*
 * update_str_val: regenerate vp's string from its number.
 * Integral values are printed with "%.30g", everything else with *fmt.
 * The old string is released when freeable; the new one is owned by
 * the cell, so DONTFREE is cleared and STR is set.
 */
static void update_str_val(Cell *vp, char **fmt)
{
	char text[256];
	double whole;

	if (freeable(vp))
		xfree(vp->sval);
	if (modf(vp->fval, &whole) == 0)	/* it's integral */
		snprintf(text, sizeof (text), "%.30g", vp->fval);
	else
		snprintf(text, sizeof (text), *fmt, vp->fval);
	vp->sval = tostring(text);
	vp->tval &= ~DONTFREE;
	vp->tval |= STR;
}

/*
 * get_str_val: return the string value of a Cell, converting from the
 * number with *fmt when needed.  fmt is either OFMT or CONVFMT.
 *
 * A string produced from a number stays attached to the cell, so the
 * cell also records which format produced it: flag CONVO for OFMT or
 * CONVC for CONVFMT, plus a copy of the format-string pointer in
 * vp->fmt.  That saved pointer is only ever compared, never
 * dereferenced.  The string is regenerated when
 *   - the cell has no string at all, or
 *   - the existing string came from the other format, or
 *   - it came from this format but the format pointer has since changed.
 * No conversion is attempted for cells marked DONTFREE, cells that are
 * not numbers, or fields; their current string is returned as is.
 */
static char *get_str_val(Cell *vp, char **fmt)
{
	int mine, theirs;	/* conversion flag for fmt, and for the other format */
	int regenerate;

	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "read value of");
	if (isfld(vp) && donefld == 0)
		fldbld();
	else if (isrec(vp) && donerec == 0)
		recbld();

	if (fmt == OFMT) {
		mine = CONVO;
		theirs = CONVC;
	} else {
		/* CONVFMT */
		mine = CONVC;
		theirs = CONVO;
	}

	if (!isstr(vp))
		regenerate = 1;
	else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp))
		regenerate = 0;
	else
		regenerate = (vp->tval & theirs) != 0
		    || ((vp->tval & mine) != 0 && vp->fmt != *fmt);

	if (regenerate) {
		update_str_val(vp, fmt);
		vp->tval &= ~theirs;
		vp->tval |= mine;
		vp->fmt = *fmt;
	}

	dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
		(void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
	return vp->sval;
}
494
/* getsval: string value of a Cell, numbers converted with CONVFMT */
char *getsval(Cell *vp)
{
	char *text = get_str_val(vp, CONVFMT);

	return text;
}
499
/* getpssval: string value of a Cell for print, numbers converted with OFMT */
char *getpssval(Cell *vp)
{
	char *text = get_str_val(vp, OFMT);

	return text;
}
504
505
/*
 * tostring: make a copy of string s in freshly malloc'ed storage.
 * s must not be NULL.  Calls FATAL when the allocation fails.
 */
char *tostring(const char *s)
{
	size_t len = strlen(s);
	char *copy = (char *) malloc(len + 1);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	return strcpy(copy, s);
}
516
qstring(const char * is,int delim)517 char *qstring(const char *is, int delim) /* collect string up to next delim */
518 {
519 const char *os = is;
520 int c, n;
521 uschar *s = (uschar *) is;
522 uschar *buf, *bp;
523
524 if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
525 FATAL( "out of space in qstring(%s)", s);
526 for (bp = buf; (c = *s) != delim; s++) {
527 if (c == '\n')
528 SYNTAX( "newline in string %.20s...", os );
529 else if (c != '\\')
530 *bp++ = c;
531 else { /* \something */
532 c = *++s;
533 if (c == 0) { /* \ at end */
534 *bp++ = '\\';
535 break; /* for loop */
536 }
537 switch (c) {
538 case '\\': *bp++ = '\\'; break;
539 case 'n': *bp++ = '\n'; break;
540 case 't': *bp++ = '\t'; break;
541 case 'b': *bp++ = '\b'; break;
542 case 'f': *bp++ = '\f'; break;
543 case 'r': *bp++ = '\r'; break;
544 default:
545 if (!isdigit(c)) {
546 *bp++ = c;
547 break;
548 }
549 n = c - '0';
550 if (isdigit(s[1])) {
551 n = 8 * n + *++s - '0';
552 if (isdigit(s[1]))
553 n = 8 * n + *++s - '0';
554 }
555 *bp++ = n;
556 break;
557 }
558 }
559 }
560 *bp++ = 0;
561 return (char *) buf;
562 }
563
flags2str(int flags)564 const char *flags2str(int flags)
565 {
566 static const struct ftab {
567 const char *name;
568 int value;
569 } flagtab[] = {
570 { "NUM", NUM },
571 { "STR", STR },
572 { "DONTFREE", DONTFREE },
573 { "CON", CON },
574 { "ARR", ARR },
575 { "FCN", FCN },
576 { "FLD", FLD },
577 { "REC", REC },
578 { "CONVC", CONVC },
579 { "CONVO", CONVO },
580 { NULL, 0 }
581 };
582 static char buf[100];
583 int i;
584 char *cp = buf;
585
586 for (i = 0; flagtab[i].name != NULL; i++) {
587 if ((flags & flagtab[i].value) != 0) {
588 if (cp > buf)
589 *cp++ = '|';
590 strcpy(cp, flagtab[i].name);
591 cp += strlen(cp);
592 }
593 }
594
595 return buf;
596 }
597