1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 /* lasciate ogne speranza, voi ch'intrate. */
26
27 #define DEBUG
28
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include "awk.h"
34 #include "ytab.h"
35
36 #define HAT (NCHARS+2) /* matches ^ in regular expr */
37 /* NCHARS is 2**n */
38 #define MAXLIN 22
39
40 #define type(v) (v)->nobj /* badly overloaded here */
41 #define info(v) (v)->ntype /* badly overloaded here */
42 #define left(v) (v)->narg[0]
43 #define right(v) (v)->narg[1]
44 #define parent(v) (v)->nnext
45
46 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47 #define ELEAF case EMPTYRE: /* empty string in regexp */
48 #define UNARY case STAR: case PLUS: case QUEST:
49
50 /* encoding in tree Nodes:
51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
52 left is index, right contains value or pointer to value
53 unary (STAR, PLUS, QUEST): left is child, right is null
54 binary (CAT, OR): left and right are children
55 parent contains pointer to parent
56 */
57
58
59 int *setvec;
60 int *tmpset;
61 int maxsetvec = 0;
62
63 int rtok; /* next token in current re */
64 int rlxval;
65 static uschar *rlxstr;
66 static uschar *prestr; /* current position in current re */
67 static uschar *lastre; /* origin of last re */
68
69 static int setcnt;
70 static int poscnt;
71
72 char *patbeg;
73 int patlen;
74
75 #define NFA 20 /* cache this many dynamic fa's */
76 fa *fatab[NFA];
77 int nfatab = 0; /* entries in fatab */
78
makedfa(const char * s,int anchor)79 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
80 {
81 int i, use, nuse;
82 fa *pfa;
83 static int now = 1;
84
85 if (setvec == 0) { /* first time through any RE */
86 maxsetvec = MAXLIN;
87 setvec = (int *) malloc(maxsetvec * sizeof(int));
88 tmpset = (int *) malloc(maxsetvec * sizeof(int));
89 if (setvec == 0 || tmpset == 0)
90 overflo("out of space initializing makedfa");
91 }
92
93 if (compile_time) /* a constant for sure */
94 return mkdfa(s, anchor);
95 for (i = 0; i < nfatab; i++) /* is it there already? */
96 if (fatab[i]->anchor == anchor
97 && strcmp((const char *) fatab[i]->restr, s) == 0) {
98 fatab[i]->use = now++;
99 return fatab[i];
100 }
101 pfa = mkdfa(s, anchor);
102 if (nfatab < NFA) { /* room for another */
103 fatab[nfatab] = pfa;
104 fatab[nfatab]->use = now++;
105 nfatab++;
106 return pfa;
107 }
108 use = fatab[0]->use; /* replace least-recently used */
109 nuse = 0;
110 for (i = 1; i < nfatab; i++)
111 if (fatab[i]->use < use) {
112 use = fatab[i]->use;
113 nuse = i;
114 }
115 freefa(fatab[nuse]);
116 fatab[nuse] = pfa;
117 pfa->use = now++;
118 return pfa;
119 }
120
mkdfa(const char * s,int anchor)121 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
122 /* anchor = 1 for anchored matches, else 0 */
123 {
124 Node *p, *p1;
125 fa *f;
126
127 p = reparse(s);
128 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129 /* put ALL STAR in front of reg. exp. */
130 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131 /* put FINAL after reg. exp. */
132
133 poscnt = 0;
134 penter(p1); /* enter parent pointers and leaf indices */
135 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136 overflo("out of space for fa");
137 f->accept = poscnt-1; /* penter has computed number of positions in re */
138 cfoll(f, p1); /* set up follow sets */
139 freetr(p1);
140 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141 overflo("out of space in makedfa");
142 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143 overflo("out of space in makedfa");
144 *f->posns[1] = 0;
145 f->initstat = makeinit(f, anchor);
146 f->anchor = anchor;
147 f->restr = (uschar *) tostring(s);
148 return f;
149 }
150
makeinit(fa * f,int anchor)151 int makeinit(fa *f, int anchor)
152 {
153 int i, k;
154
155 f->curstat = 2;
156 f->out[2] = 0;
157 f->reset = 0;
158 k = *(f->re[0].lfollow);
159 xfree(f->posns[2]);
160 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161 overflo("out of space in makeinit");
162 for (i=0; i <= k; i++) {
163 (f->posns[2])[i] = (f->re[0].lfollow)[i];
164 }
165 if ((f->posns[2])[1] == f->accept)
166 f->out[2] = 1;
167 for (i=0; i < NCHARS; i++)
168 f->gototab[2][i] = 0;
169 f->curstat = cgoto(f, 2, HAT);
170 if (anchor) {
171 *f->posns[2] = k-1; /* leave out position 0 */
172 for (i=0; i < k; i++) {
173 (f->posns[0])[i] = (f->posns[2])[i];
174 }
175
176 f->out[0] = f->out[2];
177 if (f->curstat != 2)
178 --(*f->posns[f->curstat]);
179 }
180 return f->curstat;
181 }
182
penter(Node * p)183 void penter(Node *p) /* set up parent pointers and leaf indices */
184 {
185 switch (type(p)) {
186 ELEAF
187 LEAF
188 info(p) = poscnt;
189 poscnt++;
190 break;
191 UNARY
192 penter(left(p));
193 parent(left(p)) = p;
194 break;
195 case CAT:
196 case OR:
197 penter(left(p));
198 penter(right(p));
199 parent(left(p)) = p;
200 parent(right(p)) = p;
201 break;
202 default: /* can't happen */
203 FATAL("can't happen: unknown type %d in penter", type(p));
204 break;
205 }
206 }
207
freetr(Node * p)208 void freetr(Node *p) /* free parse tree */
209 {
210 switch (type(p)) {
211 ELEAF
212 LEAF
213 xfree(p);
214 break;
215 UNARY
216 freetr(left(p));
217 xfree(p);
218 break;
219 case CAT:
220 case OR:
221 freetr(left(p));
222 freetr(right(p));
223 xfree(p);
224 break;
225 default: /* can't happen */
226 FATAL("can't happen: unknown type %d in freetr", type(p));
227 break;
228 }
229 }
230
231 /* in the parsing of regular expressions, metacharacters like . have */
232 /* to be seen literally; \056 is not a metacharacter. */
233
hexstr(uschar ** pp)234 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
235 { /* only pick up one 8-bit byte (2 chars) */
236 uschar *p;
237 int n = 0;
238 int i;
239
240 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
241 if (isdigit(*p))
242 n = 16 * n + *p - '0';
243 else if (*p >= 'a' && *p <= 'f')
244 n = 16 * n + *p - 'a' + 10;
245 else if (*p >= 'A' && *p <= 'F')
246 n = 16 * n + *p - 'A' + 10;
247 }
248 *pp = (uschar *) p;
249 return n;
250 }
251
252 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
253
quoted(uschar ** pp)254 int quoted(uschar **pp) /* pick up next thing after a \\ */
255 /* and increment *pp */
256 {
257 uschar *p = *pp;
258 int c;
259
260 if ((c = *p++) == 't')
261 c = '\t';
262 else if (c == 'n')
263 c = '\n';
264 else if (c == 'f')
265 c = '\f';
266 else if (c == 'r')
267 c = '\r';
268 else if (c == 'b')
269 c = '\b';
270 else if (c == '\\')
271 c = '\\';
272 else if (c == 'x') { /* hexadecimal goo follows */
273 c = hexstr(&p); /* this adds a null if number is invalid */
274 } else if (isoctdigit(c)) { /* \d \dd \ddd */
275 int n = c - '0';
276 if (isoctdigit(*p)) {
277 n = 8 * n + *p++ - '0';
278 if (isoctdigit(*p))
279 n = 8 * n + *p++ - '0';
280 }
281 c = n;
282 } /* else */
283 /* c = c; */
284 *pp = p;
285 return c;
286 }
287
cclenter(const char * argp)288 char *cclenter(const char *argp) /* add a character class */
289 {
290 int i, c, c2;
291 uschar *p = (uschar *) argp;
292 uschar *op, *bp;
293 static uschar *buf = 0;
294 static int bufsz = 100;
295
296 op = p;
297 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
298 FATAL("out of space for character class [%.10s...] 1", p);
299 bp = buf;
300 for (i = 0; (c = *p++) != 0; ) {
301 if (c == '\\') {
302 c = quoted(&p);
303 } else if (c == '-' && i > 0 && bp[-1] != 0) {
304 if (*p != 0) {
305 c = bp[-1];
306 c2 = *p++;
307 if (c2 == '\\')
308 c2 = quoted(&p);
309 if (c > c2) { /* empty; ignore */
310 bp--;
311 i--;
312 continue;
313 }
314 while (c < c2) {
315 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
316 FATAL("out of space for character class [%.10s...] 2", p);
317 *bp++ = ++c;
318 i++;
319 }
320 continue;
321 }
322 }
323 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
324 FATAL("out of space for character class [%.10s...] 3", p);
325 *bp++ = c;
326 i++;
327 }
328 *bp = 0;
329 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
330 xfree(op);
331 return (char *) tostring((char *) buf);
332 }
333
overflo(const char * s)334 void overflo(const char *s)
335 {
336 FATAL("regular expression too big: %.30s...", s);
337 }
338
cfoll(fa * f,Node * v)339 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
340 {
341 int i;
342 int *p;
343
344 switch (type(v)) {
345 ELEAF
346 LEAF
347 f->re[info(v)].ltype = type(v);
348 f->re[info(v)].lval.np = right(v);
349 while (f->accept >= maxsetvec) { /* guessing here! */
350 maxsetvec *= 4;
351 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
352 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
353 if (setvec == 0 || tmpset == 0)
354 overflo("out of space in cfoll()");
355 }
356 for (i = 0; i <= f->accept; i++)
357 setvec[i] = 0;
358 setcnt = 0;
359 follow(v); /* computes setvec and setcnt */
360 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
361 overflo("out of space building follow set");
362 f->re[info(v)].lfollow = p;
363 *p = setcnt;
364 for (i = f->accept; i >= 0; i--)
365 if (setvec[i] == 1)
366 *++p = i;
367 break;
368 UNARY
369 cfoll(f,left(v));
370 break;
371 case CAT:
372 case OR:
373 cfoll(f,left(v));
374 cfoll(f,right(v));
375 break;
376 default: /* can't happen */
377 FATAL("can't happen: unknown type %d in cfoll", type(v));
378 }
379 }
380
first(Node * p)381 int first(Node *p) /* collects initially active leaves of p into setvec */
382 /* returns 0 if p matches empty string */
383 {
384 int b, lp;
385
386 switch (type(p)) {
387 ELEAF
388 LEAF
389 lp = info(p); /* look for high-water mark of subscripts */
390 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
391 maxsetvec *= 4;
392 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
393 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
394 if (setvec == 0 || tmpset == 0)
395 overflo("out of space in first()");
396 }
397 if (type(p) == EMPTYRE) {
398 setvec[lp] = 0;
399 return(0);
400 }
401 if (setvec[lp] != 1) {
402 setvec[lp] = 1;
403 setcnt++;
404 }
405 if (type(p) == CCL && (*(char *) right(p)) == '\0')
406 return(0); /* empty CCL */
407 else return(1);
408 case PLUS:
409 if (first(left(p)) == 0) return(0);
410 return(1);
411 case STAR:
412 case QUEST:
413 first(left(p));
414 return(0);
415 case CAT:
416 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
417 return(1);
418 case OR:
419 b = first(right(p));
420 if (first(left(p)) == 0 || b == 0) return(0);
421 return(1);
422 }
423 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
424 return(-1);
425 }
426
follow(Node * v)427 void follow(Node *v) /* collects leaves that can follow v into setvec */
428 {
429 Node *p;
430
431 if (type(v) == FINAL)
432 return;
433 p = parent(v);
434 switch (type(p)) {
435 case STAR:
436 case PLUS:
437 first(v);
438 follow(p);
439 return;
440
441 case OR:
442 case QUEST:
443 follow(p);
444 return;
445
446 case CAT:
447 if (v == left(p)) { /* v is left child of p */
448 if (first(right(p)) == 0) {
449 follow(p);
450 return;
451 }
452 } else /* v is right child */
453 follow(p);
454 return;
455 }
456 }
457
member(int c,const char * sarg)458 int member(int c, const char *sarg) /* is c in s? */
459 {
460 uschar *s = (uschar *) sarg;
461
462 while (*s)
463 if (c == *s++)
464 return(1);
465 return(0);
466 }
467
match(fa * f,const char * p0)468 int match(fa *f, const char *p0) /* shortest match ? */
469 {
470 int s, ns;
471 uschar *p = (uschar *) p0;
472
473 s = f->reset ? makeinit(f,0) : f->initstat;
474 if (f->out[s])
475 return(1);
476 do {
477 /* assert(*p < NCHARS); */
478 if ((ns = f->gototab[s][*p]) != 0)
479 s = ns;
480 else
481 s = cgoto(f, s, *p);
482 if (f->out[s])
483 return(1);
484 } while (*p++ != 0);
485 return(0);
486 }
487
pmatch(fa * f,const char * p0)488 int pmatch(fa *f, const char *p0) /* longest match, for sub */
489 {
490 int s, ns;
491 uschar *p = (uschar *) p0;
492 uschar *q;
493 int i, k;
494
495 /* s = f->reset ? makeinit(f,1) : f->initstat; */
496 if (f->reset) {
497 f->initstat = s = makeinit(f,1);
498 } else {
499 s = f->initstat;
500 }
501 patbeg = (char *) p;
502 patlen = -1;
503 do {
504 q = p;
505 do {
506 if (f->out[s]) /* final state */
507 patlen = q-p;
508 /* assert(*q < NCHARS); */
509 if ((ns = f->gototab[s][*q]) != 0)
510 s = ns;
511 else
512 s = cgoto(f, s, *q);
513 if (s == 1) { /* no transition */
514 if (patlen >= 0) {
515 patbeg = (char *) p;
516 return(1);
517 }
518 else
519 goto nextin; /* no match */
520 }
521 } while (*q++ != 0);
522 if (f->out[s])
523 patlen = q-p-1; /* don't count $ */
524 if (patlen >= 0) {
525 patbeg = (char *) p;
526 return(1);
527 }
528 nextin:
529 s = 2;
530 if (f->reset) {
531 for (i = 2; i <= f->curstat; i++)
532 xfree(f->posns[i]);
533 k = *f->posns[0];
534 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
535 overflo("out of space in pmatch");
536 for (i = 0; i <= k; i++)
537 (f->posns[2])[i] = (f->posns[0])[i];
538 f->initstat = f->curstat = 2;
539 f->out[2] = f->out[0];
540 for (i = 0; i < NCHARS; i++)
541 f->gototab[2][i] = 0;
542 }
543 } while (*p++ != 0);
544 return (0);
545 }
546
nematch(fa * f,const char * p0)547 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
548 {
549 int s, ns;
550 uschar *p = (uschar *) p0;
551 uschar *q;
552 int i, k;
553
554 /* s = f->reset ? makeinit(f,1) : f->initstat; */
555 if (f->reset) {
556 f->initstat = s = makeinit(f,1);
557 } else {
558 s = f->initstat;
559 }
560 patlen = -1;
561 while (*p) {
562 q = p;
563 do {
564 if (f->out[s]) /* final state */
565 patlen = q-p;
566 /* assert(*q < NCHARS); */
567 if ((ns = f->gototab[s][*q]) != 0)
568 s = ns;
569 else
570 s = cgoto(f, s, *q);
571 if (s == 1) { /* no transition */
572 if (patlen > 0) {
573 patbeg = (char *) p;
574 return(1);
575 } else
576 goto nnextin; /* no nonempty match */
577 }
578 } while (*q++ != 0);
579 if (f->out[s])
580 patlen = q-p-1; /* don't count $ */
581 if (patlen > 0 ) {
582 patbeg = (char *) p;
583 return(1);
584 }
585 nnextin:
586 s = 2;
587 if (f->reset) {
588 for (i = 2; i <= f->curstat; i++)
589 xfree(f->posns[i]);
590 k = *f->posns[0];
591 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
592 overflo("out of state space");
593 for (i = 0; i <= k; i++)
594 (f->posns[2])[i] = (f->posns[0])[i];
595 f->initstat = f->curstat = 2;
596 f->out[2] = f->out[0];
597 for (i = 0; i < NCHARS; i++)
598 f->gototab[2][i] = 0;
599 }
600 p++;
601 }
602 return (0);
603 }
604
reparse(const char * p)605 Node *reparse(const char *p) /* parses regular expression pointed to by p */
606 { /* uses relex() to scan regular expression */
607 Node *np;
608
609 dprintf( ("reparse <%s>\n", p) );
610 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
611 rtok = relex();
612 /* GNU compatibility: an empty regexp matches anything */
613 if (rtok == '\0') {
614 /* FATAL("empty regular expression"); previous */
615 return(op2(EMPTYRE, NIL, NIL));
616 }
617 np = regexp();
618 if (rtok != '\0')
619 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
620 return(np);
621 }
622
regexp(void)623 Node *regexp(void) /* top-level parse of reg expr */
624 {
625 return (alt(concat(primary())));
626 }
627
primary(void)628 Node *primary(void)
629 {
630 Node *np;
631
632 switch (rtok) {
633 case CHAR:
634 np = op2(CHAR, NIL, itonp(rlxval));
635 rtok = relex();
636 return (unary(np));
637 case ALL:
638 rtok = relex();
639 return (unary(op2(ALL, NIL, NIL)));
640 case EMPTYRE:
641 rtok = relex();
642 return (unary(op2(ALL, NIL, NIL)));
643 case DOT:
644 rtok = relex();
645 return (unary(op2(DOT, NIL, NIL)));
646 case CCL:
647 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
648 rtok = relex();
649 return (unary(np));
650 case NCCL:
651 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
652 rtok = relex();
653 return (unary(np));
654 case '^':
655 rtok = relex();
656 return (unary(op2(CHAR, NIL, itonp(HAT))));
657 case '$':
658 rtok = relex();
659 return (unary(op2(CHAR, NIL, NIL)));
660 case '(':
661 rtok = relex();
662 if (rtok == ')') { /* special pleading for () */
663 rtok = relex();
664 return unary(op2(CCL, NIL, (Node *) tostring("")));
665 }
666 np = regexp();
667 if (rtok == ')') {
668 rtok = relex();
669 return (unary(np));
670 }
671 else
672 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
673 default:
674 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
675 }
676 return 0; /*NOTREACHED*/
677 }
678
concat(Node * np)679 Node *concat(Node *np)
680 {
681 switch (rtok) {
682 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
683 return (concat(op2(CAT, np, primary())));
684 }
685 return (np);
686 }
687
alt(Node * np)688 Node *alt(Node *np)
689 {
690 if (rtok == OR) {
691 rtok = relex();
692 return (alt(op2(OR, np, concat(primary()))));
693 }
694 return (np);
695 }
696
unary(Node * np)697 Node *unary(Node *np)
698 {
699 switch (rtok) {
700 case STAR:
701 rtok = relex();
702 return (unary(op2(STAR, np, NIL)));
703 case PLUS:
704 rtok = relex();
705 return (unary(op2(PLUS, np, NIL)));
706 case QUEST:
707 rtok = relex();
708 return (unary(op2(QUEST, np, NIL)));
709 default:
710 return (np);
711 }
712 }
713
714 /*
715 * Character class definitions conformant to the POSIX locale as
716 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
717 * and operating character sets are both ASCII (ISO646) or supersets
718 * thereof.
719 *
720 * Note that to avoid overflowing the temporary buffer used in
721 * relex(), the expanded character class (prior to range expansion)
722 * must be less than twice the size of their full name.
723 */
724
725 /* Because isblank doesn't show up in any of the header files on any
726 * system i use, it's defined here. if some other locale has a richer
727 * definition of "blank", define HAS_ISBLANK and provide your own
728 * version.
729 * the parentheses here are an attempt to find a path through the maze
730 * of macro definition and/or function and/or version provided. thanks
731 * to nelson beebe for the suggestion; let's see if it works everywhere.
732 */
733
734 /* #define HAS_ISBLANK */
735 #ifndef HAS_ISBLANK
736
737 int (xisblank)(int c)
738 {
739 return c==' ' || c=='\t';
740 }
741
742 #endif
743
744 struct charclass {
745 const char *cc_name;
746 int cc_namelen;
747 int (*cc_func)(int);
748 } charclasses[] = {
749 { "alnum", 5, isalnum },
750 { "alpha", 5, isalpha },
751 #ifndef HAS_ISBLANK
752 { "blank", 5, isspace }, /* was isblank */
753 #else
754 { "blank", 5, isblank },
755 #endif
756 { "cntrl", 5, iscntrl },
757 { "digit", 5, isdigit },
758 { "graph", 5, isgraph },
759 { "lower", 5, islower },
760 { "print", 5, isprint },
761 { "punct", 5, ispunct },
762 { "space", 5, isspace },
763 { "upper", 5, isupper },
764 { "xdigit", 6, isxdigit },
765 { NULL, 0, NULL },
766 };
767
768
relex(void)769 int relex(void) /* lexical analyzer for reparse */
770 {
771 int c, n;
772 int cflag;
773 static uschar *buf = 0;
774 static int bufsz = 100;
775 uschar *bp;
776 struct charclass *cc;
777 int i;
778
779 switch (c = *prestr++) {
780 case '|': return OR;
781 case '*': return STAR;
782 case '+': return PLUS;
783 case '?': return QUEST;
784 case '.': return DOT;
785 case '\0': prestr--; return '\0';
786 case '^':
787 case '$':
788 case '(':
789 case ')':
790 return c;
791 case '\\':
792 rlxval = quoted(&prestr);
793 return CHAR;
794 default:
795 rlxval = c;
796 return CHAR;
797 case '[':
798 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
799 FATAL("out of space in reg expr %.10s..", lastre);
800 bp = buf;
801 if (*prestr == '^') {
802 cflag = 1;
803 prestr++;
804 }
805 else
806 cflag = 0;
807 n = 2 * strlen((const char *) prestr)+1;
808 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
809 FATAL("out of space for reg expr %.10s...", lastre);
810 for (; ; ) {
811 if ((c = *prestr++) == '\\') {
812 *bp++ = '\\';
813 if ((c = *prestr++) == '\0')
814 FATAL("nonterminated character class %.20s...", lastre);
815 *bp++ = c;
816 /* } else if (c == '\n') { */
817 /* FATAL("newline in character class %.20s...", lastre); */
818 } else if (c == '[' && *prestr == ':') {
819 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
820 for (cc = charclasses; cc->cc_name; cc++)
821 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
822 break;
823 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
824 prestr[2 + cc->cc_namelen] == ']') {
825 prestr += cc->cc_namelen + 3;
826 for (i = 0; i < NCHARS; i++) {
827 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
828 FATAL("out of space for reg expr %.10s...", lastre);
829 if (cc->cc_func(i)) {
830 *bp++ = i;
831 n++;
832 }
833 }
834 } else
835 *bp++ = c;
836 } else if (c == '\0') {
837 FATAL("nonterminated character class %.20s", lastre);
838 } else if (bp == buf) { /* 1st char is special */
839 *bp++ = c;
840 } else if (c == ']') {
841 *bp++ = 0;
842 rlxstr = (uschar *) tostring((char *) buf);
843 if (cflag == 0)
844 return CCL;
845 else
846 return NCCL;
847 } else
848 *bp++ = c;
849 }
850 }
851 }
852
cgoto(fa * f,int s,int c)853 int cgoto(fa *f, int s, int c)
854 {
855 int i, j, k;
856 int *p, *q;
857
858 assert(c == HAT || c < NCHARS);
859 while (f->accept >= maxsetvec) { /* guessing here! */
860 maxsetvec *= 4;
861 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
862 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
863 if (setvec == 0 || tmpset == 0)
864 overflo("out of space in cgoto()");
865 }
866 for (i = 0; i <= f->accept; i++)
867 setvec[i] = 0;
868 setcnt = 0;
869 /* compute positions of gototab[s,c] into setvec */
870 p = f->posns[s];
871 for (i = 1; i <= *p; i++) {
872 if ((k = f->re[p[i]].ltype) != FINAL) {
873 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
874 || (k == DOT && c != 0 && c != HAT)
875 || (k == ALL && c != 0)
876 || (k == EMPTYRE && c != 0)
877 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
878 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
879 q = f->re[p[i]].lfollow;
880 for (j = 1; j <= *q; j++) {
881 if (q[j] >= maxsetvec) {
882 maxsetvec *= 4;
883 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
884 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
885 if (setvec == 0 || tmpset == 0)
886 overflo("cgoto overflow");
887 }
888 if (setvec[q[j]] == 0) {
889 setcnt++;
890 setvec[q[j]] = 1;
891 }
892 }
893 }
894 }
895 }
896 /* determine if setvec is a previous state */
897 tmpset[0] = setcnt;
898 j = 1;
899 for (i = f->accept; i >= 0; i--)
900 if (setvec[i]) {
901 tmpset[j++] = i;
902 }
903 /* tmpset == previous state? */
904 for (i = 1; i <= f->curstat; i++) {
905 p = f->posns[i];
906 if ((k = tmpset[0]) != p[0])
907 goto different;
908 for (j = 1; j <= k; j++)
909 if (tmpset[j] != p[j])
910 goto different;
911 /* setvec is state i */
912 f->gototab[s][c] = i;
913 return i;
914 different:;
915 }
916
917 /* add tmpset to current set of states */
918 if (f->curstat >= NSTATES-1) {
919 f->curstat = 2;
920 f->reset = 1;
921 for (i = 2; i < NSTATES; i++)
922 xfree(f->posns[i]);
923 } else
924 ++(f->curstat);
925 for (i = 0; i < NCHARS; i++)
926 f->gototab[f->curstat][i] = 0;
927 xfree(f->posns[f->curstat]);
928 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
929 overflo("out of space in cgoto");
930
931 f->posns[f->curstat] = p;
932 f->gototab[s][c] = f->curstat;
933 for (i = 0; i <= setcnt; i++)
934 p[i] = tmpset[i];
935 if (setvec[f->accept])
936 f->out[f->curstat] = 1;
937 else
938 f->out[f->curstat] = 0;
939 return f->curstat;
940 }
941
942
freefa(fa * f)943 void freefa(fa *f) /* free a finite automaton */
944 {
945 int i;
946
947 if (f == NULL)
948 return;
949 for (i = 0; i <= f->curstat; i++)
950 xfree(f->posns[i]);
951 for (i = 0; i <= f->accept; i++) {
952 xfree(f->re[i].lfollow);
953 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
954 xfree((f->re[i].lval.np));
955 }
956 xfree(f->restr);
957 xfree(f);
958 }
959