1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/types.h>
5 #include <assert.h>
6
7 #include "regex.h"
8
9 int debug = 0;
10 int line = 0;
11 int status = 0;
12
13 int copts = REG_EXTENDED;
14 int eopts = 0;
15 char *fopts = 0;
16 regoff_t startoff = 0;
17 regoff_t endoff = 0;
18
19 extern int split();
20 extern void regprint();
21
22 static void regress(FILE *in);
23 static void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
24 static char *check(char *str, regmatch_t sub, char *should);
25 static int parseopts(int argc, char *argv[]);
26 static int options(int type, char *s);
27 static int opt(int c, char *s);
28 static void fixstr(char *p);
29 static char *eprint(int err);
30 static int efind(char *name);
31
32 /*
33 - main - do the simple case, hand off to regress() for regression
34 */
35 int
main(argc,argv)36 main(argc, argv)
37 int argc;
38 char *argv[];
39 {
40 regex_t re;
41 # define NS 10
42 regmatch_t subs[NS];
43 char erbuf[100];
44 int err;
45 int i;
46 int optind = parseopts(argc, argv);
47
48 if (fopts != 0) {
49 FILE *f = fopen(fopts, "r");
50 if (f == NULL) {
51 fputs("unable to open input\n", stderr);
52 exit(1);
53 }
54 regress(f);
55 exit(status);
56 }
57
58 if (optind >= argc) {
59 regress(stdin);
60 exit(status);
61 }
62
63 err = regcomp(&re, argv[optind++], copts);
64 if (err) {
65 size_t len = regerror(err, &re, erbuf, sizeof(erbuf));
66 fprintf(stderr, "error %s, %lu/%d `%s'\n",
67 eprint(err), (unsigned long)len, (int)sizeof(erbuf), erbuf);
68 exit(status);
69 }
70 regprint(&re, stdout);
71
72 if (optind >= argc) {
73 regfree(&re);
74 exit(status);
75 }
76
77 if (eopts®_STARTEND) {
78 subs[0].rm_so = startoff;
79 subs[0].rm_eo = (regoff_t)strlen(argv[optind]) - endoff;
80 }
81 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
82 if (err) {
83 size_t len = regerror(err, &re, erbuf, sizeof(erbuf));
84 fprintf(stderr, "error %s, %lu/%d `%s'\n",
85 eprint(err), (unsigned long)len, (int)sizeof(erbuf), erbuf);
86 exit(status);
87 }
88 if (!(copts®_NOSUB)) {
89 int len = (int)(subs[0].rm_eo - subs[0].rm_so);
90 if (subs[0].rm_so != -1) {
91 if (len != 0)
92 printf("match `%.*s'\n", len,
93 argv[optind] + subs[0].rm_so);
94 else
95 printf("match `'@%.1s\n",
96 argv[optind] + subs[0].rm_so);
97 }
98 for (i = 1; i < NS; i++)
99 if (subs[i].rm_so != -1)
100 printf("(%d) `%.*s'\n", i,
101 (int)(subs[i].rm_eo - subs[i].rm_so),
102 argv[optind] + subs[i].rm_so);
103 }
104 exit(status);
105 }
106
107 /*
108 - regress - main loop of regression test
109 */
110 static void
regress(in)111 regress(in)
112 FILE *in;
113 {
114 char inbuf[1000];
115 # define MAXF 10
116 char *f[MAXF];
117 int nf;
118 int i;
119 char erbuf[100];
120 size_t ne;
121 char *badpat = "invalid regular expression";
122 # define SHORT 10
123 char *bpname = "REG_BADPAT";
124 regex_t re;
125
126 while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
127 line++;
128 if (inbuf[0] == '#' || inbuf[0] == '\n')
129 continue; /* NOTE CONTINUE */
130 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
131 if (debug)
132 fprintf(stdout, "%d:\n", line);
133 nf = split(inbuf, f, MAXF, "\t\t");
134 if (nf < 3) {
135 fprintf(stderr, "bad input, line %d\n", line);
136 exit(1);
137 }
138 for (i = 0; i < nf; i++)
139 if (strcmp(f[i], "\"\"") == 0)
140 f[i] = "";
141 if (nf <= 3)
142 f[3] = NULL;
143 if (nf <= 4)
144 f[4] = NULL;
145 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
146 if (opt('&', f[1])) /* try with either type of RE */
147 try(f[0], f[1], f[2], f[3], f[4],
148 options('c', f[1]) &~ REG_EXTENDED);
149 }
150
151 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
152 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
153 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
154 erbuf, badpat);
155 status = 1;
156 }
157 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
158 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
159 ne != strlen(badpat)+1) {
160 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
161 erbuf, SHORT-1, badpat);
162 status = 1;
163 }
164 ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
165 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
166 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
167 erbuf, bpname);
168 status = 1;
169 }
170 re.re_endp = bpname;
171 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
172 if (atoi(erbuf) != (int)REG_BADPAT) {
173 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
174 erbuf, (long)REG_BADPAT);
175 status = 1;
176 } else if (ne != strlen(erbuf)+1) {
177 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
178 erbuf, (long)REG_BADPAT);
179 status = 1;
180 }
181 }
182
183 /*
184 - try - try it, and report on problems
185 */
186 static void
try(f0,f1,f2,f3,f4,opts)187 try(f0, f1, f2, f3, f4, opts)
188 char *f0;
189 char *f1;
190 char *f2;
191 char *f3;
192 char *f4;
193 int opts; /* may not match f1 */
194 {
195 regex_t re;
196 # define NSUBS 10
197 regmatch_t subs[NSUBS];
198 # define NSHOULD 15
199 char *should[NSHOULD];
200 int nshould;
201 char erbuf[100];
202 int err;
203 size_t len;
204 char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
205 unsigned int i;
206 char *grump;
207 char f0copy[1000];
208 char f2copy[1000];
209
210 strcpy(f0copy, f0);
211 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
212 fixstr(f0copy);
213 err = regcomp(&re, f0copy, opts);
214 if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
215 /* unexpected error or wrong error */
216 len = regerror(err, &re, erbuf, sizeof(erbuf));
217 fprintf(stderr, "%d: %s error %s, %lu/%u `%s'\n",
218 line, type, eprint(err), (unsigned long)len,
219 (unsigned int)sizeof(erbuf), erbuf);
220 status = 1;
221 } else if (err == 0 && opt('C', f1)) {
222 /* unexpected success */
223 fprintf(stderr, "%d: %s should have given REG_%s\n",
224 line, type, f2);
225 status = 1;
226 err = 1; /* so we won't try regexec */
227 }
228
229 if (err != 0) {
230 regfree(&re);
231 return;
232 }
233
234 strcpy(f2copy, f2);
235 fixstr(f2copy);
236
237 if (options('e', f1)®_STARTEND) {
238 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
239 fprintf(stderr, "%d: bad STARTEND syntax\n", line);
240 subs[0].rm_so = (regoff_t)(strchr(f2, '(') - f2 + 1);
241 subs[0].rm_eo = (regoff_t)(strchr(f2, ')') - f2);
242 }
243 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
244
245 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
246 /* unexpected error or wrong error */
247 len = regerror(err, &re, erbuf, sizeof(erbuf));
248 fprintf(stderr, "%d: %s exec error %s, %lu/%u `%s'\n",
249 line, type, eprint(err), (unsigned long)len,
250 (unsigned int)sizeof(erbuf), erbuf);
251 status = 1;
252 } else if (err != 0) {
253 /* nothing more to check */
254 } else if (f3 == NULL) {
255 /* unexpected success */
256 fprintf(stderr, "%d: %s exec should have failed\n",
257 line, type);
258 status = 1;
259 err = 1; /* just on principle */
260 } else if (opts®_NOSUB) {
261 /* nothing more to check */
262 } else if ((grump = check(f2, subs[0], f3)) != NULL) {
263 fprintf(stderr, "%d: %s %s\n", line, type, grump);
264 status = 1;
265 err = 1;
266 }
267
268 if (err != 0 || f4 == NULL) {
269 regfree(&re);
270 return;
271 }
272
273 for (i = 1; i < NSHOULD; i++)
274 should[i] = NULL;
275 nshould = split(f4, should+1, NSHOULD-1, ",");
276 if (nshould == 0) {
277 nshould = 1;
278 should[1] = "";
279 }
280 for (i = 1; i < NSUBS; i++) {
281 grump = check(f2, subs[i], should[i]);
282 if (grump != NULL) {
283 fprintf(stderr, "%d: %s $%u %s\n", line,
284 type, i, grump);
285 status = 1;
286 err = 1;
287 }
288 }
289
290 regfree(&re);
291 }
292
293 /*
294 - parseopts - half-baked option processing to avoid using getopt, which isn't always available on Windows.
295 */
296 static int
parseopts(argc,argv)297 parseopts(argc, argv)
298 int argc;
299 char *argv[];
300 {
301 int i, j;
302 for (i = 1; i < argc; i++) {
303 if (argv[i][0] != '-' || argv[i][1] == 0) {
304 break;
305 }
306 for (j = 1; argv[i][j] != 0; j++) {
307 char opt = argv[i][j];
308 if (opt == 'x') {
309 debug++;
310 } else {
311 char *arg;
312 if (argv[i][j+1] != 0) {
313 arg = argv[i] + j+1;
314 } else {
315 if (i == argc-1) {
316 fprintf(stderr, "option requires an argument -- '%c'\n", opt);
317 exit(2);
318 }
319 arg = argv[++i];
320 }
321 switch (opt) {
322 case 'c':
323 copts = options(opt, arg);
324 break;
325 case 'e':
326 eopts = options(opt, arg);
327 break;
328 case 'f':
329 fopts = arg;
330 break;
331 case 'S':
332 startoff = (regoff_t)strtol(arg, NULL, 10);
333 break;
334 case 'E':
335 endoff = (regoff_t)strtol(arg, NULL, 10);
336 break;
337 default:
338 fprintf(stderr, "usage: %s ", argv[0]);
339 fprintf(stderr, "[-x][-c copt][-e eopt][-f file][-S startoff][-E endoff] [re]\n");
340 exit(2);
341 }
342 break;
343 }
344 }
345 }
346 return i;
347 }
348
349 /*
350 - options - pick options out of a regression-test string
351 */
352 static int
options(type,s)353 options(type, s)
354 int type; /* 'c' compile, 'e' exec */
355 char *s;
356 {
357 char *p;
358 int o = (type == 'c') ? copts : eopts;
359 char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
360
361 for (p = s; *p != '\0'; p++)
362 if (strchr(legal, *p) != NULL)
363 switch (*p) {
364 case 'b':
365 o &= ~REG_EXTENDED;
366 break;
367 case 'i':
368 o |= REG_ICASE;
369 break;
370 case 's':
371 o |= REG_NOSUB;
372 break;
373 case 'n':
374 o |= REG_NEWLINE;
375 break;
376 case 'm':
377 o &= ~REG_EXTENDED;
378 o |= REG_NOSPEC;
379 break;
380 case 'p':
381 o |= REG_PEND;
382 break;
383 case '^':
384 o |= REG_NOTBOL;
385 break;
386 case '$':
387 o |= REG_NOTEOL;
388 break;
389 case '#':
390 o |= REG_STARTEND;
391 break;
392 case 't': /* trace */
393 o |= REG_TRACE;
394 break;
395 case 'l': /* force long representation */
396 o |= REG_LARGE;
397 break;
398 case 'r': /* force backref use */
399 o |= REG_BACKR;
400 break;
401 }
402 return(o);
403 }
404
/*
 - opt - is a particular option in a regression string?
 *
 * c	option character to look for
 * s	option string to search
 *
 * Returns 1 if c occurs in s, 0 otherwise.  Because strchr() also matches
 * the terminator, c == '\0' always yields 1.
 */
static int			/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
415
/*
 - fixstr - transform magic characters in strings
 *
 * Rewrites p in place, one character for one character:
 *	'N' -> newline, 'T' -> tab, 'S' -> space, 'Z' -> NUL
 * Scanning stops at the first NUL that was in the string on entry; a NUL
 * planted for a 'Z' does not stop it, because the terminator test is made
 * only after stepping to the next character.  A NULL p is ignored.
 */
static void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
	}
}
436
/*
 - check - check a substring match
 *
 * str		the string that was matched against
 * sub		offsets reported for the (sub)expression
 * should	what ought to have matched:
 *		NULL or "-"	nothing should have matched
 *		"@text"		an empty match located where `text` begins
 *				("@" alone means at the end of the string)
 *		anything else	the exact matched text
 *
 * Returns NULL if the result is as expected, otherwise a complaint.  The
 * complaint is either a string literal or a static buffer that the next
 * call overwrites; over-long complaints are truncated to fit the buffer.
 */
static char *			/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	regoff_t len;
	size_t shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
	    (sub.rm_so != -1 && sub.rm_eo == -1) ||
	    (sub.rm_so != -1 && sub.rm_so < 0) ||
	    (sub.rm_eo != -1 && sub.rm_eo < 0)) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
			(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
			"start %ld end %ld, past end of string",
			(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = sub.rm_eo - sub.rm_so;
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		/* the precision of %.*s must be an int, not a regoff_t */
		snprintf(grump, sizeof(grump), "matched `%.*s'", (int)len, p);
		return(grump);
	}

	shlen = strlen(should);

	/* check for wrong match */
	if ((size_t)len != shlen || strncmp(p, should, shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
			(int)len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
513
514 /*
515 - eprint - convert error number to name
516 */
517 static char *
eprint(err)518 eprint(err)
519 int err;
520 {
521 static char epbuf[100];
522 size_t len;
523
524 len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
525 assert(len <= sizeof(epbuf));
526 return(epbuf);
527 }
528
529 /*
530 - efind - convert error name to number
531 */
532 static int
efind(name)533 efind(name)
534 char *name;
535 {
536 static char efbuf[100];
537 regex_t re;
538
539 sprintf(efbuf, "REG_%s", name);
540 assert(strlen(efbuf) < sizeof(efbuf));
541 re.re_endp = efbuf;
542 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
543 return(atoi(efbuf));
544 }
545