• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/types.h>
5 #include <assert.h>
6 
7 #include "regex.h"
8 
9 int debug = 0;		/* bumped once per -x flag; nonzero makes regress() echo each line number */
10 int line = 0;		/* current line of the regression script, counted by regress() */
11 int status = 0;		/* value handed to exit(); set to 1 when a regression check fails */
12 
13 int copts = REG_EXTENDED;	/* base regcomp() flags; adjusted by -c */
14 int eopts = 0;		/* base regexec() flags; adjusted by -e */
15 char *fopts = 0;		/* regression script path given with -f; 0 when absent */
16 regoff_t startoff = 0;	/* -S value: rm_so used by main() under REG_STARTEND */
17 regoff_t endoff = 0;	/* -E value: subtracted from the subject length to form rm_eo */
18 
19 extern int split();
20 extern void regprint();
21 
22 static void regress(FILE *in);
23 static void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
24 static char *check(char *str, regmatch_t sub, char *should);
25 static int parseopts(int argc, char *argv[]);
26 static int options(int type, char *s);
27 static int opt(int c, char *s);
28 static void fixstr(char *p);
29 static char *eprint(int err);
30 static int efind(char *name);
31 
32 /*
33  - main - do the simple case, hand off to regress() for regression
34  */
35 int
main(argc,argv)36 main(argc, argv)
37 int argc;
38 char *argv[];
39 {
40 	regex_t re;
41 #	define	NS	10
42 	regmatch_t subs[NS];
43 	char erbuf[100];
44 	int err;
45 	int i;
46 	int optind = parseopts(argc, argv);
47 
48 	if (fopts != 0) {
49 		FILE *f = fopen(fopts, "r");
50 		if (f == NULL) {
51 			fputs("unable to open input\n", stderr);
52 			exit(1);
53 		}
54 		regress(f);
55 		exit(status);
56 	}
57 
58 	if (optind >= argc) {
59 		regress(stdin);
60 		exit(status);
61 	}
62 
63 	err = regcomp(&re, argv[optind++], copts);
64 	if (err) {
65 	  	size_t len = regerror(err, &re, erbuf, sizeof(erbuf));
66 		fprintf(stderr, "error %s, %lu/%d `%s'\n",
67 			eprint(err), (unsigned long)len, (int)sizeof(erbuf), erbuf);
68 		exit(status);
69 	}
70 	regprint(&re, stdout);
71 
72 	if (optind >= argc) {
73 		regfree(&re);
74 		exit(status);
75 	}
76 
77 	if (eopts&REG_STARTEND) {
78 		subs[0].rm_so = startoff;
79 		subs[0].rm_eo = (regoff_t)strlen(argv[optind]) - endoff;
80 	}
81 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
82 	if (err) {
83 		size_t len = regerror(err, &re, erbuf, sizeof(erbuf));
84 		fprintf(stderr, "error %s, %lu/%d `%s'\n",
85 			eprint(err), (unsigned long)len, (int)sizeof(erbuf), erbuf);
86 		exit(status);
87 	}
88 	if (!(copts&REG_NOSUB)) {
89 		int len = (int)(subs[0].rm_eo - subs[0].rm_so);
90 		if (subs[0].rm_so != -1) {
91 			if (len != 0)
92 				printf("match `%.*s'\n", len,
93 					argv[optind] + subs[0].rm_so);
94 			else
95 				printf("match `'@%.1s\n",
96 					argv[optind] + subs[0].rm_so);
97 		}
98 		for (i = 1; i < NS; i++)
99 			if (subs[i].rm_so != -1)
100 				printf("(%d) `%.*s'\n", i,
101 					(int)(subs[i].rm_eo - subs[i].rm_so),
102 					argv[optind] + subs[i].rm_so);
103 	}
104 	exit(status);
105 }
106 
107 /*
108  - regress - main loop of regression test
109  */
110 static void
regress(in)111 regress(in)
112 FILE *in;
113 {
114 	char inbuf[1000];
115 #	define	MAXF	10
116 	char *f[MAXF];
117 	int nf;
118 	int i;
119 	char erbuf[100];
120 	size_t ne;
121 	char *badpat = "invalid regular expression";
122 #	define	SHORT	10
123 	char *bpname = "REG_BADPAT";
124 	regex_t re;
125 
126 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
127 		line++;
128 		if (inbuf[0] == '#' || inbuf[0] == '\n')
129 			continue;			/* NOTE CONTINUE */
130 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
131 		if (debug)
132 			fprintf(stdout, "%d:\n", line);
133 		nf = split(inbuf, f, MAXF, "\t\t");
134 		if (nf < 3) {
135 			fprintf(stderr, "bad input, line %d\n", line);
136 			exit(1);
137 		}
138 		for (i = 0; i < nf; i++)
139 			if (strcmp(f[i], "\"\"") == 0)
140 				f[i] = "";
141 		if (nf <= 3)
142 			f[3] = NULL;
143 		if (nf <= 4)
144 			f[4] = NULL;
145 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
146 		if (opt('&', f[1]))	/* try with either type of RE */
147 			try(f[0], f[1], f[2], f[3], f[4],
148 					options('c', f[1]) &~ REG_EXTENDED);
149 	}
150 
151 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
152 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
153 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
154 							erbuf, badpat);
155 		status = 1;
156 	}
157 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
158 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
159 						ne != strlen(badpat)+1) {
160 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
161 						erbuf, SHORT-1, badpat);
162 		status = 1;
163 	}
164 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
165 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
166 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
167 						erbuf, bpname);
168 		status = 1;
169 	}
170 	re.re_endp = bpname;
171 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
172 	if (atoi(erbuf) != (int)REG_BADPAT) {
173 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
174 						erbuf, (long)REG_BADPAT);
175 		status = 1;
176 	} else if (ne != strlen(erbuf)+1) {
177 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
178 						erbuf, (long)REG_BADPAT);
179 		status = 1;
180 	}
181 }
182 
183 /*
184  - try - try it, and report on problems
185  */
186 static void
try(f0,f1,f2,f3,f4,opts)187 try(f0, f1, f2, f3, f4, opts)
188 char *f0;
189 char *f1;
190 char *f2;
191 char *f3;
192 char *f4;
193 int opts;			/* may not match f1 */
194 {
195 	regex_t re;
196 #	define	NSUBS	10
197 	regmatch_t subs[NSUBS];
198 #	define	NSHOULD	15
199 	char *should[NSHOULD];
200 	int nshould;
201 	char erbuf[100];
202 	int err;
203 	size_t len;
204 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
205 	unsigned int i;
206 	char *grump;
207 	char f0copy[1000];
208 	char f2copy[1000];
209 
210 	strcpy(f0copy, f0);
211 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
212 	fixstr(f0copy);
213 	err = regcomp(&re, f0copy, opts);
214 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
215 		/* unexpected error or wrong error */
216 		len = regerror(err, &re, erbuf, sizeof(erbuf));
217 		fprintf(stderr, "%d: %s error %s, %lu/%u `%s'\n",
218 				line, type, eprint(err), (unsigned long)len,
219 				(unsigned int)sizeof(erbuf), erbuf);
220 		status = 1;
221 	} else if (err == 0 && opt('C', f1)) {
222 		/* unexpected success */
223 		fprintf(stderr, "%d: %s should have given REG_%s\n",
224 						line, type, f2);
225 		status = 1;
226 		err = 1;	/* so we won't try regexec */
227 	}
228 
229 	if (err != 0) {
230 		regfree(&re);
231 		return;
232 	}
233 
234 	strcpy(f2copy, f2);
235 	fixstr(f2copy);
236 
237 	if (options('e', f1)&REG_STARTEND) {
238 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
239 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
240 		subs[0].rm_so = (regoff_t)(strchr(f2, '(') - f2 + 1);
241 		subs[0].rm_eo = (regoff_t)(strchr(f2, ')') - f2);
242 	}
243 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
244 
245 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
246 		/* unexpected error or wrong error */
247 		len = regerror(err, &re, erbuf, sizeof(erbuf));
248 		fprintf(stderr, "%d: %s exec error %s, %lu/%u `%s'\n",
249 				line, type, eprint(err), (unsigned long)len,
250 				(unsigned int)sizeof(erbuf), erbuf);
251 		status = 1;
252 	} else if (err != 0) {
253 		/* nothing more to check */
254 	} else if (f3 == NULL) {
255 		/* unexpected success */
256 		fprintf(stderr, "%d: %s exec should have failed\n",
257 						line, type);
258 		status = 1;
259 		err = 1;		/* just on principle */
260 	} else if (opts&REG_NOSUB) {
261 		/* nothing more to check */
262 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
263 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
264 		status = 1;
265 		err = 1;
266 	}
267 
268 	if (err != 0 || f4 == NULL) {
269 		regfree(&re);
270 		return;
271 	}
272 
273 	for (i = 1; i < NSHOULD; i++)
274 		should[i] = NULL;
275 	nshould = split(f4, should+1, NSHOULD-1, ",");
276 	if (nshould == 0) {
277 		nshould = 1;
278 		should[1] = "";
279 	}
280 	for (i = 1; i < NSUBS; i++) {
281 		grump = check(f2, subs[i], should[i]);
282 		if (grump != NULL) {
283 			fprintf(stderr, "%d: %s $%u %s\n", line,
284 							type, i, grump);
285 			status = 1;
286 			err = 1;
287 		}
288 	}
289 
290 	regfree(&re);
291 }
292 
293 /*
294  - parseopts - half-baked option processing to avoid using getopt, which isn't always available on Windows.
295  */
296 static int
parseopts(argc,argv)297 parseopts(argc, argv)
298 int argc;
299 char *argv[];
300 {
301 	int i, j;
302 	for (i = 1; i < argc; i++) {
303 		if (argv[i][0] != '-' || argv[i][1] == 0) {
304 			break;
305 		}
306 		for (j = 1; argv[i][j] != 0; j++) {
307 			char opt = argv[i][j];
308 			if (opt == 'x') {
309 				debug++;
310 			} else {
311 				char *arg;
312 				if (argv[i][j+1] != 0) {
313 					arg = argv[i] + j+1;
314 				} else {
315 					if (i == argc-1) {
316 						fprintf(stderr, "option requires an argument -- '%c'\n", opt);
317 						exit(2);
318 					}
319 					arg = argv[++i];
320 				}
321 				switch (opt) {
322 				case 'c':
323 					copts = options(opt, arg);
324 					break;
325 				case 'e':
326 					eopts = options(opt, arg);
327 					break;
328 				case 'f':
329 					fopts = arg;
330 					break;
331 				case 'S':
332 					startoff = (regoff_t)strtol(arg, NULL, 10);
333 					break;
334 				case 'E':
335 					endoff = (regoff_t)strtol(arg, NULL, 10);
336 					break;
337 				default:
338 					fprintf(stderr, "usage: %s ", argv[0]);
339 					fprintf(stderr, "[-x][-c copt][-e eopt][-f file][-S startoff][-E endoff] [re]\n");
340 					exit(2);
341 				}
342 				break;
343 			}
344 		}
345 	}
346 	return i;
347 }
348 
349 /*
350  - options - pick options out of a regression-test string
351  */
352 static int
options(type,s)353 options(type, s)
354 int type;			/* 'c' compile, 'e' exec */
355 char *s;
356 {
357 	char *p;
358 	int o = (type == 'c') ? copts : eopts;
359 	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
360 
361 	for (p = s; *p != '\0'; p++)
362 		if (strchr(legal, *p) != NULL)
363 			switch (*p) {
364 			case 'b':
365 				o &= ~REG_EXTENDED;
366 				break;
367 			case 'i':
368 				o |= REG_ICASE;
369 				break;
370 			case 's':
371 				o |= REG_NOSUB;
372 				break;
373 			case 'n':
374 				o |= REG_NEWLINE;
375 				break;
376 			case 'm':
377 				o &= ~REG_EXTENDED;
378 				o |= REG_NOSPEC;
379 				break;
380 			case 'p':
381 				o |= REG_PEND;
382 				break;
383 			case '^':
384 				o |= REG_NOTBOL;
385 				break;
386 			case '$':
387 				o |= REG_NOTEOL;
388 				break;
389 			case '#':
390 				o |= REG_STARTEND;
391 				break;
392 			case 't':	/* trace */
393 				o |= REG_TRACE;
394 				break;
395 			case 'l':	/* force long representation */
396 				o |= REG_LARGE;
397 				break;
398 			case 'r':	/* force backref use */
399 				o |= REG_BACKR;
400 				break;
401 			}
402 	return(o);
403 }
404 
/*
 - opt - does the regression flag string s contain the option letter c?
 *
 * Returns 1 when c occurs in s and 0 otherwise.
 */
static int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	if (hit == NULL)
		return 0;
	return 1;
}
415 
/*
 - fixstr - transform magic characters in strings
 *
 * Rewrites p in place: 'N' becomes newline, 'T' tab, 'S' space and 'Z' a
 * NUL byte.  A NULL p is accepted and ignored.
 */
static void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	/*
	 * The end test runs on the next byte, after the current one has been
	 * rewritten, so a NUL planted for 'Z' does not stop the scan.
	 */
	for (cur = p; *cur != '\0'; cur++) {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
	}
}
436 
/*
 - check - check a substring match
 *
 *	str	the subject string the offsets in sub refer to
 *	sub	the offsets reported for one (sub)match
 *	should	the text that should have matched; NULL or "-" means no
 *		match is expected; "@text" means an empty match is expected
 *		at a point where str continues with text (a bare "@" means
 *		at the end of str)
 *
 * Returns NULL when sub agrees with should, otherwise a complaint.  The
 * complaint is either a string literal or a static buffer that the next
 * call overwrites.
 */
static char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	enum { MAXSHOW = 400 };		/* most match text echoed into grump */
	regoff_t len;
	size_t shlen;
	int show;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range */
	if ((size_t) sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = sub.rm_eo - sub.rm_so;
	p = str + sub.rm_so;

	/*
	 * The precision given to %.*s has to be an int, and the echoed text
	 * has to fit in grump, so clamp what is shown.  Comparisons below
	 * still use the full length.
	 */
	show = (len > MAXSHOW) ? MAXSHOW : (int)len;

	/* check for not supposed to match */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", show, p);
		return(grump);
	}

	shlen = strlen(should);

	/* check for wrong match */
	if ((size_t)len != shlen || strncmp(p, should, shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", show, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
513 
514 /*
515  - eprint - convert error number to name
516  */
517 static char *
eprint(err)518 eprint(err)
519 int err;
520 {
521 	static char epbuf[100];
522 	size_t len;
523 
524 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
525 	assert(len <= sizeof(epbuf));
526 	return(epbuf);
527 }
528 
529 /*
530  - efind - convert error name to number
531  */
532 static int
efind(name)533 efind(name)
534 char *name;
535 {
536 	static char efbuf[100];
537 	regex_t re;
538 
539 	sprintf(efbuf, "REG_%s", name);
540 	assert(strlen(efbuf) < sizeof(efbuf));
541 	re.re_endp = efbuf;
542 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
543 	return(atoi(efbuf));
544 }
545