1 /* $NetBSD: util.c,v 1.16 2012/05/06 22:32:05 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: util.c,v 1.16 2012/05/06 22:32:05 joerg Exp $");
38
39 #include <sys/stat.h>
40 #include <sys/types.h>
41
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fnmatch.h>
46 #include <fts.h>
47 #include <libgen.h>
48 #include <stdbool.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <wchar.h>
54 #include <wctype.h>
55
56 #include "grep.h"
57
/*
 * Output bookkeeping shared by procfile() and procline():
 *   first         - set to true by procfile() when it starts a file and
 *                   cleared by procline() once it prints a line.
 *   first_global  - starts out true and is cleared by procline() the first
 *                   time it prints a line.
 *   since_printed - incremented by procline() for every line it does not
 *                   print and reset to zero whenever it prints one.
 */
static bool first;
static bool first_global = true;
static unsigned long long since_printed;

static int procline(struct str *, int);
62
63 bool
file_matching(const char * fname)64 file_matching(const char *fname)
65 {
66 char *fname_base, *fname_copy;
67 unsigned int i;
68 bool ret;
69
70 ret = finclude ? false : true;
71 fname_copy = grep_strdup(fname);
72 fname_base = basename(fname_copy);
73
74 for (i = 0; i < fpatterns; ++i) {
75 if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
76 fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
77 if (fpattern[i].mode == EXCL_PAT)
78 return (false);
79 else
80 ret = true;
81 }
82 }
83 free(fname_copy);
84 return (ret);
85 }
86
87 static inline bool
dir_matching(const char * dname)88 dir_matching(const char *dname)
89 {
90 unsigned int i;
91 bool ret;
92
93 ret = dinclude ? false : true;
94
95 for (i = 0; i < dpatterns; ++i) {
96 if (dname != NULL &&
97 fnmatch(dname, dpattern[i].pat, 0) == 0) {
98 if (dpattern[i].mode == EXCL_PAT)
99 return (false);
100 else
101 ret = true;
102 }
103 }
104 return (ret);
105 }
106
107 /*
108 * Processes a directory when a recursive search is performed with
109 * the -R option. Each appropriate file is passed to procfile().
110 */
111 int
grep_tree(char ** argv)112 grep_tree(char **argv)
113 {
114 FTS *fts;
115 FTSENT *p;
116 char *d, *dir = NULL;
117 int c, fts_flags;
118 bool ok;
119
120 c = fts_flags = 0;
121
122 switch(linkbehave) {
123 case LINK_EXPLICIT:
124 fts_flags = FTS_COMFOLLOW;
125 break;
126 case LINK_SKIP:
127 fts_flags = FTS_PHYSICAL;
128 break;
129 default:
130 fts_flags = FTS_LOGICAL;
131
132 }
133
134 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
135
136 if (!(fts = fts_open(argv, fts_flags, NULL)))
137 err(2, "fts_open");
138 while ((p = fts_read(fts)) != NULL) {
139 switch (p->fts_info) {
140 case FTS_DNR:
141 /* FALLTHROUGH */
142 case FTS_ERR:
143 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
144 break;
145 case FTS_D:
146 /* FALLTHROUGH */
147 case FTS_DP:
148 break;
149 case FTS_DC:
150 /* Print a warning for recursive directory loop */
151 warnx("warning: %s: recursive directory loop",
152 p->fts_path);
153 break;
154 default:
155 /* Check for file exclusion/inclusion */
156 ok = true;
157 if (dexclude || dinclude) {
158 if ((d = strrchr(p->fts_path, '/')) != NULL) {
159 dir = grep_malloc(sizeof(char) *
160 (d - p->fts_path + 1));
161 memcpy(dir, p->fts_path,
162 d - p->fts_path);
163 dir[d - p->fts_path] = '\0';
164 }
165 ok = dir_matching(dir);
166 free(dir);
167 dir = NULL;
168 }
169 if (fexclude || finclude)
170 ok &= file_matching(p->fts_path);
171
172 if (ok)
173 c += procfile(p->fts_path);
174 break;
175 }
176 }
177
178 fts_close(fts);
179 return (c);
180 }
181
182 /*
183 * Opens a file and processes it. Each file is processed line-by-line
184 * passing the lines to procline().
185 */
186 int
procfile(const char * fn)187 procfile(const char *fn)
188 {
189 struct file *f;
190 struct stat sb;
191 struct str ln;
192 mode_t s;
193 int c, t;
194
195 if (mflag && (mcount <= 0))
196 return (0);
197
198 if (strcmp(fn, "-") == 0) {
199 fn = label != NULL ? label : getstr(1);
200 f = grep_open(NULL);
201 } else {
202 if (!stat(fn, &sb)) {
203 /* Check if we need to process the file */
204 s = sb.st_mode & S_IFMT;
205 if (s == S_IFDIR && dirbehave == DIR_SKIP)
206 return (0);
207 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
208 || s == S_IFSOCK) && devbehave == DEV_SKIP)
209 return (0);
210 }
211 f = grep_open(fn);
212 }
213 if (f == NULL) {
214 if (!sflag)
215 warn("%s", fn);
216 if (errno == ENOENT)
217 notfound = true;
218 return (0);
219 }
220
221 ln.file = grep_malloc(strlen(fn) + 1);
222 strcpy(ln.file, fn);
223 ln.line_no = 0;
224 ln.len = 0;
225 tail = 0;
226 ln.off = -1;
227
228 for (first = true, c = 0; c == 0 || !(lflag || qflag); ) {
229 ln.off += ln.len + 1;
230 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0)
231 break;
232 if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep)
233 --ln.len;
234 ln.line_no++;
235
236 /* Return if we need to skip a binary file */
237 if (f->binary && binbehave == BINFILE_SKIP) {
238 grep_close(f);
239 free(ln.file);
240 free(f);
241 return (0);
242 }
243 /* Process the file line-by-line */
244 t = procline(&ln, f->binary);
245 c += t;
246
247 /* Count the matches if we have a match limit */
248 if (mflag) {
249 mcount -= t;
250 if (mcount <= 0)
251 break;
252 }
253 }
254 if (Bflag > 0)
255 clearqueue();
256 grep_close(f);
257
258 if (cflag) {
259 if (!hflag)
260 printf("%s:", ln.file);
261 printf("%u%c", c, line_sep);
262 }
263 if (lflag && !qflag && c != 0)
264 printf("%s%c", fn, line_sep);
265 if (Lflag && !qflag && c == 0)
266 printf("%s%c", fn, line_sep);
267 if (c && !cflag && !lflag && !Lflag &&
268 binbehave == BINFILE_BIN && f->binary && !qflag)
269 printf(getstr(8), fn);
270
271 free(ln.file);
272 free(f);
273 return (c);
274 }
275
276 #define iswword(x) (iswalnum((x)) || (x) == L'_')
277
278 /*
279 * Processes a line comparing it with the specified patterns. Each pattern
280 * is looped to be compared along with the full string, saving each and every
281 * match, which is necessary to colorize the output and to count the
282 * matches. The matching lines are passed to printline() to display the
283 * appropriate output.
284 */
285 static int
procline(struct str * l,int nottext)286 procline(struct str *l, int nottext)
287 {
288 regmatch_t matches[MAX_LINE_MATCHES];
289 regmatch_t pmatch;
290 size_t st = 0;
291 unsigned int i;
292 int c = 0, m = 0, r = 0;
293
294 /* Loop to process the whole line */
295 while (st <= l->len) {
296 pmatch.rm_so = st;
297 pmatch.rm_eo = l->len;
298
299 /* Loop to compare with all the patterns */
300 for (i = 0; i < patterns; i++) {
301 /*
302 * XXX: grep_search() is a workaround for speed up and should be
303 * removed in the future. See fastgrep.c.
304 */
305 if (fg_pattern[i].pattern) {
306 r = grep_search(&fg_pattern[i],
307 (unsigned char *)l->dat,
308 l->len, &pmatch);
309 r = (r == 0) ? 0 : REG_NOMATCH;
310 st = pmatch.rm_eo;
311 } else {
312 r = regexec(&r_pattern[i], l->dat, 1,
313 &pmatch, eflags);
314 r = (r == 0) ? 0 : REG_NOMATCH;
315 st = pmatch.rm_eo;
316 }
317 if (r == REG_NOMATCH)
318 continue;
319 /* Check for full match */
320 if (xflag &&
321 (pmatch.rm_so != 0 ||
322 (size_t)pmatch.rm_eo != l->len))
323 continue;
324 /* Check for whole word match */
325 if (fg_pattern[i].word && pmatch.rm_so != 0) {
326 wint_t wbegin, wend;
327
328 wbegin = wend = L' ';
329 if (pmatch.rm_so != 0 &&
330 sscanf(&l->dat[pmatch.rm_so - 1],
331 "%lc", &wbegin) != 1)
332 continue;
333 if ((size_t)pmatch.rm_eo != l->len &&
334 sscanf(&l->dat[pmatch.rm_eo],
335 "%lc", &wend) != 1)
336 continue;
337 if (iswword(wbegin) || iswword(wend))
338 continue;
339 }
340 c = 1;
341 if (m < MAX_LINE_MATCHES)
342 matches[m++] = pmatch;
343 /* matches - skip further patterns */
344 if ((color != NULL && !oflag) || qflag || lflag)
345 break;
346 }
347
348 if (vflag) {
349 c = !c;
350 break;
351 }
352 /* One pass if we are not recording matches */
353 if ((color != NULL && !oflag) || qflag || lflag)
354 break;
355
356 if (st == (size_t)pmatch.rm_so)
357 break; /* No matches */
358 }
359
360 if (c && binbehave == BINFILE_BIN && nottext)
361 return (c); /* Binary file */
362
363 /* Dealing with the context */
364 if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
365 if (c) {
366 if ((Aflag || Bflag) && !first_global &&
367 (first || since_printed > Bflag))
368 printf("--\n");
369 tail = Aflag;
370 if (Bflag > 0)
371 printqueue();
372 printline(l, ':', matches, m);
373 } else {
374 printline(l, '-', matches, m);
375 tail--;
376 }
377 first = false;
378 first_global = false;
379 since_printed = 0;
380 } else {
381 if (Bflag)
382 enqueue(l);
383 since_printed++;
384 }
385 return (c);
386 }
387
/*
 * malloc() wrapper for internal use: terminates the program through
 * err(2, ...) when malloc() returns NULL, so callers need not check the
 * result.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
400
/*
 * calloc() wrapper for internal use: terminates the program through
 * err(2, ...) when calloc() returns NULL, so callers need not check the
 * result.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
413
/*
 * realloc() wrapper for internal use: terminates the program through
 * err(2, ...) when realloc() returns NULL, so callers need not check the
 * result.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
425
/*
 * strdup() wrapper for internal use: terminates the program through
 * err(2, ...) when strdup() returns NULL, so callers need not check the
 * result.
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
438
439 /*
440 * Prints a matching line according to the command line options.
441 */
442 void
printline(struct str * line,int sep,regmatch_t * matches,int m)443 printline(struct str *line, int sep, regmatch_t *matches, int m)
444 {
445 size_t a = 0;
446 int i, n = 0;
447
448 if (!hflag) {
449 if (nullflag == 0)
450 fputs(line->file, stdout);
451 else {
452 printf("%s", line->file);
453 putchar(0);
454 }
455 ++n;
456 }
457 if (nflag) {
458 if (n > 0)
459 putchar(sep);
460 printf("%d", line->line_no);
461 ++n;
462 }
463 if (bflag) {
464 if (n > 0)
465 putchar(sep);
466 printf("%lld", (long long)line->off);
467 ++n;
468 }
469 if (n)
470 putchar(sep);
471 /* --color and -o */
472 if ((oflag || color) && m > 0) {
473 for (i = 0; i < m; i++) {
474 if (!oflag)
475 fwrite(line->dat + a, matches[i].rm_so - a, 1,
476 stdout);
477 if (color)
478 fprintf(stdout, "\33[%sm\33[K", color);
479
480 fwrite(line->dat + matches[i].rm_so,
481 matches[i].rm_eo - matches[i].rm_so, 1,
482 stdout);
483 if (color)
484 fprintf(stdout, "\33[m\33[K");
485 a = matches[i].rm_eo;
486 if (oflag)
487 putchar('\n');
488 }
489 if (!oflag) {
490 if (line->len - a > 0)
491 fwrite(line->dat + a, line->len - a, 1, stdout);
492 putchar(line_sep);
493 }
494 } else {
495 fwrite(line->dat, line->len, 1, stdout);
496 putchar(line_sep);
497 }
498 }
499