• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* grep.c - print lines that match a given regular expression
2  *
3  * Copyright 2013 CE Strake <strake888 at gmail.com>
4  *
5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
6  *
7  * TODO: --color, "Binary file %s matches"
8  *
9  * Posix doesn't even specify -r, documenting deviations from it is silly.
10 
11 USE_GREP(NEWTOY(grep, "S(exclude)*M(include)*C#B#A#ZzEFHabhinorsvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN))
12 USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN))
13 USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN))
14 
15 config GREP
16   bool "grep"
17   default y
18   help
19     usage: grep [-EFrivwcloqsHbhn] [-ABC NUM] [-m MAX] [-e REGEX]... [-MS PATTERN]... [-f REGFILE] [FILE]...
20 
21     Show lines matching regular expressions. If no -e, first argument is
22     regular expression to match. With no files (or "-" filename) read stdin.
23     Returns 0 if matched, 1 if no match found.
24 
25     -e  Regex to match. (May be repeated.)
26     -f  File listing regular expressions to match.
27 
28     file search:
29     -r  Recurse into subdirectories (defaults FILE to ".")
30     -M  Match filename pattern (--include)
31     -S  Skip filename pattern (--exclude)
32 
33     match type:
34     -A  Show NUM lines after     -B  Show NUM lines before match
35     -C  NUM lines context (A+B)  -E  extended regex syntax
36     -F  fixed (literal match)    -i  case insensitive
37     -m  match MAX many lines     -v  invert match
38     -w  whole word (implies -E)  -x  whole line
39     -z  input NUL terminated
40 
41     display modes: (default: matched line)
42     -c  count of matching lines  -l  show matching filenames
43     -o  only matching part       -q  quiet (errors only)
44     -s  silent (no error msg)    -Z  output NUL terminated
45 
46     output prefix (default: filename if checking more than 1 file)
47     -H  force filename           -b  byte offset of match
48     -h  hide filename            -n  line number of match
49 
50 config EGREP
51   bool
52   default y
53   depends on GREP
54 
55 config FGREP
56   bool
57   default y
58   depends on GREP
59 */
60 
61 #define FOR_grep
62 #include "toys.h"
63 #include <regex.h>
64 
GLOBALS(long m;struct arg_list * f;struct arg_list * e;long a;long b;long c;struct arg_list * M;struct arg_list * S;char indelim,outdelim;int found;)65 GLOBALS(
66   long m;
67   struct arg_list *f;
68   struct arg_list *e;
69   long a;
70   long b;
71   long c;
72   struct arg_list *M;
73   struct arg_list *S;
74 
75   char indelim, outdelim;
76   int found;
77 )
78 
79 // Emit line with various potential prefixes and delimiter
80 static void outline(char *line, char dash, char *name, long lcount, long bcount,
81   int trim)
82 {
83   if (name && (toys.optflags&FLAG_H)) printf("%s%c", name, dash);
84   if (!line || (lcount && (toys.optflags&FLAG_n)))
85     printf("%ld%c", lcount, line ? dash : TT.outdelim);
86   if (bcount && (toys.optflags&FLAG_b)) printf("%ld%c", bcount-1, dash);
87   if (line) xprintf("%.*s%c", trim, line, TT.outdelim);
88 }
89 
90 // Show matches in one file
do_grep(int fd,char * name)91 static void do_grep(int fd, char *name)
92 {
93   struct double_list *dlb = 0;
94   FILE *file = fdopen(fd, "r");
95   long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
96   char *bars = 0;
97 
98   if (!fd) name = "(standard input)";
99 
100   if (!file) {
101     perror_msg("%s", name);
102 
103     return;
104   }
105 
106   // Loop through lines of input
107   for (;;) {
108     char *line = 0, *start;
109     regmatch_t matches;
110     size_t unused;
111     long len;
112     int mmatch = 0;
113 
114     lcount++;
115     errno = 0;
116     len = getdelim(&line, &unused, TT.indelim, file);
117     if (errno) perror_msg("%s", name);
118     if (len<1) break;
119     if (line[len-1] == TT.indelim) line[len-1] = 0;
120 
121     start = line;
122 
123     // Loop through matches in this line
124     do {
125       int rc = 0, skip = 0;
126 
127       // Handle non-regex matches
128       if (toys.optflags & FLAG_F) {
129         struct arg_list *seek, fseek;
130         char *s = 0;
131 
132         for (seek = TT.e; seek; seek = seek->next) {
133           if (toys.optflags & FLAG_x) {
134             int i = (toys.optflags & FLAG_i);
135 
136             if ((i ? strcasecmp : strcmp)(seek->arg, line)) s = line;
137           } else if (!*seek->arg) {
138             seek = &fseek;
139             fseek.arg = s = line;
140             break;
141           }
142           if (toys.optflags & FLAG_i) s = strnstr(line, seek->arg);
143           else s = strstr(line, seek->arg);
144           if (s) break;
145         }
146 
147         if (s) {
148           matches.rm_so = (s-line);
149           skip = matches.rm_eo = (s-line)+strlen(seek->arg);
150         } else rc = 1;
151       } else {
152         rc = regexec((regex_t *)toybuf, start, 1, &matches,
153                      start==line ? 0 : REG_NOTBOL);
154         skip = matches.rm_eo;
155       }
156 
157       if (toys.optflags & FLAG_x)
158         if (matches.rm_so || line[matches.rm_eo]) rc = 1;
159 
160       if (!rc && (toys.optflags & FLAG_w)) {
161         char c = 0;
162 
163         if ((start+matches.rm_so)!=line) {
164           c = start[matches.rm_so-1];
165           if (!isalnum(c) && c != '_') c = 0;
166         }
167         if (!c) {
168           c = start[matches.rm_eo];
169           if (!isalnum(c) && c != '_') c = 0;
170         }
171         if (c) {
172           start += matches.rm_so+1;
173 
174           continue;
175         }
176       }
177 
178       if (toys.optflags & FLAG_v) {
179         if (toys.optflags & FLAG_o) {
180           if (rc) skip = matches.rm_eo = strlen(start);
181           else if (!matches.rm_so) {
182             start += skip;
183             continue;
184           } else matches.rm_eo = matches.rm_so;
185         } else {
186           if (!rc) break;
187           matches.rm_eo = strlen(start);
188         }
189         matches.rm_so = 0;
190       } else if (rc) break;
191 
192       // At least one line we didn't print since match while -ABC active
193       if (bars) {
194         xputs(bars);
195         bars = 0;
196       }
197       mmatch++;
198       TT.found = 1;
199       if (toys.optflags & FLAG_q) {
200         toys.exitval = 0;
201         xexit();
202       }
203       if (toys.optflags & FLAG_l) {
204         xprintf("%s%c", name, TT.outdelim);
205         free(line);
206         fclose(file);
207         return;
208       }
209       if (toys.optflags & FLAG_o)
210         if (matches.rm_eo == matches.rm_so)
211           break;
212 
213       if (!(toys.optflags & FLAG_c)) {
214         long bcount = 1 + offset + (start-line) +
215           ((toys.optflags & FLAG_o) ? matches.rm_so : 0);
216 
217         if (!(toys.optflags & FLAG_o)) {
218           while (dlb) {
219             struct double_list *dl = dlist_pop(&dlb);
220 
221             outline(dl->data, '-', name, lcount-before, 0, -1);
222             free(dl->data);
223             free(dl);
224             before--;
225           }
226 
227           outline(line, ':', name, lcount, bcount, -1);
228           if (TT.a) after = TT.a+1;
229         } else outline(start+matches.rm_so, ':', name, lcount, bcount,
230                        matches.rm_eo-matches.rm_so);
231       }
232 
233       start += skip;
234       if (!(toys.optflags & FLAG_o)) break;
235     } while (*start);
236     offset += len;
237 
238     if (mmatch) mcount++;
239     else {
240       int discard = (after || TT.b);
241 
242       if (after && --after) {
243         outline(line, '-', name, lcount, 0, -1);
244         discard = 0;
245       }
246       if (discard && TT.b) {
247         dlist_add(&dlb, line);
248         line = 0;
249         if (++before>TT.b) {
250           struct double_list *dl;
251 
252           dl = dlist_pop(&dlb);
253           free(dl->data);
254           free(dl);
255           before--;
256         } else discard = 0;
257       }
258       // If we discarded a line while displaying context, show bars before next
259       // line (but don't show them now in case that was last match in file)
260       if (discard && mcount) bars = "--";
261     }
262     free(line);
263 
264     if ((toys.optflags & FLAG_m) && mcount >= TT.m) break;
265   }
266 
267   if (toys.optflags & FLAG_c) outline(0, ':', name, mcount, 0, -1);
268 
269   // loopfiles will also close the fd, but this frees an (opaque) struct.
270   fclose(file);
271 }
272 
parse_regex(void)273 static void parse_regex(void)
274 {
275   struct arg_list *al, *new, *list = NULL;
276   long len = 0;
277   char *s, *ss;
278 
279   // Add all -f lines to -e list. (Yes, this is leaking allocation context for
280   // exit to free. Not supporting nofork for this command any time soon.)
281   al = TT.f ? TT.f : TT.e;
282   while (al) {
283     if (TT.f) s = ss = xreadfile(al->arg, 0, 0);
284     else s = ss = al->arg;
285 
286     // Split lines at \n, add individual lines to new list.
287     do {
288       ss = strchr(s, '\n');
289       if (ss) *(ss++) = 0;
290       new = xmalloc(sizeof(struct arg_list));
291       new->next = list;
292       new->arg = s;
293       list = new;
294       s = ss;
295     } while (ss && *s);
296 
297     // Advance, when we run out of -f switch to -e.
298     al = al->next;
299     if (!al && TT.f) {
300       TT.f = 0;
301       al = TT.e;
302     }
303   }
304   TT.e = list;
305 
306   if (!(toys.optflags & FLAG_F)) {
307     char *regstr;
308     int i;
309 
310     // Convert strings to one big regex
311     for (al = TT.e; al; al = al->next)
312       len += strlen(al->arg)+1+!(toys.optflags & FLAG_E);
313 
314     regstr = s = xmalloc(len);
315     for (al = TT.e; al; al = al->next) {
316       s = stpcpy(s, al->arg);
317       if (!(toys.optflags & FLAG_E)) *(s++) = '\\';
318       *(s++) = '|';
319     }
320     *(s-=(1+!(toys.optflags & FLAG_E))) = 0;
321 
322     i = regcomp((regex_t *)toybuf, regstr,
323                 ((toys.optflags & FLAG_E) ? REG_EXTENDED : 0) |
324                 ((toys.optflags & FLAG_i) ? REG_ICASE    : 0));
325 
326     if (i) {
327       regerror(i, (regex_t *)toybuf, toybuf+sizeof(regex_t),
328                sizeof(toybuf)-sizeof(regex_t));
329       error_exit("bad REGEX: %s", toybuf);
330     }
331   }
332 }
333 
do_grep_r(struct dirtree * new)334 static int do_grep_r(struct dirtree *new)
335 {
336   char *name;
337 
338   if (!dirtree_notdotdot(new)) return 0;
339   if (S_ISDIR(new->st.st_mode)) return DIRTREE_RECURSE;
340   if (TT.S || TT.M) {
341     struct arg_list *al;
342 
343     for (al = TT.S; al; al = al->next)
344       if (!fnmatch(al->arg, new->name, 0)) return 0;
345 
346     if (TT.M) {
347       for (al = TT.M; al; al = al->next)
348         if (!fnmatch(al->arg, new->name, 0)) break;
349 
350       if (!al) return 0;
351     }
352   }
353 
354   // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
355   if (new->parent && !(toys.optflags & FLAG_h)) toys.optflags |= FLAG_H;
356 
357   name = dirtree_path(new, 0);
358   do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
359   free(name);
360 
361   return 0;
362 }
363 
grep_main(void)364 void grep_main(void)
365 {
366   char **ss = toys.optargs;
367 
368   // Grep exits with 2 for errors
369   toys.exitval = 2;
370 
371   if (!TT.a) TT.a = TT.c;
372   if (!TT.b) TT.b = TT.c;
373 
374   TT.indelim = '\n' * !(toys.optflags&FLAG_z);
375   TT.outdelim = '\n' * !(toys.optflags&FLAG_Z);
376 
377   // Handle egrep and fgrep
378   if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
379   if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
380 
381   if (!TT.e && !TT.f) {
382     if (!*ss) error_exit("no REGEX");
383     TT.e = xzalloc(sizeof(struct arg_list));
384     TT.e->arg = *(ss++);
385     toys.optc--;
386   }
387 
388   parse_regex();
389 
390   if (!(toys.optflags & FLAG_h) && toys.optc>1) toys.optflags |= FLAG_H;
391 
392   if (toys.optflags & FLAG_s) {
393     close(2);
394     xopen_stdio("/dev/null", O_RDWR);
395   }
396 
397   if (toys.optflags & FLAG_r) {
398     // Iterate through -r arguments. Use "." as default if none provided.
399     for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
400       if (!strcmp(*ss, "-")) do_grep(0, *ss);
401       else dirtree_read(*ss, do_grep_r);
402     }
403   } else loopfiles_rw(ss, O_RDONLY|WARN_ONLY, 0, do_grep);
404   toys.exitval = !TT.found;
405 }
406