• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *               pcregrep program                 *
3 *************************************************/
4 
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8 
9            Copyright (c) 1997-2011 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43 
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50 
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57 
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61 
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65 
66 #include "pcre.h"
67 
/* Local boolean type and its two values. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* MAX_PATTERN_COUNT is presumably the limit on the number of patterns
(confirm against its users); OFFSET_SIZE is the size of the offsets vector
that is handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* MBUFTHIRD is one third of the main input buffer: BUFSIZ, but never less
than 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
81 
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85 
enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 
107 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108 environments), a warning is issued if the value of fwrite() is ignored.
109 Unfortunately, casting to (void) does not suppress the warning. To get round
110 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111 apply to fprintf(). */
112 
/* The do/while wrapper makes the macro a single statement, so it can be used
as the body of an unbraced if/else; the inner "if" still consumes the result
of fwrite(). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 
115 
116 
117 /*************************************************
118 *               Global variables                 *
119 *************************************************/
120 
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123 
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131 
132 static int  endlinetype;
133 
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142 
143 static const unsigned char *pcretables = NULL;
144 
145 static int  pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148 
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153 
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158 
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int only_matching = -1;
167 static int process_options = 0;
168 
169 static unsigned long int match_limit = 0;
170 static unsigned long int match_limit_recursion = 0;
171 
172 static BOOL count_only = FALSE;
173 static BOOL do_colour = FALSE;
174 static BOOL file_offsets = FALSE;
175 static BOOL hyphenpending = FALSE;
176 static BOOL invert = FALSE;
177 static BOOL line_buffered = FALSE;
178 static BOOL line_offsets = FALSE;
179 static BOOL multiline = FALSE;
180 static BOOL number = FALSE;
181 static BOOL omit_zero_count = FALSE;
182 static BOOL resource_error = FALSE;
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE;
186 
187 /* Structure for options and list of them */
188 
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
       OP_OP_NUMBER, OP_PATLIST };

/* One entry of the option table. */

typedef struct option_item {
  int type;               /* one of the OP_xxx values */
  int one_char;           /* single-letter form, or a negative N_xxx code */
  void *dataptr;          /* variable associated with the option, or NULL */
  const char *long_name;  /* long option name */
  const char *help_text;  /* descriptive text for the option */
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
217 
218 static option_item optionlist[] = {
219   { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
220   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
221   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
222   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
229   { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231   { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
232   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
243   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
245   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
247   { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
249   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250   { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251   { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252   { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253   { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254 
255   /* These two were accidentally implemented with underscores instead of
256   hyphens in the option names. As this was not discovered for several releases,
257   the incorrect versions are left in the table for compatibility. However, the
258   --help function misses out any option that has an underscore in its name. */
259 
260   { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261   { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262 
263 #ifdef JFRIEDL_DEBUG
264   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
265 #endif
266   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
267   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
268   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
269   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
270   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
271   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
272   { OP_NODATA,    0,        NULL,               NULL,            NULL }
273 };
274 
275 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277 that the combination of -w and -x has the same effect as -x on its own, so we
278 can treat them as the same. */
279 
static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };

/* UTF-8 tables - used when scanning for line endings in UTF-8 mode with the
"any" or "anycrlf" newline settings. utf8_table4 is indexed by the low six bits
of a lead byte and gives the number of additional bytes; utf8_table3 is indexed
by that number and gives the mask for the data bits of the lead byte. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295 
296 
297 
298 /*************************************************
299 *         Exit from the program                  *
300 *************************************************/
301 
302 /* If there has been a resource error, give a suitable message.
303 
304 Argument:  the return code
305 Returns:   does not return
306 */
307 
308 static void
pcregrep_exit(int rc)309 pcregrep_exit(int rc)
310 {
311 if (resource_error)
312   {
313   fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316   }
317 
318 exit(rc);
319 }
320 
321 
322 /*************************************************
323 *            OS-specific functions               *
324 *************************************************/
325 
326 /* These functions are defined so that they can be made system specific,
327 although at present the only ones are for Unix, Win32, and for "no support". */
328 
329 
330 /************* Directory scanning in Unix ***********/
331 
332 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 #include <sys/types.h>
334 #include <sys/stat.h>
335 #include <dirent.h>
336 
typedef DIR directory_type;
338 
339 static int
isdirectory(char * filename)340 isdirectory(char *filename)
341 {
342 struct stat statbuf;
343 if (stat(filename, &statbuf) < 0)
344   return 0;        /* In the expectation that opening as a file will fail */
345 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
346 }
347 
348 static directory_type *
opendirectory(char * filename)349 opendirectory(char *filename)
350 {
351 return opendir(filename);
352 }
353 
354 static char *
readdirectory(directory_type * dir)355 readdirectory(directory_type *dir)
356 {
357 for (;;)
358   {
359   struct dirent *dent = readdir(dir);
360   if (dent == NULL) return NULL;
361   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362     return dent->d_name;
363   }
364 /* Control never reaches here */
365 }
366 
367 static void
closedirectory(directory_type * dir)368 closedirectory(directory_type *dir)
369 {
370 closedir(dir);
371 }
372 
373 
374 /************* Test for regular file in Unix **********/
375 
376 static int
isregfile(char * filename)377 isregfile(char *filename)
378 {
379 struct stat statbuf;
380 if (stat(filename, &statbuf) < 0)
381   return 1;        /* In the expectation that opening as a file will fail */
382 return (statbuf.st_mode & S_IFMT) == S_IFREG;
383 }
384 
385 
386 /************* Test for a terminal in Unix **********/
387 
388 static BOOL
is_stdout_tty(void)389 is_stdout_tty(void)
390 {
391 return isatty(fileno(stdout));
392 }
393 
394 static BOOL
is_file_tty(FILE * f)395 is_file_tty(FILE *f)
396 {
397 return isatty(fileno(f));
398 }
399 
400 
401 /************* Directory scanning in Win32 ***********/
402 
403 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 when it did not exist. David Byron added a patch that moved the #include of
406 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408 undefined when it is indeed undefined. */
409 
410 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411 
412 #ifndef STRICT
413 # define STRICT
414 #endif
415 #ifndef WIN32_LEAN_AND_MEAN
416 # define WIN32_LEAN_AND_MEAN
417 #endif
418 
419 #include <windows.h>
420 
421 #ifndef INVALID_FILE_ATTRIBUTES
422 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423 #endif
424 
425 typedef struct directory_type
426 {
427 HANDLE handle;
428 BOOL first;
429 WIN32_FIND_DATA data;
430 } directory_type;
431 
432 int
isdirectory(char * filename)433 isdirectory(char *filename)
434 {
435 DWORD attr = GetFileAttributes(filename);
436 if (attr == INVALID_FILE_ATTRIBUTES)
437   return 0;
438 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439 }
440 
441 directory_type *
opendirectory(char * filename)442 opendirectory(char *filename)
443 {
444 size_t len;
445 char *pattern;
446 directory_type *dir;
447 DWORD err;
448 len = strlen(filename);
449 pattern = (char *) malloc(len + 3);
450 dir = (directory_type *) malloc(sizeof(*dir));
451 if ((pattern == NULL) || (dir == NULL))
452   {
453   fprintf(stderr, "pcregrep: malloc failed\n");
454   pcregrep_exit(2);
455   }
456 memcpy(pattern, filename, len);
457 memcpy(&(pattern[len]), "\\*", 3);
458 dir->handle = FindFirstFile(pattern, &(dir->data));
459 if (dir->handle != INVALID_HANDLE_VALUE)
460   {
461   free(pattern);
462   dir->first = TRUE;
463   return dir;
464   }
465 err = GetLastError();
466 free(pattern);
467 free(dir);
468 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469 return NULL;
470 }
471 
472 char *
readdirectory(directory_type * dir)473 readdirectory(directory_type *dir)
474 {
475 for (;;)
476   {
477   if (!dir->first)
478     {
479     if (!FindNextFile(dir->handle, &(dir->data)))
480       return NULL;
481     }
482   else
483     {
484     dir->first = FALSE;
485     }
486   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487     return dir->data.cFileName;
488   }
489 #ifndef _MSC_VER
490 return NULL;   /* Keep compiler happy; never executed */
491 #endif
492 }
493 
494 void
closedirectory(directory_type * dir)495 closedirectory(directory_type *dir)
496 {
497 FindClose(dir->handle);
498 free(dir);
499 }
500 
501 
502 /************* Test for regular file in Win32 **********/
503 
504 /* I don't know how to do this, or if it can be done; assume all paths are
505 regular if they are not directories. */
506 
int isregfile(char *filename)
{
return !isdirectory(filename);
}
511 
512 
513 /************* Test for a terminal in Win32 **********/
514 
515 /* I don't know how to do this; assume never */
516 
517 static BOOL
is_stdout_tty(void)518 is_stdout_tty(void)
519 {
520 return FALSE;
521 }
522 
523 static BOOL
is_file_tty(FILE * f)524 is_file_tty(FILE *f)
525 {
526 return FALSE;
527 }
528 
529 
530 /************* Directory scanning when we can't do it ***********/
531 
532 /* The type is void, and apart from isdirectory(), the functions do nothing. */
533 
534 #else
535 
typedef void directory_type;

/* Stubs for systems without directory support: nothing is ever reported as a
directory, and no directory can be opened or read. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return (directory_type*)0;}
char *readdirectory(directory_type *dir) { (void)dir; return (char*)0;}
void closedirectory(directory_type *dir) { (void)dir; }
542 
543 
544 /************* Test for regular when we can't do it **********/
545 
546 /* Assume all files are regular. */
547 
int isregfile(char *filename) { (void)filename; return 1; }
549 
550 
551 /************* Test for a terminal when we can't do it **********/
552 
553 static BOOL
is_stdout_tty(void)554 is_stdout_tty(void)
555 {
556 return FALSE;
557 }
558 
559 static BOOL
is_file_tty(FILE * f)560 is_file_tty(FILE *f)
561 {
562 return FALSE;
563 }
564 
565 #endif
566 
567 
568 
569 #ifndef HAVE_STRERROR
570 /*************************************************
571 *     Provide strerror() for non-ANSI libraries  *
572 *************************************************/
573 
574 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
575 in their libraries, but can provide the same facility by this simple
576 alternative function. */
577 
extern int   sys_nerr;
extern char *sys_errlist[];

/* Look up the message for an error number in sys_errlist, returning a fixed
string for numbers outside the range 0 to sys_nerr-1. */

char *
strerror(int n)
{
if (n < 0 || n >= sys_nerr) return "unknown error number";
return sys_errlist[n];
}
587 #endif /* HAVE_STRERROR */
588 
589 
590 
591 /*************************************************
592 *            Read one line of input              *
593 *************************************************/
594 
595 /* Normally, input is read using fread() into a large buffer, so many lines may
596 be read at once. However, doing this for tty input means that no output appears
597 until a lot of input has been typed. Instead, tty input is handled line by
598 line. We cannot use fgets() for this, because it does not stop at a binary
599 zero, and therefore there is no way of telling how many characters it has read,
600 because there may be binary zeros embedded in the data.
601 
602 Arguments:
603   buffer     the buffer to read into
604   length     the maximum number of characters to read
605   f          the file
606 
607 Returns:     the number of characters read, zero at end of file
608 */
609 
static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The capacity is tested before each read, so nothing is stored (and no
character is consumed) when length is zero or negative. Reading stops after a
newline has been stored, when the buffer is full, or at end of file. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
622 
623 
624 
625 /*************************************************
626 *             Find end of line                   *
627 *************************************************/
628 
629 /* The length of the endline sequence that is found is set via lenptr. This may
630 be zero at the very end of the file if there is no line-ending sequence there.
631 
632 Arguments:
633   p         current position in line
634   endptr    end of available data
635   lenptr    where to put the length of the eol sequence
636 
637 Returns:    pointer after the last byte of the line, including the newline byte(s)
638 */
639 
640 static char *
end_of_line(char * p,char * endptr,int * lenptr)641 end_of_line(char *p, char *endptr, int *lenptr)
642 {
643 switch(endlinetype)
644   {
645   default:      /* Just in case */
646   case EL_LF:
647   while (p < endptr && *p != '\n') p++;
648   if (p < endptr)
649     {
650     *lenptr = 1;
651     return p + 1;
652     }
653   *lenptr = 0;
654   return endptr;
655 
656   case EL_CR:
657   while (p < endptr && *p != '\r') p++;
658   if (p < endptr)
659     {
660     *lenptr = 1;
661     return p + 1;
662     }
663   *lenptr = 0;
664   return endptr;
665 
666   case EL_CRLF:
667   for (;;)
668     {
669     while (p < endptr && *p != '\r') p++;
670     if (++p >= endptr)
671       {
672       *lenptr = 0;
673       return endptr;
674       }
675     if (*p == '\n')
676       {
677       *lenptr = 2;
678       return p + 1;
679       }
680     }
681   break;
682 
683   case EL_ANYCRLF:
684   while (p < endptr)
685     {
686     int extra = 0;
687     register int c = *((unsigned char *)p);
688 
689     if (utf8 && c >= 0xc0)
690       {
691       int gcii, gcss;
692       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
693       gcss = 6*extra;
694       c = (c & utf8_table3[extra]) << gcss;
695       for (gcii = 1; gcii <= extra; gcii++)
696         {
697         gcss -= 6;
698         c |= (p[gcii] & 0x3f) << gcss;
699         }
700       }
701 
702     p += 1 + extra;
703 
704     switch (c)
705       {
706       case 0x0a:    /* LF */
707       *lenptr = 1;
708       return p;
709 
710       case 0x0d:    /* CR */
711       if (p < endptr && *p == 0x0a)
712         {
713         *lenptr = 2;
714         p++;
715         }
716       else *lenptr = 1;
717       return p;
718 
719       default:
720       break;
721       }
722     }   /* End of loop for ANYCRLF case */
723 
724   *lenptr = 0;  /* Must have hit the end */
725   return endptr;
726 
727   case EL_ANY:
728   while (p < endptr)
729     {
730     int extra = 0;
731     register int c = *((unsigned char *)p);
732 
733     if (utf8 && c >= 0xc0)
734       {
735       int gcii, gcss;
736       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737       gcss = 6*extra;
738       c = (c & utf8_table3[extra]) << gcss;
739       for (gcii = 1; gcii <= extra; gcii++)
740         {
741         gcss -= 6;
742         c |= (p[gcii] & 0x3f) << gcss;
743         }
744       }
745 
746     p += 1 + extra;
747 
748     switch (c)
749       {
750       case 0x0a:    /* LF */
751       case 0x0b:    /* VT */
752       case 0x0c:    /* FF */
753       *lenptr = 1;
754       return p;
755 
756       case 0x0d:    /* CR */
757       if (p < endptr && *p == 0x0a)
758         {
759         *lenptr = 2;
760         p++;
761         }
762       else *lenptr = 1;
763       return p;
764 
765       case 0x85:    /* NEL */
766       *lenptr = utf8? 2 : 1;
767       return p;
768 
769       case 0x2028:  /* LS */
770       case 0x2029:  /* PS */
771       *lenptr = 3;
772       return p;
773 
774       default:
775       break;
776       }
777     }   /* End of loop for ANY case */
778 
779   *lenptr = 0;  /* Must have hit the end */
780   return endptr;
781   }     /* End of overall switch */
782 }
783 
784 
785 
786 /*************************************************
787 *         Find start of previous line            *
788 *************************************************/
789 
790 /* This is called when looking back for before lines to print.
791 
792 Arguments:
793   p         start of the subsequent line
794   startptr  start of available data
795 
796 Returns:    pointer to the start of the previous line
797 */
798 
799 static char *
previous_line(char * p,char * startptr)800 previous_line(char *p, char *startptr)
801 {
802 switch(endlinetype)
803   {
804   default:      /* Just in case */
805   case EL_LF:
806   p--;
807   while (p > startptr && p[-1] != '\n') p--;
808   return p;
809 
810   case EL_CR:
811   p--;
812   while (p > startptr && p[-1] != '\n') p--;
813   return p;
814 
815   case EL_CRLF:
816   for (;;)
817     {
818     p -= 2;
819     while (p > startptr && p[-1] != '\n') p--;
820     if (p <= startptr + 1 || p[-2] == '\r') return p;
821     }
822   return p;   /* But control should never get here */
823 
824   case EL_ANY:
825   case EL_ANYCRLF:
826   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827   if (utf8) while ((*p & 0xc0) == 0x80) p--;
828 
829   while (p > startptr)
830     {
831     register int c;
832     char *pp = p - 1;
833 
834     if (utf8)
835       {
836       int extra = 0;
837       while ((*pp & 0xc0) == 0x80) pp--;
838       c = *((unsigned char *)pp);
839       if (c >= 0xc0)
840         {
841         int gcii, gcss;
842         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
843         gcss = 6*extra;
844         c = (c & utf8_table3[extra]) << gcss;
845         for (gcii = 1; gcii <= extra; gcii++)
846           {
847           gcss -= 6;
848           c |= (pp[gcii] & 0x3f) << gcss;
849           }
850         }
851       }
852     else c = *((unsigned char *)pp);
853 
854     if (endlinetype == EL_ANYCRLF) switch (c)
855       {
856       case 0x0a:    /* LF */
857       case 0x0d:    /* CR */
858       return p;
859 
860       default:
861       break;
862       }
863 
864     else switch (c)
865       {
866       case 0x0a:    /* LF */
867       case 0x0b:    /* VT */
868       case 0x0c:    /* FF */
869       case 0x0d:    /* CR */
870       case 0x85:    /* NEL */
871       case 0x2028:  /* LS */
872       case 0x2029:  /* PS */
873       return p;
874 
875       default:
876       break;
877       }
878 
879     p = pp;  /* Back one character */
880     }        /* End of loop for ANY case */
881 
882   return startptr;  /* Hit start of data */
883   }     /* End of overall switch */
884 }
885 
886 
887 
888 
889 
890 /*************************************************
891 *       Print the previous "after" lines         *
892 *************************************************/
893 
894 /* This is called if we are about to lose said lines because of buffer filling,
895 and at the end of the file. The data in the line is written using fwrite() so
896 that a binary zero does not terminate it.
897 
898 Arguments:
899   lastmatchnumber   the number of the last matching line, plus one
900   lastmatchrestart  where we restarted after the last match
901   endptr            end of available data
902   printname         filename for printing
903 
904 Returns:            nothing
905 */
906 
do_after_lines(int lastmatchnumber,char * lastmatchrestart,char * endptr,char * printname)907 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908   char *endptr, char *printname)
909 {
910 if (after_context > 0 && lastmatchnumber > 0)
911   {
912   int count = 0;
913   while (lastmatchrestart < endptr && count++ < after_context)
914     {
915     int ellength;
916     char *pp = lastmatchrestart;
917     if (printname != NULL) fprintf(stdout, "%s-", printname);
918     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919     pp = end_of_line(pp, endptr, &ellength);
920     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921     lastmatchrestart = pp;
922     }
923   hyphenpending = TRUE;
924   }
925 }
926 
927 
928 
929 /*************************************************
930 *   Apply patterns to subject till one matches   *
931 *************************************************/
932 
933 /* This function is called to run through all patterns, looking for a match. It
934 is used multiple times for the same subject when colouring is enabled, in order
935 to find all possible matches.
936 
937 Arguments:
938   matchptr    the start of the subject
939   length      the length of the subject to match
940   offsets     the offsets vector to fill in
941   mrc         address of where to put the result of pcre_exec()
942 
943 Returns:      TRUE if there was a match
944               FALSE if there was no match
945               invert if there was a non-fatal error
946 */
947 
948 static BOOL
match_patterns(char * matchptr,size_t length,int * offsets,int * mrc)949 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950 {
951 int i;
952 size_t slen = length;
953 const char *msg = "this text:\n\n";
954 if (slen > 200)
955   {
956   slen = 200;
957   msg = "text that starts:\n\n";
958   }
959 for (i = 0; i < pattern_count; i++)
960   {
961   *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962     PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963   if (*mrc >= 0) return TRUE;
964   if (*mrc == PCRE_ERROR_NOMATCH) continue;
965   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966   if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967   fprintf(stderr, "%s", msg);
968   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
969   fprintf(stderr, "\n\n");
970   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971     resource_error = TRUE;
972   if (error_count++ > 20)
973     {
974     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975     pcregrep_exit(2);
976     }
977   return invert;    /* No more matching; don't show the line again */
978   }
979 
980 return FALSE;  /* No match, no errors */
981 }
982 
983 
984 
985 /*************************************************
986 *            Grep an individual file             *
987 *************************************************/
988 
989 /* This is called from grep_or_recurse() below. It uses a buffer that is three
990 times the value of MBUFTHIRD. The matching point is never allowed to stray into
991 the top third of the buffer, thus keeping more of the file available for
992 context printing or for multiline scanning. For large files, the pointer will
993 be in the middle third most of the time, so the bottom third is available for
994 "before" context printing.
995 
996 Arguments:
997   handle       the fopened FILE stream for a normal file
998                the gzFile pointer when reading is via libz
999                the BZFILE pointer when reading is via libbz2
1000   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001   printname    the file name if it is to be printed for each match
1002                or NULL if the file name is not to be printed
1003                it cannot be NULL if filenames[_nomatch]_only is set
1004 
1005 Returns:       0 if there was at least one match
1006                1 otherwise (no matches)
1007                2 if there is a read error on a .bz2 file
1008 */
1009 
1010 static int
pcregrep(void * handle,int frtype,char * printname)1011 pcregrep(void *handle, int frtype, char *printname)
1012 {
1013 int rc = 1;
1014 int linenumber = 1;
1015 int lastmatchnumber = 0;
1016 int count = 0;
1017 int filepos = 0;
1018 int offsets[OFFSET_SIZE];
1019 char *lastmatchrestart = NULL;
1020 char buffer[3*MBUFTHIRD];
1021 char *ptr = buffer;
1022 char *endptr;
1023 size_t bufflength;
1024 BOOL endhyphenpending = FALSE;
1025 BOOL input_line_buffered = line_buffered;
1026 FILE *in = NULL;                    /* Ensure initialized */
1027 
1028 #ifdef SUPPORT_LIBZ
1029 gzFile ingz = NULL;
1030 #endif
1031 
1032 #ifdef SUPPORT_LIBBZ2
1033 BZFILE *inbz2 = NULL;
1034 #endif
1035 
1036 
1037 /* Do the first read into the start of the buffer and set up the pointer to end
1038 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040 fail. */
1041 
1042 #ifdef SUPPORT_LIBZ
1043 if (frtype == FR_LIBZ)
1044   {
1045   ingz = (gzFile)handle;
1046   bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1047   }
1048 else
1049 #endif
1050 
1051 #ifdef SUPPORT_LIBBZ2
1052 if (frtype == FR_LIBBZ2)
1053   {
1054   inbz2 = (BZFILE *)handle;
1055   bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1057   }                                    /* without the cast it is unsigned. */
1058 else
1059 #endif
1060 
1061   {
1062   in = (FILE *)handle;
1063   if (is_file_tty(in)) input_line_buffered = TRUE;
1064   bufflength = input_line_buffered?
1065     read_one_line(buffer, 3*MBUFTHIRD, in) :
1066     fread(buffer, 1, 3*MBUFTHIRD, in);
1067   }
1068 
1069 endptr = buffer + bufflength;
1070 
1071 /* Loop while the current pointer is not at the end of the file. For large
1072 files, endptr will be at the end of the buffer when we are in the middle of the
1073 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1074 way, the buffer is shifted left and re-filled. */
1075 
1076 while (ptr < endptr)
1077   {
1078   int endlinelength;
1079   int mrc = 0;
1080   BOOL match;
1081   char *matchptr = ptr;
1082   char *t = ptr;
1083   size_t length, linelength;
1084 
1085   /* At this point, ptr is at the start of a line. We need to find the length
1086   of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087   length remainder of the data in the buffer. Otherwise, it is the length of
1088   the next line, excluding the terminating newline. After matching, we always
1089   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090   option is used for compiling, so that any match is constrained to be in the
1091   first line. */
1092 
1093   t = end_of_line(t, endptr, &endlinelength);
1094   linelength = t - ptr - endlinelength;
1095   length = multiline? (size_t)(endptr - ptr) : linelength;
1096 
1097   /* Extra processing for Jeffrey Friedl's debugging. */
1098 
1099 #ifdef JFRIEDL_DEBUG
1100   if (jfriedl_XT || jfriedl_XR)
1101   {
1102       #include <sys/time.h>
1103       #include <time.h>
1104       struct timeval start_time, end_time;
1105       struct timezone dummy;
1106       int i;
1107 
1108       if (jfriedl_XT)
1109       {
1110           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111           const char *orig = ptr;
1112           ptr = malloc(newlen + 1);
1113           if (!ptr) {
1114                   printf("out of memory");
1115                   pcregrep_exit(2);
1116           }
1117           endptr = ptr;
1118           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119           for (i = 0; i < jfriedl_XT; i++) {
1120                   strncpy(endptr, orig,  length);
1121                   endptr += length;
1122           }
1123           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124           length = newlen;
1125       }
1126 
1127       if (gettimeofday(&start_time, &dummy) != 0)
1128               perror("bad gettimeofday");
1129 
1130 
1131       for (i = 0; i < jfriedl_XR; i++)
1132           match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134 
1135       if (gettimeofday(&end_time, &dummy) != 0)
1136               perror("bad gettimeofday");
1137 
1138       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139                       -
1140                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141 
1142       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143       return 0;
1144   }
1145 #endif
1146 
1147   /* We come back here after a match when the -o option (only_matching) is set,
1148   in order to find any further matches in the same line. */
1149 
1150   ONLY_MATCHING_RESTART:
1151 
1152   /* Run through all the patterns until one matches or there is an error other
1153   than NOMATCH. This code is in a subroutine so that it can be re-used for
1154   finding subsequent matches when colouring matched lines. */
1155 
1156   match = match_patterns(matchptr, length, offsets, &mrc);
1157 
1158   /* If it's a match or a not-match (as required), do what's wanted. */
1159 
1160   if (match != invert)
1161     {
1162     BOOL hyphenprinted = FALSE;
1163 
1164     /* We've failed if we want a file that doesn't have any matches. */
1165 
1166     if (filenames == FN_NOMATCH_ONLY) return 1;
1167 
1168     /* Just count if just counting is wanted. */
1169 
1170     if (count_only) count++;
1171 
1172     /* If all we want is a file name, there is no need to scan any more lines
1173     in the file. */
1174 
1175     else if (filenames == FN_MATCH_ONLY)
1176       {
1177       fprintf(stdout, "%s\n", printname);
1178       return 0;
1179       }
1180 
1181     /* Likewise, if all we want is a yes/no answer. */
1182 
1183     else if (quiet) return 0;
1184 
1185     /* The --only-matching option prints just the substring that matched, or a
1186     captured portion of it, as long as this string is not empty, and the
1187     --file-offsets and --line-offsets options output offsets for the matching
1188     substring (they both force --only-matching = 0). None of these options
1189     prints any context. Afterwards, adjust the start and length, and then jump
1190     back to look for further matches in the same line. If we are in invert
1191     mode, however, nothing is printed and we do not restart - this could still
1192     be useful because the return code is set. */
1193 
1194     else if (only_matching >= 0)
1195       {
1196       if (!invert)
1197         {
1198         if (printname != NULL) fprintf(stdout, "%s:", printname);
1199         if (number) fprintf(stdout, "%d:", linenumber);
1200         if (line_offsets)
1201           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202             offsets[1] - offsets[0]);
1203         else if (file_offsets)
1204           fprintf(stdout, "%d,%d\n",
1205             (int)(filepos + matchptr + offsets[0] - ptr),
1206             offsets[1] - offsets[0]);
1207         else if (only_matching < mrc)
1208           {
1209           int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210           if (plen > 0)
1211             {
1212             if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213             FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214             if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215             fprintf(stdout, "\n");
1216             }
1217           }
1218         else if (printname != NULL || number) fprintf(stdout, "\n");
1219         matchptr += offsets[1];
1220         length -= offsets[1];
1221         match = FALSE;
1222         if (line_buffered) fflush(stdout);
1223         rc = 0;    /* Had some success */
1224         goto ONLY_MATCHING_RESTART;
1225         }
1226       }
1227 
1228     /* This is the default case when none of the above options is set. We print
1229     the matching lines(s), possibly preceded and/or followed by other lines of
1230     context. */
1231 
1232     else
1233       {
1234       /* See if there is a requirement to print some "after" lines from a
1235       previous match. We never print any overlaps. */
1236 
1237       if (after_context > 0 && lastmatchnumber > 0)
1238         {
1239         int ellength;
1240         int linecount = 0;
1241         char *p = lastmatchrestart;
1242 
1243         while (p < ptr && linecount < after_context)
1244           {
1245           p = end_of_line(p, ptr, &ellength);
1246           linecount++;
1247           }
1248 
1249         /* It is important to advance lastmatchrestart during this printing so
1250         that it interacts correctly with any "before" printing below. Print
1251         each line's data using fwrite() in case there are binary zeroes. */
1252 
1253         while (lastmatchrestart < p)
1254           {
1255           char *pp = lastmatchrestart;
1256           if (printname != NULL) fprintf(stdout, "%s-", printname);
1257           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258           pp = end_of_line(pp, endptr, &ellength);
1259           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260           lastmatchrestart = pp;
1261           }
1262         if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263         }
1264 
1265       /* If there were non-contiguous lines printed above, insert hyphens. */
1266 
1267       if (hyphenpending)
1268         {
1269         fprintf(stdout, "--\n");
1270         hyphenpending = FALSE;
1271         hyphenprinted = TRUE;
1272         }
1273 
1274       /* See if there is a requirement to print some "before" lines for this
1275       match. Again, don't print overlaps. */
1276 
1277       if (before_context > 0)
1278         {
1279         int linecount = 0;
1280         char *p = ptr;
1281 
1282         while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283                linecount < before_context)
1284           {
1285           linecount++;
1286           p = previous_line(p, buffer);
1287           }
1288 
1289         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1290           fprintf(stdout, "--\n");
1291 
1292         while (p < ptr)
1293           {
1294           int ellength;
1295           char *pp = p;
1296           if (printname != NULL) fprintf(stdout, "%s-", printname);
1297           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298           pp = end_of_line(pp, endptr, &ellength);
1299           FWRITE(p, 1, pp - p, stdout);
1300           p = pp;
1301           }
1302         }
1303 
1304       /* Now print the matching line(s); ensure we set hyphenpending at the end
1305       of the file if any context lines are being output. */
1306 
1307       if (after_context > 0 || before_context > 0)
1308         endhyphenpending = TRUE;
1309 
1310       if (printname != NULL) fprintf(stdout, "%s:", printname);
1311       if (number) fprintf(stdout, "%d:", linenumber);
1312 
1313       /* In multiline mode, we want to print to the end of the line in which
1314       the end of the matched string is found, so we adjust linelength and the
1315       line number appropriately, but only when there actually was a match
1316       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317       the match will always be before the first newline sequence. */
1318 
1319       if (multiline & !invert)
1320         {
1321         char *endmatch = ptr + offsets[1];
1322         t = ptr;
1323         while (t < endmatch)
1324           {
1325           t = end_of_line(t, endptr, &endlinelength);
1326           if (t < endmatch) linenumber++; else break;
1327           }
1328         linelength = t - ptr - endlinelength;
1329         }
1330 
1331       /*** NOTE: Use only fwrite() to output the data line, so that binary
1332       zeroes are treated as just another data character. */
1333 
1334       /* This extra option, for Jeffrey Friedl's debugging requirements,
1335       replaces the matched string, or a specific captured string if it exists,
1336       with X. When this happens, colouring is ignored. */
1337 
1338 #ifdef JFRIEDL_DEBUG
1339       if (S_arg >= 0 && S_arg < mrc)
1340         {
1341         int first = S_arg * 2;
1342         int last  = first + 1;
1343         FWRITE(ptr, 1, offsets[first], stdout);
1344         fprintf(stdout, "X");
1345         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1346         }
1347       else
1348 #endif
1349 
1350       /* We have to split the line(s) up if colouring, and search for further
1351       matches, but not of course if the line is a non-match. */
1352 
1353       if (do_colour && !invert)
1354         {
1355         int plength;
1356         int last_offset = 0;
1357         FWRITE(ptr, 1, offsets[0], stdout);
1358         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360         fprintf(stdout, "%c[00m", 0x1b);
1361         for (;;)
1362           {
1363           last_offset += offsets[1];
1364           matchptr += offsets[1];
1365           length -= offsets[1];
1366           if (last_offset >= linelength + endlinelength ||
1367               !match_patterns(matchptr, length, offsets, &mrc)) break;
1368           FWRITE(matchptr, 1, offsets[0], stdout);
1369           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1370           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1371           fprintf(stdout, "%c[00m", 0x1b);
1372           }
1373 
1374         /* In multiline mode, we may have already printed the complete line
1375         and its line-ending characters (if they matched the pattern), so there
1376         may be no more to print. */
1377 
1378         plength = (linelength + endlinelength) - last_offset;
1379         if (plength > 0)
1380           FWRITE(ptr + last_offset, 1, plength, stdout);
1381         }
1382 
1383       /* Not colouring; no need to search for further matches */
1384 
1385       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1386       }
1387 
1388     /* End of doing what has to be done for a match. If --line-buffered was
1389     given, flush the output. */
1390 
1391     if (line_buffered) fflush(stdout);
1392     rc = 0;    /* Had some success */
1393 
1394     /* Remember where the last match happened for after_context. We remember
1395     where we are about to restart, and that line's number. */
1396 
1397     lastmatchrestart = ptr + linelength + endlinelength;
1398     lastmatchnumber = linenumber + 1;
1399     }
1400 
1401   /* For a match in multiline inverted mode (which of course did not cause
1402   anything to be printed), we have to move on to the end of the match before
1403   proceeding. */
1404 
1405   if (multiline && invert && match)
1406     {
1407     int ellength;
1408     char *endmatch = ptr + offsets[1];
1409     t = ptr;
1410     while (t < endmatch)
1411       {
1412       t = end_of_line(t, endptr, &ellength);
1413       if (t <= endmatch) linenumber++; else break;
1414       }
1415     endmatch = end_of_line(endmatch, endptr, &ellength);
1416     linelength = endmatch - ptr - ellength;
1417     }
1418 
1419   /* Advance to after the newline and increment the line number. The file
1420   offset to the current line is maintained in filepos. */
1421 
1422   ptr += linelength + endlinelength;
1423   filepos += (int)(linelength + endlinelength);
1424   linenumber++;
1425 
1426   /* If input is line buffered, and the buffer is not yet full, read another
1427   line and add it into the buffer. */
1428 
1429   if (input_line_buffered && bufflength < sizeof(buffer))
1430     {
1431     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1432     bufflength += add;
1433     endptr += add;
1434     }
1435 
1436   /* If we haven't yet reached the end of the file (the buffer is full), and
1437   the current point is in the top 1/3 of the buffer, slide the buffer down by
1438   1/3 and refill it. Before we do this, if some unprinted "after" lines are
1439   about to be lost, print them. */
1440 
1441   if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1442     {
1443     if (after_context > 0 &&
1444         lastmatchnumber > 0 &&
1445         lastmatchrestart < buffer + MBUFTHIRD)
1446       {
1447       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1448       lastmatchnumber = 0;
1449       }
1450 
1451     /* Now do the shuffle */
1452 
1453     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1454     ptr -= MBUFTHIRD;
1455 
1456 #ifdef SUPPORT_LIBZ
1457     if (frtype == FR_LIBZ)
1458       bufflength = 2*MBUFTHIRD +
1459         gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1460     else
1461 #endif
1462 
1463 #ifdef SUPPORT_LIBBZ2
1464     if (frtype == FR_LIBBZ2)
1465       bufflength = 2*MBUFTHIRD +
1466         BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1467     else
1468 #endif
1469 
1470     bufflength = 2*MBUFTHIRD +
1471       (input_line_buffered?
1472        read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1473        fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1474     endptr = buffer + bufflength;
1475 
1476     /* Adjust any last match point */
1477 
1478     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1479     }
1480   }     /* Loop through the whole file */
1481 
1482 /* End of file; print final "after" lines if wanted; do_after_lines sets
1483 hyphenpending if it prints something. */
1484 
1485 if (only_matching < 0 && !count_only)
1486   {
1487   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1488   hyphenpending |= endhyphenpending;
1489   }
1490 
1491 /* Print the file name if we are looking for those without matches and there
1492 were none. If we found a match, we won't have got this far. */
1493 
1494 if (filenames == FN_NOMATCH_ONLY)
1495   {
1496   fprintf(stdout, "%s\n", printname);
1497   return 0;
1498   }
1499 
1500 /* Print the match count if wanted */
1501 
1502 if (count_only)
1503   {
1504   if (count > 0 || !omit_zero_count)
1505     {
1506     if (printname != NULL && filenames != FN_NONE)
1507       fprintf(stdout, "%s:", printname);
1508     fprintf(stdout, "%d\n", count);
1509     }
1510   }
1511 
1512 return rc;
1513 }
1514 
1515 
1516 
1517 /*************************************************
1518 *     Grep a file or recurse into a directory    *
1519 *************************************************/
1520 
1521 /* Given a path name, if it's a directory, scan all the files if we are
1522 recursing; if it's a file, grep it.
1523 
1524 Arguments:
1525   pathname          the path to investigate
1526   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1527   only_one_at_top   TRUE if the path is the only one at toplevel
1528 
1529 Returns:   0 if there was at least one match
1530            1 if there were no matches
1531            2 there was some kind of error
1532 
1533 However, file opening failures are suppressed if "silent" is set.
1534 */
1535 
1536 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)1537 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1538 {
1539 int rc = 1;
1540 int sep;
1541 int frtype;
1542 int pathlen;
1543 void *handle;
1544 FILE *in = NULL;           /* Ensure initialized */
1545 
1546 #ifdef SUPPORT_LIBZ
1547 gzFile ingz = NULL;
1548 #endif
1549 
1550 #ifdef SUPPORT_LIBBZ2
1551 BZFILE *inbz2 = NULL;
1552 #endif
1553 
1554 /* If the file name is "-" we scan stdin */
1555 
1556 if (strcmp(pathname, "-") == 0)
1557   {
1558   return pcregrep(stdin, FR_PLAIN,
1559     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1560       stdin_name : NULL);
1561   }
1562 
1563 /* If the file is a directory, skip if skipping or if we are recursing, scan
1564 each file and directory within it, subject to any include or exclude patterns
1565 that were set. The scanning code is localized so it can be made
1566 system-specific. */
1567 
1568 if ((sep = isdirectory(pathname)) != 0)
1569   {
1570   if (dee_action == dee_SKIP) return 1;
1571   if (dee_action == dee_RECURSE)
1572     {
1573     char buffer[1024];
1574     char *nextfile;
1575     directory_type *dir = opendirectory(pathname);
1576 
1577     if (dir == NULL)
1578       {
1579       if (!silent)
1580         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1581           strerror(errno));
1582       return 2;
1583       }
1584 
1585     while ((nextfile = readdirectory(dir)) != NULL)
1586       {
1587       int frc, nflen;
1588       sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1589       nflen = (int)(strlen(nextfile));
1590 
1591       if (isdirectory(buffer))
1592         {
1593         if (exclude_dir_compiled != NULL &&
1594             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1595           continue;
1596 
1597         if (include_dir_compiled != NULL &&
1598             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1599           continue;
1600         }
1601       else
1602         {
1603         if (exclude_compiled != NULL &&
1604             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1605           continue;
1606 
1607         if (include_compiled != NULL &&
1608             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1609           continue;
1610         }
1611 
1612       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1613       if (frc > 1) rc = frc;
1614        else if (frc == 0 && rc == 1) rc = 0;
1615       }
1616 
1617     closedirectory(dir);
1618     return rc;
1619     }
1620   }
1621 
1622 /* If the file is not a directory and not a regular file, skip it if that's
1623 been requested. */
1624 
1625 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1626 
1627 /* Control reaches here if we have a regular file, or if we have a directory
1628 and recursion or skipping was not requested, or if we have anything else and
1629 skipping was not requested. The scan proceeds. If this is the first and only
1630 argument at top level, we don't show the file name, unless we are only showing
1631 the file name, or the filename was forced (-H). */
1632 
1633 pathlen = (int)(strlen(pathname));
1634 
1635 /* Open using zlib if it is supported and the file name ends with .gz. */
1636 
1637 #ifdef SUPPORT_LIBZ
1638 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1639   {
1640   ingz = gzopen(pathname, "rb");
1641   if (ingz == NULL)
1642     {
1643     if (!silent)
1644       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1645         strerror(errno));
1646     return 2;
1647     }
1648   handle = (void *)ingz;
1649   frtype = FR_LIBZ;
1650   }
1651 else
1652 #endif
1653 
1654 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1655 
1656 #ifdef SUPPORT_LIBBZ2
1657 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1658   {
1659   inbz2 = BZ2_bzopen(pathname, "rb");
1660   handle = (void *)inbz2;
1661   frtype = FR_LIBBZ2;
1662   }
1663 else
1664 #endif
1665 
1666 /* Otherwise use plain fopen(). The label is so that we can come back here if
1667 an attempt to read a .bz2 file indicates that it really is a plain file. */
1668 
1669 #ifdef SUPPORT_LIBBZ2
1670 PLAIN_FILE:
1671 #endif
1672   {
1673   in = fopen(pathname, "rb");
1674   handle = (void *)in;
1675   frtype = FR_PLAIN;
1676   }
1677 
1678 /* All the opening methods return errno when they fail. */
1679 
1680 if (handle == NULL)
1681   {
1682   if (!silent)
1683     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1684       strerror(errno));
1685   return 2;
1686   }
1687 
1688 /* Now grep the file */
1689 
1690 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1691   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1692 
1693 /* Close in an appropriate manner. */
1694 
1695 #ifdef SUPPORT_LIBZ
1696 if (frtype == FR_LIBZ)
1697   gzclose(ingz);
1698 else
1699 #endif
1700 
1701 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1702 read failed. If the error indicates that the file isn't in fact bzipped, try
1703 again as a normal file. */
1704 
1705 #ifdef SUPPORT_LIBBZ2
1706 if (frtype == FR_LIBBZ2)
1707   {
1708   if (rc == 2)
1709     {
1710     int errnum;
1711     const char *err = BZ2_bzerror(inbz2, &errnum);
1712     if (errnum == BZ_DATA_ERROR_MAGIC)
1713       {
1714       BZ2_bzclose(inbz2);
1715       goto PLAIN_FILE;
1716       }
1717     else if (!silent)
1718       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1719         pathname, err);
1720     }
1721   BZ2_bzclose(inbz2);
1722   }
1723 else
1724 #endif
1725 
1726 /* Normal file close */
1727 
1728 fclose(in);
1729 
1730 /* Pass back the yield from pcregrep(). */
1731 
1732 return rc;
1733 }
1734 
1735 
1736 
1737 
1738 /*************************************************
1739 *                Usage function                  *
1740 *************************************************/
1741 
1742 static int
usage(int rc)1743 usage(int rc)
1744 {
1745 option_item *op;
1746 fprintf(stderr, "Usage: pcregrep [-");
1747 for (op = optionlist; op->one_char != 0; op++)
1748   {
1749   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1750   }
1751 fprintf(stderr, "] [long options] [pattern] [files]\n");
1752 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1753   "options.\n");
1754 return rc;
1755 }
1756 
1757 
1758 
1759 
1760 /*************************************************
1761 *                Help function                   *
1762 *************************************************/
1763 
1764 static void
help(void)1765 help(void)
1766 {
1767 option_item *op;
1768 
1769 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1770 printf("Search for PATTERN in each FILE or standard input.\n");
1771 printf("PATTERN must be present if neither -e nor -f is used.\n");
1772 printf("\"-\" can be used as a file name to mean STDIN.\n");
1773 
1774 #ifdef SUPPORT_LIBZ
1775 printf("Files whose names end in .gz are read using zlib.\n");
1776 #endif
1777 
1778 #ifdef SUPPORT_LIBBZ2
1779 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1780 #endif
1781 
1782 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1783 printf("Other files and the standard input are read as plain files.\n\n");
1784 #else
1785 printf("All files are read as plain files, without any interpretation.\n\n");
1786 #endif
1787 
1788 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1789 printf("Options:\n");
1790 
1791 for (op = optionlist; op->one_char != 0; op++)
1792   {
1793   int n;
1794   char s[4];
1795 
1796   /* Two options were accidentally implemented and documented with underscores
1797   instead of hyphens in their names, something that was not noticed for quite a
1798   few releases. When fixing this, I left the underscored versions in the list
1799   in case people were using them. However, we don't want to display them in the
1800   help data. There are no other options that contain underscores, and we do not
1801   expect ever to implement such options. Therefore, just omit any option that
1802   contains an underscore. */
1803 
1804   if (strchr(op->long_name, '_') != NULL) continue;
1805 
1806   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1807   n = 31 - printf("  %s --%s", s, op->long_name);
1808   if (n < 1) n = 1;
1809   printf("%.*s%s\n", n, "                     ", op->help_text);
1810   }
1811 
1812 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1813 printf("trailing white space is removed and blank lines are ignored.\n");
1814 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1815 
1816 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1817 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1818 }
1819 
1820 
1821 
1822 
1823 /*************************************************
1824 *    Handle a single-letter, no data option      *
1825 *************************************************/
1826 
1827 static int
handle_option(int letter,int options)1828 handle_option(int letter, int options)
1829 {
1830 switch(letter)
1831   {
1832   case N_FOFFSETS: file_offsets = TRUE; break;
1833   case N_HELP: help(); pcregrep_exit(0);
1834   case N_LOFFSETS: line_offsets = number = TRUE; break;
1835   case N_LBUFFER: line_buffered = TRUE; break;
1836   case 'c': count_only = TRUE; break;
1837   case 'F': process_options |= PO_FIXED_STRINGS; break;
1838   case 'H': filenames = FN_FORCE; break;
1839   case 'h': filenames = FN_NONE; break;
1840   case 'i': options |= PCRE_CASELESS; break;
1841   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1842   case 'L': filenames = FN_NOMATCH_ONLY; break;
1843   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1844   case 'n': number = TRUE; break;
1845   case 'o': only_matching = 0; break;
1846   case 'q': quiet = TRUE; break;
1847   case 'r': dee_action = dee_RECURSE; break;
1848   case 's': silent = TRUE; break;
1849   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1850   case 'v': invert = TRUE; break;
1851   case 'w': process_options |= PO_WORD_MATCH; break;
1852   case 'x': process_options |= PO_LINE_MATCH; break;
1853 
1854   case 'V':
1855   fprintf(stderr, "pcregrep version %s\n", pcre_version());
1856   pcregrep_exit(0);
1857   break;
1858 
1859   default:
1860   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1861   pcregrep_exit(usage(2));
1862   }
1863 
1864 return options;
1865 }
1866 
1867 
1868 
1869 
1870 /*************************************************
1871 *          Construct printed ordinal             *
1872 *************************************************/
1873 
1874 /* This turns a number into "1st", "3rd", etc. */
1875 
static char *
ordin(int n)
{
/* The result lives in a static buffer, so it is overwritten by the next call.
The buffer is sized from sizeof(int): three decimal digits per byte is an upper
bound on the digit count, plus a sign, the two-letter suffix and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int last_two = n % 100;

/* Numbers ending in 11, 12 or 13 take "th" ("11th", "112th"), not the suffix
that their final digit alone would suggest. For a negative n both remainders
are negative, so "th" is used, as it always was. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1892 
1893 
1894 
1895 /*************************************************
1896 *          Compile a single pattern              *
1897 *************************************************/
1898 
1899 /* When the -F option has been used, this is called for each substring.
1900 Otherwise it's called for each supplied pattern.
1901 
1902 Arguments:
1903   pattern        the pattern string
1904   options        the PCRE options
1905   filename       the file name, or NULL for a command-line pattern
1906   count          0 if this is the only command line pattern, or
1907                  number of the command line pattern, or
1908                  linenumber for a pattern from a file
1909 
1910 Returns:         TRUE on success, FALSE after an error
1911 */
1912 
1913 static BOOL
compile_single_pattern(char * pattern,int options,char * filename,int count)1914 compile_single_pattern(char *pattern, int options, char *filename, int count)
1915 {
1916 char buffer[MBUFTHIRD + 16];
1917 const char *error;
1918 int errptr;
1919 
1920 if (pattern_count >= MAX_PATTERN_COUNT)
1921   {
1922   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1923     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1924   return FALSE;
1925   }
1926 
1927 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1928   suffix[process_options]);
1929 pattern_list[pattern_count] =
1930   pcre_compile(buffer, options, &error, &errptr, pcretables);
1931 if (pattern_list[pattern_count] != NULL)
1932   {
1933   pattern_count++;
1934   return TRUE;
1935   }
1936 
1937 /* Handle compile errors */
1938 
1939 errptr -= (int)strlen(prefix[process_options]);
1940 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1941 
1942 if (filename == NULL)
1943   {
1944   if (count == 0)
1945     fprintf(stderr, "pcregrep: Error in command-line regex "
1946       "at offset %d: %s\n", errptr, error);
1947   else
1948     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1949       "at offset %d: %s\n", ordin(count), errptr, error);
1950   }
1951 else
1952   {
1953   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1954     "at offset %d: %s\n", count, filename, errptr, error);
1955   }
1956 
1957 return FALSE;
1958 }
1959 
1960 
1961 
1962 /*************************************************
1963 *           Compile one supplied pattern         *
1964 *************************************************/
1965 
1966 /* When the -F option has been used, each string may be a list of strings,
1967 separated by line breaks. They will be matched literally.
1968 
1969 Arguments:
1970   pattern        the pattern string
1971   options        the PCRE options
1972   filename       the file name, or NULL for a command-line pattern
1973   count          0 if this is the only command line pattern, or
1974                  number of the command line pattern, or
1975                  linenumber for a pattern from a file
1976 
1977 Returns:         TRUE on success, FALSE after an error
1978 */
1979 
1980 static BOOL
compile_pattern(char * pattern,int options,char * filename,int count)1981 compile_pattern(char *pattern, int options, char *filename, int count)
1982 {
1983 if ((process_options & PO_FIXED_STRINGS) != 0)
1984   {
1985   char *eop = pattern + strlen(pattern);
1986   char buffer[MBUFTHIRD];
1987   for(;;)
1988     {
1989     int ellength;
1990     char *p = end_of_line(pattern, eop, &ellength);
1991     if (ellength == 0)
1992       return compile_single_pattern(pattern, options, filename, count);
1993     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1994     pattern = p;
1995     if (!compile_single_pattern(buffer, options, filename, count))
1996       return FALSE;
1997     }
1998   }
1999 else return compile_single_pattern(pattern, options, filename, count);
2000 }
2001 
2002 
2003 
2004 /*************************************************
2005 *                Main program                    *
2006 *************************************************/
2007 
2008 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2009 
2010 int
main(int argc,char ** argv)2011 main(int argc, char **argv)
2012 {
2013 int i, j;
2014 int rc = 1;
2015 int pcre_options = 0;
2016 int cmd_pattern_count = 0;
2017 int hint_count = 0;
2018 int errptr;
2019 BOOL only_one_at_top;
2020 char *patterns[MAX_PATTERN_COUNT];
2021 const char *locale_from = "--locale";
2022 const char *error;
2023 
2024 /* Set the default line ending value from the default in the PCRE library;
2025 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2026 Note that the return values from pcre_config(), though derived from the ASCII
2027 codes, are the same in EBCDIC environments, so we must use the actual values
2028 rather than escapes such as as '\r'. */
2029 
2030 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2031 switch(i)
2032   {
2033   default:               newline = (char *)"lf"; break;
2034   case 13:               newline = (char *)"cr"; break;
2035   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2036   case -1:               newline = (char *)"any"; break;
2037   case -2:               newline = (char *)"anycrlf"; break;
2038   }
2039 
2040 /* Process the options */
2041 
2042 for (i = 1; i < argc; i++)
2043   {
2044   option_item *op = NULL;
2045   char *option_data = (char *)"";    /* default to keep compiler happy */
2046   BOOL longop;
2047   BOOL longopwasequals = FALSE;
2048 
2049   if (argv[i][0] != '-') break;
2050 
2051   /* If we hit an argument that is just "-", it may be a reference to STDIN,
2052   but only if we have previously had -e or -f to define the patterns. */
2053 
2054   if (argv[i][1] == 0)
2055     {
2056     if (pattern_filename != NULL || pattern_count > 0) break;
2057       else pcregrep_exit(usage(2));
2058     }
2059 
2060   /* Handle a long name option, or -- to terminate the options */
2061 
2062   if (argv[i][1] == '-')
2063     {
2064     char *arg = argv[i] + 2;
2065     char *argequals = strchr(arg, '=');
2066 
2067     if (*arg == 0)    /* -- terminates options */
2068       {
2069       i++;
2070       break;                /* out of the options-handling loop */
2071       }
2072 
2073     longop = TRUE;
2074 
2075     /* Some long options have data that follows after =, for example file=name.
2076     Some options have variations in the long name spelling: specifically, we
2077     allow "regexp" because GNU grep allows it, though I personally go along
2078     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2079     These options are entered in the table as "regex(p)". Options can be in
2080     both these categories. */
2081 
2082     for (op = optionlist; op->one_char != 0; op++)
2083       {
2084       char *opbra = strchr(op->long_name, '(');
2085       char *equals = strchr(op->long_name, '=');
2086 
2087       /* Handle options with only one spelling of the name */
2088 
2089       if (opbra == NULL)     /* Does not contain '(' */
2090         {
2091         if (equals == NULL)  /* Not thing=data case */
2092           {
2093           if (strcmp(arg, op->long_name) == 0) break;
2094           }
2095         else                 /* Special case xxx=data */
2096           {
2097           int oplen = (int)(equals - op->long_name);
2098           int arglen = (argequals == NULL)?
2099             (int)strlen(arg) : (int)(argequals - arg);
2100           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2101             {
2102             option_data = arg + arglen;
2103             if (*option_data == '=')
2104               {
2105               option_data++;
2106               longopwasequals = TRUE;
2107               }
2108             break;
2109             }
2110           }
2111         }
2112 
2113       /* Handle options with an alternate spelling of the name */
2114 
2115       else
2116         {
2117         char buff1[24];
2118         char buff2[24];
2119 
2120         int baselen = (int)(opbra - op->long_name);
2121         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2122         int arglen = (argequals == NULL || equals == NULL)?
2123           (int)strlen(arg) : (int)(argequals - arg);
2124 
2125         sprintf(buff1, "%.*s", baselen, op->long_name);
2126         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2127 
2128         if (strncmp(arg, buff1, arglen) == 0 ||
2129            strncmp(arg, buff2, arglen) == 0)
2130           {
2131           if (equals != NULL && argequals != NULL)
2132             {
2133             option_data = argequals;
2134             if (*option_data == '=')
2135               {
2136               option_data++;
2137               longopwasequals = TRUE;
2138               }
2139             }
2140           break;
2141           }
2142         }
2143       }
2144 
2145     if (op->one_char == 0)
2146       {
2147       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2148       pcregrep_exit(usage(2));
2149       }
2150     }
2151 
2152   /* Jeffrey Friedl's debugging harness uses these additional options which
2153   are not in the right form for putting in the option table because they use
2154   only one hyphen, yet are more than one character long. By putting them
2155   separately here, they will not get displayed as part of the help() output,
2156   but I don't think Jeffrey will care about that. */
2157 
2158 #ifdef JFRIEDL_DEBUG
2159   else if (strcmp(argv[i], "-pre") == 0) {
2160           jfriedl_prefix = argv[++i];
2161           continue;
2162   } else if (strcmp(argv[i], "-post") == 0) {
2163           jfriedl_postfix = argv[++i];
2164           continue;
2165   } else if (strcmp(argv[i], "-XT") == 0) {
2166           sscanf(argv[++i], "%d", &jfriedl_XT);
2167           continue;
2168   } else if (strcmp(argv[i], "-XR") == 0) {
2169           sscanf(argv[++i], "%d", &jfriedl_XR);
2170           continue;
2171   }
2172 #endif
2173 
2174 
2175   /* One-char options; many that have no data may be in a single argument; we
2176   continue till we hit the last one or one that needs data. */
2177 
2178   else
2179     {
2180     char *s = argv[i] + 1;
2181     longop = FALSE;
2182     while (*s != 0)
2183       {
2184       for (op = optionlist; op->one_char != 0; op++)
2185         {
2186         if (*s == op->one_char) break;
2187         }
2188       if (op->one_char == 0)
2189         {
2190         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2191           *s, argv[i]);
2192         pcregrep_exit(usage(2));
2193         }
2194 
2195       /* Check for a single-character option that has data: OP_OP_NUMBER
2196       is used for one that either has a numerical number or defaults, i.e. the
2197       data is optional. If a digit follows, there is data; if not, carry on
2198       with other single-character options in the same string. */
2199 
2200       option_data = s+1;
2201       if (op->type == OP_OP_NUMBER)
2202         {
2203         if (isdigit((unsigned char)s[1])) break;
2204         }
2205       else   /* Check for end or a dataless option */
2206         {
2207         if (op->type != OP_NODATA || s[1] == 0) break;
2208         }
2209 
2210       /* Handle a single-character option with no data, then loop for the
2211       next character in the string. */
2212 
2213       pcre_options = handle_option(*s++, pcre_options);
2214       }
2215     }
2216 
2217   /* At this point we should have op pointing to a matched option. If the type
2218   is NO_DATA, it means that there is no data, and the option might set
2219   something in the PCRE options. */
2220 
2221   if (op->type == OP_NODATA)
2222     {
2223     pcre_options = handle_option(op->one_char, pcre_options);
2224     continue;
2225     }
2226 
2227   /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2228   either has a value or defaults to something. It cannot have data in a
2229   separate item. At the moment, the only such options are "colo(u)r",
2230   "only-matching", and Jeffrey Friedl's special -S debugging option. */
2231 
2232   if (*option_data == 0 &&
2233       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2234     {
2235     switch (op->one_char)
2236       {
2237       case N_COLOUR:
2238       colour_option = (char *)"auto";
2239       break;
2240 
2241       case 'o':
2242       only_matching = 0;
2243       break;
2244 
2245 #ifdef JFRIEDL_DEBUG
2246       case 'S':
2247       S_arg = 0;
2248       break;
2249 #endif
2250       }
2251     continue;
2252     }
2253 
2254   /* Otherwise, find the data string for the option. */
2255 
2256   if (*option_data == 0)
2257     {
2258     if (i >= argc - 1 || longopwasequals)
2259       {
2260       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2261       pcregrep_exit(usage(2));
2262       }
2263     option_data = argv[++i];
2264     }
2265 
2266   /* If the option type is OP_PATLIST, it's the -e option, which can be called
2267   multiple times to create a list of patterns. */
2268 
2269   if (op->type == OP_PATLIST)
2270     {
2271     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2272       {
2273       fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2274         MAX_PATTERN_COUNT);
2275       return 2;
2276       }
2277     patterns[cmd_pattern_count++] = option_data;
2278     }
2279 
2280   /* Otherwise, deal with single string or numeric data values. */
2281 
2282   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2283            op->type != OP_OP_NUMBER)
2284     {
2285     *((char **)op->dataptr) = option_data;
2286     }
2287 
2288   /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2289   only for unpicking arguments, so just keep it simple. */
2290 
2291   else
2292     {
2293     unsigned long int n = 0;
2294     char *endptr = option_data;
2295     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2296     while (isdigit((unsigned char)(*endptr)))
2297       n = n * 10 + (int)(*endptr++ - '0');
2298     if (*endptr != 0)
2299       {
2300       if (longop)
2301         {
2302         char *equals = strchr(op->long_name, '=');
2303         int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2304           (int)(equals - op->long_name);
2305         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2306           option_data, nlen, op->long_name);
2307         }
2308       else
2309         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2310           option_data, op->one_char);
2311       pcregrep_exit(usage(2));
2312       }
2313     if (op->type == OP_LONGNUMBER)
2314         *((unsigned long int *)op->dataptr) = n;
2315     else
2316         *((int *)op->dataptr) = n;
2317     }
2318   }
2319 
2320 /* Options have been decoded. If -C was used, its value is used as a default
2321 for -A and -B. */
2322 
2323 if (both_context > 0)
2324   {
2325   if (after_context == 0) after_context = both_context;
2326   if (before_context == 0) before_context = both_context;
2327   }
2328 
2329 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2330 However, the latter two set only_matching. */
2331 
2332 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2333     (file_offsets && line_offsets))
2334   {
2335   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2336     "and/or --line-offsets\n");
2337   pcregrep_exit(usage(2));
2338   }
2339 
2340 if (file_offsets || line_offsets) only_matching = 0;
2341 
2342 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2343 LC_ALL environment variable is set, and if so, use it. */
2344 
2345 if (locale == NULL)
2346   {
2347   locale = getenv("LC_ALL");
2348   locale_from = "LCC_ALL";
2349   }
2350 
2351 if (locale == NULL)
2352   {
2353   locale = getenv("LC_CTYPE");
2354   locale_from = "LC_CTYPE";
2355   }
2356 
2357 /* If a locale has been provided, set it, and generate the tables the PCRE
2358 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2359 
2360 if (locale != NULL)
2361   {
2362   if (setlocale(LC_CTYPE, locale) == NULL)
2363     {
2364     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2365       locale, locale_from);
2366     return 2;
2367     }
2368   pcretables = pcre_maketables();
2369   }
2370 
2371 /* Sort out colouring */
2372 
2373 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2374   {
2375   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2376   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2377   else
2378     {
2379     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2380       colour_option);
2381     return 2;
2382     }
2383   if (do_colour)
2384     {
2385     char *cs = getenv("PCREGREP_COLOUR");
2386     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2387     if (cs != NULL) colour_string = cs;
2388     }
2389   }
2390 
2391 /* Interpret the newline type; the default settings are Unix-like. */
2392 
2393 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2394   {
2395   pcre_options |= PCRE_NEWLINE_CR;
2396   endlinetype = EL_CR;
2397   }
2398 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2399   {
2400   pcre_options |= PCRE_NEWLINE_LF;
2401   endlinetype = EL_LF;
2402   }
2403 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2404   {
2405   pcre_options |= PCRE_NEWLINE_CRLF;
2406   endlinetype = EL_CRLF;
2407   }
2408 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2409   {
2410   pcre_options |= PCRE_NEWLINE_ANY;
2411   endlinetype = EL_ANY;
2412   }
2413 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2414   {
2415   pcre_options |= PCRE_NEWLINE_ANYCRLF;
2416   endlinetype = EL_ANYCRLF;
2417   }
2418 else
2419   {
2420   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2421   return 2;
2422   }
2423 
2424 /* Interpret the text values for -d and -D */
2425 
2426 if (dee_option != NULL)
2427   {
2428   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2429   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2430   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2431   else
2432     {
2433     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2434     return 2;
2435     }
2436   }
2437 
2438 if (DEE_option != NULL)
2439   {
2440   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2441   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2442   else
2443     {
2444     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2445     return 2;
2446     }
2447   }
2448 
2449 /* Check the values for Jeffrey Friedl's debugging options. */
2450 
2451 #ifdef JFRIEDL_DEBUG
2452 if (S_arg > 9)
2453   {
2454   fprintf(stderr, "pcregrep: bad value for -S option\n");
2455   return 2;
2456   }
2457 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2458   {
2459   if (jfriedl_XT == 0) jfriedl_XT = 1;
2460   if (jfriedl_XR == 0) jfriedl_XR = 1;
2461   }
2462 #endif
2463 
2464 /* Get memory to store the pattern and hints lists. */
2465 
2466 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2467 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2468 
2469 if (pattern_list == NULL || hints_list == NULL)
2470   {
2471   fprintf(stderr, "pcregrep: malloc failed\n");
2472   goto EXIT2;
2473   }
2474 
2475 /* If no patterns were provided by -e, and there is no file provided by -f,
2476 the first argument is the one and only pattern, and it must exist. */
2477 
2478 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2479   {
2480   if (i >= argc) return usage(2);
2481   patterns[cmd_pattern_count++] = argv[i++];
2482   }
2483 
2484 /* Compile the patterns that were provided on the command line, either by
2485 multiple uses of -e or as a single unkeyed pattern. */
2486 
2487 for (j = 0; j < cmd_pattern_count; j++)
2488   {
2489   if (!compile_pattern(patterns[j], pcre_options, NULL,
2490        (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2491     goto EXIT2;
2492   }
2493 
2494 /* Compile the regular expressions that are provided in a file. */
2495 
2496 if (pattern_filename != NULL)
2497   {
2498   int linenumber = 0;
2499   FILE *f;
2500   char *filename;
2501   char buffer[MBUFTHIRD];
2502 
2503   if (strcmp(pattern_filename, "-") == 0)
2504     {
2505     f = stdin;
2506     filename = stdin_name;
2507     }
2508   else
2509     {
2510     f = fopen(pattern_filename, "r");
2511     if (f == NULL)
2512       {
2513       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2514         strerror(errno));
2515       goto EXIT2;
2516       }
2517     filename = pattern_filename;
2518     }
2519 
2520   while (fgets(buffer, MBUFTHIRD, f) != NULL)
2521     {
2522     char *s = buffer + (int)strlen(buffer);
2523     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2524     *s = 0;
2525     linenumber++;
2526     if (buffer[0] == 0) continue;   /* Skip blank lines */
2527     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2528       goto EXIT2;
2529     }
2530 
2531   if (f != stdin) fclose(f);
2532   }
2533 
2534 /* Study the regular expressions, as we will be running them many times */
2535 
2536 for (j = 0; j < pattern_count; j++)
2537   {
2538   hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2539   if (error != NULL)
2540     {
2541     char s[16];
2542     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2543     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2544     goto EXIT2;
2545     }
2546   hint_count++;
2547   }
2548 
2549 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2550 pcre_extra block for each pattern. */
2551 
2552 if (match_limit > 0 || match_limit_recursion > 0)
2553   {
2554   for (j = 0; j < pattern_count; j++)
2555     {
2556     if (hints_list[j] == NULL)
2557       {
2558       hints_list[j] = malloc(sizeof(pcre_extra));
2559       if (hints_list[j] == NULL)
2560         {
2561         fprintf(stderr, "pcregrep: malloc failed\n");
2562         pcregrep_exit(2);
2563         }
2564       }
2565     if (match_limit > 0)
2566       {
2567       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2568       hints_list[j]->match_limit = match_limit;
2569       }
2570     if (match_limit_recursion > 0)
2571       {
2572       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2573       hints_list[j]->match_limit_recursion = match_limit_recursion;
2574       }
2575     }
2576   }
2577 
2578 /* If there are include or exclude patterns, compile them. */
2579 
2580 if (exclude_pattern != NULL)
2581   {
2582   exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2583     pcretables);
2584   if (exclude_compiled == NULL)
2585     {
2586     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2587       errptr, error);
2588     goto EXIT2;
2589     }
2590   }
2591 
2592 if (include_pattern != NULL)
2593   {
2594   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2595     pcretables);
2596   if (include_compiled == NULL)
2597     {
2598     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2599       errptr, error);
2600     goto EXIT2;
2601     }
2602   }
2603 
2604 if (exclude_dir_pattern != NULL)
2605   {
2606   exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2607     pcretables);
2608   if (exclude_dir_compiled == NULL)
2609     {
2610     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2611       errptr, error);
2612     goto EXIT2;
2613     }
2614   }
2615 
2616 if (include_dir_pattern != NULL)
2617   {
2618   include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2619     pcretables);
2620   if (include_dir_compiled == NULL)
2621     {
2622     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2623       errptr, error);
2624     goto EXIT2;
2625     }
2626   }
2627 
2628 /* If there are no further arguments, do the business on stdin and exit. */
2629 
2630 if (i >= argc)
2631   {
2632   rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2633   goto EXIT;
2634   }
2635 
2636 /* Otherwise, work through the remaining arguments as files or directories.
2637 Pass in the fact that there is only one argument at top level - this suppresses
2638 the file name if the argument is not a directory and filenames are not
2639 otherwise forced. */
2640 
2641 only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2642 
2643 for (; i < argc; i++)
2644   {
2645   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2646     only_one_at_top);
2647   if (frc > 1) rc = frc;
2648     else if (frc == 0 && rc == 1) rc = 0;
2649   }
2650 
2651 EXIT:
2652 if (pattern_list != NULL)
2653   {
2654   for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2655   free(pattern_list);
2656   }
2657 if (hints_list != NULL)
2658   {
2659   for (i = 0; i < hint_count; i++)
2660     {
2661     if (hints_list[i] != NULL) free(hints_list[i]);
2662     }
2663   free(hints_list);
2664   }
2665 pcregrep_exit(rc);
2666 
2667 EXIT2:
2668 rc = 2;
2669 goto EXIT;
2670 }
2671 
2672 /* End of pcregrep */
2673