• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *               pcregrep program                 *
3 *************************************************/
4 
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 recurse into directories, and in z/OS it can handle PDS files.
8 
9 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10 additional header is required. That header is not included in the main PCRE
11 distribution because other apparatus is needed to compile pcregrep for z/OS.
12 The header can be found in the special z/OS distribution, which is available
13 from www.zaconsultants.net or from www.cbttape.org.
14 
15            Copyright (c) 1997-2014 University of Cambridge
16 
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
20 
21     * Redistributions of source code must retain the above copyright notice,
22       this list of conditions and the following disclaimer.
23 
24     * Redistributions in binary form must reproduce the above copyright
25       notice, this list of conditions and the following disclaimer in the
26       documentation and/or other materials provided with the distribution.
27 
28     * Neither the name of the University of Cambridge nor the names of its
29       contributors may be used to endorse or promote products derived from
30       this software without specific prior written permission.
31 
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
44 */
45 
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49 
50 #include <ctype.h>
51 #include <locale.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <stdlib.h>
55 #include <errno.h>
56 
57 #include <sys/types.h>
58 #include <sys/stat.h>
59 
60 #ifdef HAVE_UNISTD_H
61 #include <unistd.h>
62 #endif
63 
64 #ifdef SUPPORT_LIBZ
65 #include <zlib.h>
66 #endif
67 
68 #ifdef SUPPORT_LIBBZ2
69 #include <bzlib.h>
70 #endif
71 
72 #include "pcre.h"
73 
/* Boolean constants and the integer type used to hold them. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): presumably the size of the offsets vector handed to the
matcher; the use is not in this part of the file - confirm there. */

#define OFFSET_SIZE 99

/* The maximum pattern length is BUFSIZ, but never less than 8192. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* A pattern buffer has 10 extra bytes to allow for prefix+suffix. */

#define PATBUFSIZE (MAXPATLEN + 10)
88 
/* Values for the "filenames" variable, which specifies options for file name
output. The numerical order matters: a file name is wanted for every value
greater than FN_DEFAULT. */

enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_MATCH_ONLY   = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* File reading styles */

enum {
  FR_PLAIN  = 0,
  FR_LIBZ   = 1,
  FR_LIBBZ2 = 2
};

/* Actions for the -d and -D options */

enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};

/* Binary file options */

enum {
  BIN_BINARY  = 0,
  BIN_NOMATCH = 1,
  BIN_TEXT    = 2
};
117 
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. A bare "if (...) {}" followed by the caller's semicolon expands to
two statements, which cannot be used as the unbraced body of an "if" that has
an "else" clause. The arguments are passed to fwrite() unchanged. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125 
126 
127 
128 /*************************************************
129 *               Global variables                 *
130 *************************************************/
131 
132 /* Jeffrey Friedl has some debugging requirements that are not part of the
133 regular code. */
134 
135 #ifdef JFRIEDL_DEBUG
136 static int S_arg = -1;
137 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139 static const char *jfriedl_prefix = "";
140 static const char *jfriedl_postfix = "";
141 #endif
142 
143 static int  endlinetype;
144 
145 static char *colour_string = (char *)"1;31";
146 static char *colour_option = NULL;
147 static char *dee_option = NULL;
148 static char *DEE_option = NULL;
149 static char *locale = NULL;
150 static char *main_buffer = NULL;
151 static char *newline = NULL;
152 static char *om_separator = (char *)"";
153 static char *stdin_name = (char *)"(standard input)";
154 
155 static const unsigned char *pcretables = NULL;
156 
157 static int after_context = 0;
158 static int before_context = 0;
159 static int binary_files = BIN_BINARY;
160 static int both_context = 0;
161 static int bufthird = PCREGREP_BUFSIZE;
162 static int bufsize = 3*PCREGREP_BUFSIZE;
163 
164 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165 static int dee_action = dee_SKIP;
166 #else
167 static int dee_action = dee_READ;
168 #endif
169 
170 static int DEE_action = DEE_READ;
171 static int error_count = 0;
172 static int filenames = FN_DEFAULT;
173 static int pcre_options = 0;
174 static int process_options = 0;
175 
176 #ifdef SUPPORT_PCREGREP_JIT
177 static int study_options = PCRE_STUDY_JIT_COMPILE;
178 #else
179 static int study_options = 0;
180 #endif
181 
182 static unsigned long int match_limit = 0;
183 static unsigned long int match_limit_recursion = 0;
184 
185 static BOOL count_only = FALSE;
186 static BOOL do_colour = FALSE;
187 static BOOL file_offsets = FALSE;
188 static BOOL hyphenpending = FALSE;
189 static BOOL invert = FALSE;
190 static BOOL line_buffered = FALSE;
191 static BOOL line_offsets = FALSE;
192 static BOOL multiline = FALSE;
193 static BOOL number = FALSE;
194 static BOOL omit_zero_count = FALSE;
195 static BOOL resource_error = FALSE;
196 static BOOL quiet = FALSE;
197 static BOOL show_only_matching = FALSE;
198 static BOOL silent = FALSE;
199 static BOOL utf8 = FALSE;
200 
/* One item in the list of capturing group numbers given to --only-matching. */

struct omstr {
  struct omstr *next;     /* following item in the chain */
  int groupnum;           /* capturing group number */
};

typedef struct omstr omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

/* The pair of variables that describe a number chain: pointers to the
variable holding its start and to the variable holding its last item. */

struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
};

typedef struct omdatastr omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219 
/* One item in a list of file names (for -f and --{in,ex}clude-from). */

struct fnstr {
  struct fnstr *next;     /* following item in the chain */
  char *name;             /* the file name */
};

typedef struct fnstr fnstr;

/* Start and last-item variables for each of the four file name chains. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* The pair of variables that describe a file name chain. */

struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
};

typedef struct fndatastr fndatastr;

static fndatastr exclude_from_data  = { &exclude_from,  &exclude_from_last };
static fndatastr include_from_data  = { &include_from,  &include_from_last };
static fndatastr file_lists_data    = { &file_lists,    &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248 
249 /* Structure for pattern and its compiled form; used for matching patterns and
250 also for include/exclude patterns. */
251 
252 typedef struct patstr {
253   struct patstr *next;
254   char *string;
255   pcre *compiled;
256   pcre_extra *hint;
257 } patstr;
258 
259 static patstr *patterns = NULL;
260 static patstr *patterns_last = NULL;
261 static patstr *include_patterns = NULL;
262 static patstr *include_patterns_last = NULL;
263 static patstr *exclude_patterns = NULL;
264 static patstr *exclude_patterns_last = NULL;
265 static patstr *include_dir_patterns = NULL;
266 static patstr *include_dir_patterns_last = NULL;
267 static patstr *exclude_dir_patterns = NULL;
268 static patstr *exclude_dir_patterns_last = NULL;
269 
270 /* Structure holding the two variables that describe a pattern chain. A pointer
271 to such structures is used for each appropriate option. */
272 
273 typedef struct patdatastr {
274   patstr **anchor;
275   patstr **lastptr;
276 } patdatastr;
277 
278 static patdatastr match_patdata = { &patterns, &patterns_last };
279 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283 
284 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                  &include_dir_patterns, &exclude_dir_patterns };
286 
287 static const char *incexname[4] = { "--include", "--exclude",
288                                     "--include-dir", "--exclude-dir" };
289 
/* Option data types, as stored in the "type" field of an option_item. */

enum {
  OP_NODATA     = 0,
  OP_STRING     = 1,
  OP_OP_STRING  = 2,
  OP_NUMBER     = 3,
  OP_LONGNUMBER = 4,
  OP_OP_NUMBER  = 5,
  OP_OP_NUMBERS = 6,
  OP_PATLIST    = 7,
  OP_FILELIST   = 8,
  OP_BINFILES   = 9
};

/* Description of one command line option. */

struct option_item {
  int type;                 /* one of the OP_ values */
  int one_char;             /* single-letter form, or a negative N_ code */
  void *dataptr;            /* where the option's data goes, or NULL */
  const char *long_name;    /* long option name */
  const char *help_text;    /* text shown for the option by --help */
};

typedef struct option_item option_item;
302 
/* An option that has no single-letter equivalent is given a negative code in
its place. The negative value identifies such options. */

#define N_COLOUR        (-1)
#define N_EXCLUDE       (-2)
#define N_EXCLUDE_DIR   (-3)
#define N_HELP          (-4)
#define N_INCLUDE       (-5)
#define N_INCLUDE_DIR   (-6)
#define N_LABEL         (-7)
#define N_LOCALE        (-8)
#define N_NULL          (-9)
#define N_LOFFSETS      (-10)
#define N_FOFFSETS      (-11)
#define N_LBUFFER       (-12)
#define N_M_LIMIT       (-13)
#define N_M_LIMIT_REC   (-14)
#define N_BUFSIZE       (-15)
#define N_NOJIT         (-16)
#define N_FILE_LIST     (-17)
#define N_BINARY_FILES  (-18)
#define N_EXCLUDE_FROM  (-19)
#define N_INCLUDE_FROM  (-20)
#define N_OM_SEPARATOR  (-21)
327 
328 static option_item optionlist[] = {
329   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351 #ifdef SUPPORT_PCREGREP_JIT
352   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353 #else
354   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355 #endif
356   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377 
378   /* These two were accidentally implemented with underscores instead of
379   hyphens in the option names. As this was not discovered for several releases,
380   the incorrect versions are left in the table for compatibility. However, the
381   --help function misses out any option that has an underscore in its name. */
382 
383   { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384   { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385 
386 #ifdef JFRIEDL_DEBUG
387   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388 #endif
389   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
390   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
391   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
392   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
393   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
394   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
395   { OP_NODATA,    0,        NULL,               NULL,            NULL }
396 };
397 
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
prefix+suffix is 10 characters; if anything longer is added, it must be
adjusted. */
404 
static const char *prefix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  "^(?:",        /* 2 */
  "^(?:",        /* 3 */
  "\\Q",         /* 4 */
  "\\b\\Q",      /* 5 */
  "^(?:\\Q",     /* 6 */
  "^(?:\\Q"      /* 7 */
};

static const char *suffix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  ")$",          /* 2 */
  ")$",          /* 3 */
  "\\E",         /* 4 */
  "\\E\\b",      /* 5 */
  "\\E)$",       /* 6 */
  "\\E)$"        /* 7 */
};
410 
/* UTF-8 tables - used only when the newline setting is "any".
NOTE(review): the code that indexes these tables is not in this part of the
file; confirm the meaning of the entries at the point of use. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f,
  0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  /* entries 0-31 */
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  /* entries 32-47 */
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  /* entries 48-63 */
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
420 
421 
422 
423 /*************************************************
424 *         Exit from the program                  *
425 *************************************************/
426 
427 /* If there has been a resource error, give a suitable message.
428 
429 Argument:  the return code
430 Returns:   does not return
431 */
432 
433 static void
pcregrep_exit(int rc)434 pcregrep_exit(int rc)
435 {
436 if (resource_error)
437   {
438   fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440     PCRE_ERROR_JIT_STACKLIMIT);
441   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442   }
443 exit(rc);
444 }
445 
446 
447 /*************************************************
448 *          Add item to chain of patterns         *
449 *************************************************/
450 
451 /* Used to add an item onto a chain, or just return an unconnected item if the
452 "after" argument is NULL.
453 
454 Arguments:
455   s          pattern string to add
456   after      if not NULL points to item to insert after
457 
458 Returns:     new pattern block or NULL on error
459 */
460 
461 static patstr *
add_pattern(char * s,patstr * after)462 add_pattern(char *s, patstr *after)
463 {
464 patstr *p = (patstr *)malloc(sizeof(patstr));
465 if (p == NULL)
466   {
467   fprintf(stderr, "pcregrep: malloc failed\n");
468   pcregrep_exit(2);
469   }
470 if (strlen(s) > MAXPATLEN)
471   {
472   fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473     MAXPATLEN);
474   free(p);
475   return NULL;
476   }
477 p->next = NULL;
478 p->string = s;
479 p->compiled = NULL;
480 p->hint = NULL;
481 
482 if (after != NULL)
483   {
484   p->next = after->next;
485   after->next = p;
486   }
487 return p;
488 }
489 
490 
491 /*************************************************
492 *           Free chain of patterns               *
493 *************************************************/
494 
495 /* Used for several chains of patterns.
496 
497 Argument: pointer to start of chain
498 Returns:  nothing
499 */
500 
501 static void
free_pattern_chain(patstr * pc)502 free_pattern_chain(patstr *pc)
503 {
504 while (pc != NULL)
505   {
506   patstr *p = pc;
507   pc = p->next;
508   if (p->hint != NULL) pcre_free_study(p->hint);
509   if (p->compiled != NULL) pcre_free(p->compiled);
510   free(p);
511   }
512 }
513 
514 
515 /*************************************************
516 *           Free chain of file names             *
517 *************************************************/
518 
519 /*
520 Argument: pointer to start of chain
521 Returns:  nothing
522 */
523 
524 static void
free_file_chain(fnstr * fn)525 free_file_chain(fnstr *fn)
526 {
527 while (fn != NULL)
528   {
529   fnstr *f = fn;
530   fn = f->next;
531   free(f);
532   }
533 }
534 
535 
536 /*************************************************
537 *            OS-specific functions               *
538 *************************************************/
539 
540 /* These functions are defined so that they can be made system specific.
541 At present there are versions for Unix-style environments, Windows, native
542 z/OS, and "no support". */
543 
544 
545 /************* Directory scanning Unix-style and z/OS ***********/
546 
547 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548 #include <sys/types.h>
549 #include <sys/stat.h>
550 #include <dirent.h>
551 
552 #if defined NATIVE_ZOS
553 /************* Directory and PDS/E scanning for z/OS ***********/
554 /************* z/OS looks mostly like Unix with USS ************/
555 /* However, z/OS needs the #include statements in this header */
556 #include "pcrzosfs.h"
557 /* That header is not included in the main PCRE distribution because
558    other apparatus is needed to compile pcregrep for z/OS. The header
559    can be found in the special z/OS distribution, which is available
560    from www.zaconsultants.net or from www.cbttape.org. */
561 #endif
562 
563 typedef DIR directory_type;
564 #define FILESEP '/'
565 
/* Test whether a path names a directory.

Argument:  the path to test
Returns:   1 if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* A path that cannot be examined is treated as not a directory, in the
expectation that opening it as a file will fail. */

if (stat(filename, &info) < 0) return 0;
return S_ISDIR(info.st_mode)? 1 : 0;
}
574 
575 static directory_type *
opendirectory(char * filename)576 opendirectory(char *filename)
577 {
578 return opendir(filename);
579 }
580 
581 static char *
readdirectory(directory_type * dir)582 readdirectory(directory_type *dir)
583 {
584 for (;;)
585   {
586   struct dirent *dent = readdir(dir);
587   if (dent == NULL) return NULL;
588   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589     return dent->d_name;
590   }
591 /* Control never reaches here */
592 }
593 
594 static void
closedirectory(directory_type * dir)595 closedirectory(directory_type *dir)
596 {
597 closedir(dir);
598 }
599 
600 
601 /************* Test for regular file, Unix-style **********/
602 
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   1 if stat() reports a regular file or if stat() fails,
           otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat info;

/* A path that cannot be examined is treated as regular, in the expectation
that opening it as a file will fail. */

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
611 
612 
613 #if defined NATIVE_ZOS
614 /************* Test for a terminal in z/OS **********/
615 /* isatty() does not work in a TSO environment, so always give FALSE.*/
616 
617 static BOOL
is_stdout_tty(void)618 is_stdout_tty(void)
619 {
620 return FALSE;
621 }
622 
623 static BOOL
is_file_tty(FILE * f)624 is_file_tty(FILE *f)
625 {
626 return FALSE;
627 }
628 
629 
630 /************* Test for a terminal, Unix-style **********/
631 
632 #else
633 static BOOL
is_stdout_tty(void)634 is_stdout_tty(void)
635 {
636 return isatty(fileno(stdout));
637 }
638 
639 static BOOL
is_file_tty(FILE * f)640 is_file_tty(FILE *f)
641 {
642 return isatty(fileno(f));
643 }
644 #endif
645 
646 /* End of Unix-style or native z/OS environment functions. */
647 
648 
649 /************* Directory scanning in Windows ***********/
650 
651 /* I (Philip Hazel) have no means of testing this code. It was contributed by
652 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653 when it did not exist. David Byron added a patch that moved the #include of
654 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656 undefined when it is indeed undefined. */
657 
658 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659 
660 #ifndef STRICT
661 # define STRICT
662 #endif
663 #ifndef WIN32_LEAN_AND_MEAN
664 # define WIN32_LEAN_AND_MEAN
665 #endif
666 
667 #include <windows.h>
668 
669 #ifndef INVALID_FILE_ATTRIBUTES
670 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671 #endif
672 
673 typedef struct directory_type
674 {
675 HANDLE handle;
676 BOOL first;
677 WIN32_FIND_DATA data;
678 } directory_type;
679 
680 #define FILESEP '/'
681 
682 int
isdirectory(char * filename)683 isdirectory(char *filename)
684 {
685 DWORD attr = GetFileAttributes(filename);
686 if (attr == INVALID_FILE_ATTRIBUTES)
687   return 0;
688 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689 }
690 
691 directory_type *
opendirectory(char * filename)692 opendirectory(char *filename)
693 {
694 size_t len;
695 char *pattern;
696 directory_type *dir;
697 DWORD err;
698 len = strlen(filename);
699 pattern = (char *)malloc(len + 3);
700 dir = (directory_type *)malloc(sizeof(*dir));
701 if ((pattern == NULL) || (dir == NULL))
702   {
703   fprintf(stderr, "pcregrep: malloc failed\n");
704   pcregrep_exit(2);
705   }
706 memcpy(pattern, filename, len);
707 memcpy(&(pattern[len]), "\\*", 3);
708 dir->handle = FindFirstFile(pattern, &(dir->data));
709 if (dir->handle != INVALID_HANDLE_VALUE)
710   {
711   free(pattern);
712   dir->first = TRUE;
713   return dir;
714   }
715 err = GetLastError();
716 free(pattern);
717 free(dir);
718 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
719 return NULL;
720 }
721 
722 char *
readdirectory(directory_type * dir)723 readdirectory(directory_type *dir)
724 {
725 for (;;)
726   {
727   if (!dir->first)
728     {
729     if (!FindNextFile(dir->handle, &(dir->data)))
730       return NULL;
731     }
732   else
733     {
734     dir->first = FALSE;
735     }
736   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737     return dir->data.cFileName;
738   }
739 #ifndef _MSC_VER
740 return NULL;   /* Keep compiler happy; never executed */
741 #endif
742 }
743 
744 void
closedirectory(directory_type * dir)745 closedirectory(directory_type *dir)
746 {
747 FindClose(dir->handle);
748 free(dir);
749 }
750 
751 
752 /************* Test for regular file in Windows **********/
753 
/* I don't know how to do this, or if it can be done; every path that is not
a directory is assumed to be a regular file.

Argument:  the path to test
Returns:   1 if isdirectory() gives 0 for the path, otherwise 0
*/

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
761 
762 
763 /************* Test for a terminal in Windows **********/
764 
765 /* I don't know how to do this; assume never */
766 
767 static BOOL
is_stdout_tty(void)768 is_stdout_tty(void)
769 {
770 return FALSE;
771 }
772 
773 static BOOL
is_file_tty(FILE * f)774 is_file_tty(FILE *f)
775 {
776 return FALSE;
777 }
778 
779 /* End of Windows functions */
780 
781 
782 /************* Directory scanning when we can't do it ***********/
783 
784 /* The type is void, and apart from isdirectory(), the functions do nothing. */
785 
786 #else
787 
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

/* There are never any entries to read; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
795 
796 
797 /************* Test for regular file when we can't do it **********/
798 
/* Assume all files are regular.

Argument:  the path (not examined)
Returns:   1
*/

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
802 
803 
804 /************* Test for a terminal when we can't do it **********/
805 
806 static BOOL
is_stdout_tty(void)807 is_stdout_tty(void)
808 {
809 return FALSE;
810 }
811 
812 static BOOL
is_file_tty(FILE * f)813 is_file_tty(FILE *f)
814 {
815 return FALSE;
816 }
817 
818 #endif  /* End of system-specific functions */
819 
820 
821 
822 #ifndef HAVE_STRERROR
823 /*************************************************
824 *     Provide strerror() for non-ANSI libraries  *
825 *************************************************/
826 
/* Some old-fashioned systems still around (e.g. SunOS4) have no strerror()
in their libraries; this substitute looks the message up in sys_errlist.

Argument:  an error number
Returns:   the sys_errlist entry for the number, or a fixed text when the
           number is negative or not less than sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
840 #endif /* HAVE_STRERROR */
841 
842 
843 
844 /*************************************************
845 *                Usage function                  *
846 *************************************************/
847 
848 static int
usage(int rc)849 usage(int rc)
850 {
851 option_item *op;
852 fprintf(stderr, "Usage: pcregrep [-");
853 for (op = optionlist; op->one_char != 0; op++)
854   {
855   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856   }
857 fprintf(stderr, "] [long options] [pattern] [files]\n");
858 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859   "options.\n");
860 return rc;
861 }
862 
863 
864 
865 /*************************************************
866 *                Help function                   *
867 *************************************************/
868 
869 static void
help(void)870 help(void)
871 {
872 option_item *op;
873 
874 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875 printf("Search for PATTERN in each FILE or standard input.\n");
876 printf("PATTERN must be present if neither -e nor -f is used.\n");
877 printf("\"-\" can be used as a file name to mean STDIN.\n");
878 
879 #ifdef SUPPORT_LIBZ
880 printf("Files whose names end in .gz are read using zlib.\n");
881 #endif
882 
883 #ifdef SUPPORT_LIBBZ2
884 printf("Files whose names end in .bz2 are read using bzlib2.\n");
885 #endif
886 
887 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888 printf("Other files and the standard input are read as plain files.\n\n");
889 #else
890 printf("All files are read as plain files, without any interpretation.\n\n");
891 #endif
892 
893 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894 printf("Options:\n");
895 
896 for (op = optionlist; op->one_char != 0; op++)
897   {
898   int n;
899   char s[4];
900 
901   /* Two options were accidentally implemented and documented with underscores
902   instead of hyphens in their names, something that was not noticed for quite a
903   few releases. When fixing this, I left the underscored versions in the list
904   in case people were using them. However, we don't want to display them in the
905   help data. There are no other options that contain underscores, and we do not
906   expect ever to implement such options. Therefore, just omit any option that
907   contains an underscore. */
908 
909   if (strchr(op->long_name, '_') != NULL) continue;
910 
911   if (op->one_char > 0 && (op->long_name)[0] == 0)
912     n = 31 - printf("  -%c", op->one_char);
913   else
914     {
915     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916       else strcpy(s, "   ");
917     n = 31 - printf("  %s --%s", s, op->long_name);
918     }
919 
920   if (n < 1) n = 1;
921   printf("%.*s%s\n", n, "                           ", op->help_text);
922   }
923 
924 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926 printf("When reading patterns or file names from a file, trailing white\n");
927 printf("space is removed and blank lines are ignored.\n");
928 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929 
930 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932 }
933 
934 
935 
936 /*************************************************
937 *            Test exclude/includes               *
938 *************************************************/
939 
940 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941 there are no includes, the path must match an include pattern.
942 
943 Arguments:
944   path      the path to be matched
945   ip        the chain of include patterns
946   ep        the chain of exclude patterns
947 
948 Returns:    TRUE if the path is not excluded
949 */
950 
951 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)952 test_incexc(char *path, patstr *ip, patstr *ep)
953 {
954 int plen = strlen(path);
955 
956 for (; ep != NULL; ep = ep->next)
957   {
958   if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959     return FALSE;
960   }
961 
962 if (ip == NULL) return TRUE;
963 
964 for (; ip != NULL; ip = ip->next)
965   {
966   if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967     return TRUE;
968   }
969 
970 return FALSE;
971 }
972 
973 
974 
975 /*************************************************
976 *         Decode integer argument value          *
977 *************************************************/
978 
979 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981 just keep it simple.
982 
983 Arguments:
984   option_data   the option data string
985   op            the option item (for error messages)
986   longop        TRUE if option given in long form
987 
988 Returns:        a long integer
989 */
990 
991 static long int
decode_number(char * option_data,option_item * op,BOOL longop)992 decode_number(char *option_data, option_item *op, BOOL longop)
993 {
994 unsigned long int n = 0;
995 char *endptr = option_data;
996 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997 while (isdigit((unsigned char)(*endptr)))
998   n = n * 10 + (int)(*endptr++ - '0');
999 if (toupper(*endptr) == 'K')
1000   {
1001   n *= 1024;
1002   endptr++;
1003   }
1004 else if (toupper(*endptr) == 'M')
1005   {
1006   n *= 1024*1024;
1007   endptr++;
1008   }
1009 
1010 if (*endptr != 0)   /* Error */
1011   {
1012   if (longop)
1013     {
1014     char *equals = strchr(op->long_name, '=');
1015     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016       (int)(equals - op->long_name);
1017     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018       option_data, nlen, op->long_name);
1019     }
1020   else
1021     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022       option_data, op->one_char);
1023   pcregrep_exit(usage(2));
1024   }
1025 
1026 return n;
1027 }
1028 
1029 
1030 
1031 /*************************************************
1032 *       Add item to a chain of numbers           *
1033 *************************************************/
1034 
1035 /* Used to add an item onto a chain, or just return an unconnected item if the
1036 "after" argument is NULL.
1037 
1038 Arguments:
1039   n          the number to add
1040   after      if not NULL points to item to insert after
1041 
1042 Returns:     new number block
1043 */
1044 
1045 static omstr *
add_number(int n,omstr * after)1046 add_number(int n, omstr *after)
1047 {
1048 omstr *om = (omstr *)malloc(sizeof(omstr));
1049 
1050 if (om == NULL)
1051   {
1052   fprintf(stderr, "pcregrep: malloc failed\n");
1053   pcregrep_exit(2);
1054   }
1055 om->next = NULL;
1056 om->groupnum = n;
1057 
1058 if (after != NULL)
1059   {
1060   om->next = after->next;
1061   after->next = om;
1062   }
1063 return om;
1064 }
1065 
1066 
1067 
1068 /*************************************************
1069 *            Read one line of input              *
1070 *************************************************/
1071 
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Characters are copied up to and including the first newline, or until "length"
characters have been stored, or until end of file. No terminating zero is
added. The limit is tested before each character is read, so nothing is read
from the file or written to the buffer when "length" is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1099 
1100 
1101 
1102 /*************************************************
1103 *             Find end of line                   *
1104 *************************************************/
1105 
1106 /* The length of the endline sequence that is found is set via lenptr. This may
1107 be zero at the very end of the file if there is no line-ending sequence there.
1108 
1109 Arguments:
1110   p         current position in line
1111   endptr    end of available data
1112   lenptr    where to put the length of the eol sequence
1113 
1114 Returns:    pointer after the last byte of the line,
1115             including the newline byte(s)
1116 */
1117 
1118 static char *
end_of_line(char * p,char * endptr,int * lenptr)1119 end_of_line(char *p, char *endptr, int *lenptr)
1120 {
1121 switch(endlinetype)
1122   {
1123   default:      /* Just in case */
1124   case EL_LF:
1125   while (p < endptr && *p != '\n') p++;
1126   if (p < endptr)
1127     {
1128     *lenptr = 1;
1129     return p + 1;
1130     }
1131   *lenptr = 0;
1132   return endptr;
1133 
1134   case EL_CR:
1135   while (p < endptr && *p != '\r') p++;
1136   if (p < endptr)
1137     {
1138     *lenptr = 1;
1139     return p + 1;
1140     }
1141   *lenptr = 0;
1142   return endptr;
1143 
1144   case EL_CRLF:
1145   for (;;)
1146     {
1147     while (p < endptr && *p != '\r') p++;
1148     if (++p >= endptr)
1149       {
1150       *lenptr = 0;
1151       return endptr;
1152       }
1153     if (*p == '\n')
1154       {
1155       *lenptr = 2;
1156       return p + 1;
1157       }
1158     }
1159   break;
1160 
1161   case EL_ANYCRLF:
1162   while (p < endptr)
1163     {
1164     int extra = 0;
1165     register int c = *((unsigned char *)p);
1166 
1167     if (utf8 && c >= 0xc0)
1168       {
1169       int gcii, gcss;
1170       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1171       gcss = 6*extra;
1172       c = (c & utf8_table3[extra]) << gcss;
1173       for (gcii = 1; gcii <= extra; gcii++)
1174         {
1175         gcss -= 6;
1176         c |= (p[gcii] & 0x3f) << gcss;
1177         }
1178       }
1179 
1180     p += 1 + extra;
1181 
1182     switch (c)
1183       {
1184       case '\n':
1185       *lenptr = 1;
1186       return p;
1187 
1188       case '\r':
1189       if (p < endptr && *p == '\n')
1190         {
1191         *lenptr = 2;
1192         p++;
1193         }
1194       else *lenptr = 1;
1195       return p;
1196 
1197       default:
1198       break;
1199       }
1200     }   /* End of loop for ANYCRLF case */
1201 
1202   *lenptr = 0;  /* Must have hit the end */
1203   return endptr;
1204 
1205   case EL_ANY:
1206   while (p < endptr)
1207     {
1208     int extra = 0;
1209     register int c = *((unsigned char *)p);
1210 
1211     if (utf8 && c >= 0xc0)
1212       {
1213       int gcii, gcss;
1214       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1215       gcss = 6*extra;
1216       c = (c & utf8_table3[extra]) << gcss;
1217       for (gcii = 1; gcii <= extra; gcii++)
1218         {
1219         gcss -= 6;
1220         c |= (p[gcii] & 0x3f) << gcss;
1221         }
1222       }
1223 
1224     p += 1 + extra;
1225 
1226     switch (c)
1227       {
1228       case '\n':    /* LF */
1229       case '\v':    /* VT */
1230       case '\f':    /* FF */
1231       *lenptr = 1;
1232       return p;
1233 
1234       case '\r':    /* CR */
1235       if (p < endptr && *p == '\n')
1236         {
1237         *lenptr = 2;
1238         p++;
1239         }
1240       else *lenptr = 1;
1241       return p;
1242 
1243 #ifndef EBCDIC
1244       case 0x85:    /* Unicode NEL */
1245       *lenptr = utf8? 2 : 1;
1246       return p;
1247 
1248       case 0x2028:  /* Unicode LS */
1249       case 0x2029:  /* Unicode PS */
1250       *lenptr = 3;
1251       return p;
1252 #endif  /* Not EBCDIC */
1253 
1254       default:
1255       break;
1256       }
1257     }   /* End of loop for ANY case */
1258 
1259   *lenptr = 0;  /* Must have hit the end */
1260   return endptr;
1261   }     /* End of overall switch */
1262 }
1263 
1264 
1265 
1266 /*************************************************
1267 *         Find start of previous line            *
1268 *************************************************/
1269 
1270 /* This is called when looking back for before lines to print.
1271 
1272 Arguments:
1273   p         start of the subsequent line
1274   startptr  start of available data
1275 
1276 Returns:    pointer to the start of the previous line
1277 */
1278 
1279 static char *
previous_line(char * p,char * startptr)1280 previous_line(char *p, char *startptr)
1281 {
1282 switch(endlinetype)
1283   {
1284   default:      /* Just in case */
1285   case EL_LF:
1286   p--;
1287   while (p > startptr && p[-1] != '\n') p--;
1288   return p;
1289 
1290   case EL_CR:
1291   p--;
1292   while (p > startptr && p[-1] != '\n') p--;
1293   return p;
1294 
1295   case EL_CRLF:
1296   for (;;)
1297     {
1298     p -= 2;
1299     while (p > startptr && p[-1] != '\n') p--;
1300     if (p <= startptr + 1 || p[-2] == '\r') return p;
1301     }
1302   /* Control can never get here */
1303 
1304   case EL_ANY:
1305   case EL_ANYCRLF:
1306   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307   if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308 
1309   while (p > startptr)
1310     {
1311     register unsigned int c;
1312     char *pp = p - 1;
1313 
1314     if (utf8)
1315       {
1316       int extra = 0;
1317       while ((*pp & 0xc0) == 0x80) pp--;
1318       c = *((unsigned char *)pp);
1319       if (c >= 0xc0)
1320         {
1321         int gcii, gcss;
1322         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1323         gcss = 6*extra;
1324         c = (c & utf8_table3[extra]) << gcss;
1325         for (gcii = 1; gcii <= extra; gcii++)
1326           {
1327           gcss -= 6;
1328           c |= (pp[gcii] & 0x3f) << gcss;
1329           }
1330         }
1331       }
1332     else c = *((unsigned char *)pp);
1333 
1334     if (endlinetype == EL_ANYCRLF) switch (c)
1335       {
1336       case '\n':    /* LF */
1337       case '\r':    /* CR */
1338       return p;
1339 
1340       default:
1341       break;
1342       }
1343 
1344     else switch (c)
1345       {
1346       case '\n':    /* LF */
1347       case '\v':    /* VT */
1348       case '\f':    /* FF */
1349       case '\r':    /* CR */
1350 #ifndef EBCDIE
1351       case 0x85:    /* Unicode NEL */
1352       case 0x2028:  /* Unicode LS */
1353       case 0x2029:  /* Unicode PS */
1354 #endif  /* Not EBCDIC */
1355       return p;
1356 
1357       default:
1358       break;
1359       }
1360 
1361     p = pp;  /* Back one character */
1362     }        /* End of loop for ANY case */
1363 
1364   return startptr;  /* Hit start of data */
1365   }     /* End of overall switch */
1366 }
1367 
1368 
1369 
1370 
1371 
1372 /*************************************************
1373 *       Print the previous "after" lines         *
1374 *************************************************/
1375 
1376 /* This is called if we are about to lose said lines because of buffer filling,
1377 and at the end of the file. The data in the line is written using fwrite() so
1378 that a binary zero does not terminate it.
1379 
1380 Arguments:
1381   lastmatchnumber   the number of the last matching line, plus one
1382   lastmatchrestart  where we restarted after the last match
1383   endptr            end of available data
1384   printname         filename for printing
1385 
1386 Returns:            nothing
1387 */
1388 
1389 static void
do_after_lines(int lastmatchnumber,char * lastmatchrestart,char * endptr,char * printname)1390 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391   char *printname)
1392 {
1393 if (after_context > 0 && lastmatchnumber > 0)
1394   {
1395   int count = 0;
1396   while (lastmatchrestart < endptr && count++ < after_context)
1397     {
1398     int ellength;
1399     char *pp = lastmatchrestart;
1400     if (printname != NULL) fprintf(stdout, "%s-", printname);
1401     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402     pp = end_of_line(pp, endptr, &ellength);
1403     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404     lastmatchrestart = pp;
1405     }
1406   hyphenpending = TRUE;
1407   }
1408 }
1409 
1410 
1411 
1412 /*************************************************
1413 *   Apply patterns to subject till one matches   *
1414 *************************************************/
1415 
1416 /* This function is called to run through all patterns, looking for a match. It
1417 is used multiple times for the same subject when colouring is enabled, in order
1418 to find all possible matches.
1419 
1420 Arguments:
1421   matchptr     the start of the subject
1422   length       the length of the subject to match
1423   options      options for pcre_exec
1424   startoffset  where to start matching
1425   offsets      the offets vector to fill in
1426   mrc          address of where to put the result of pcre_exec()
1427 
1428 Returns:      TRUE if there was a match
1429               FALSE if there was no match
1430               invert if there was a non-fatal error
1431 */
1432 
1433 static BOOL
match_patterns(char * matchptr,size_t length,unsigned int options,int startoffset,int * offsets,int * mrc)1434 match_patterns(char *matchptr, size_t length, unsigned int options,
1435   int startoffset, int *offsets, int *mrc)
1436 {
1437 int i;
1438 size_t slen = length;
1439 patstr *p = patterns;
1440 const char *msg = "this text:\n\n";
1441 
1442 if (slen > 200)
1443   {
1444   slen = 200;
1445   msg = "text that starts:\n\n";
1446   }
1447 for (i = 1; p != NULL; p = p->next, i++)
1448   {
1449   *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450     startoffset, options, offsets, OFFSET_SIZE);
1451   if (*mrc >= 0) return TRUE;
1452   if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455   fprintf(stderr, "%s", msg);
1456   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1457   fprintf(stderr, "\n\n");
1458   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459       *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460     resource_error = TRUE;
1461   if (error_count++ > 20)
1462     {
1463     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464     pcregrep_exit(2);
1465     }
1466   return invert;    /* No more matching; don't show the line again */
1467   }
1468 
1469 return FALSE;  /* No match, no errors */
1470 }
1471 
1472 
1473 
1474 /*************************************************
1475 *            Grep an individual file             *
1476 *************************************************/
1477 
1478 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479 times the value of bufthird. The matching point is never allowed to stray into
1480 the top third of the buffer, thus keeping more of the file available for
1481 context printing or for multiline scanning. For large files, the pointer will
1482 be in the middle third most of the time, so the bottom third is available for
1483 "before" context printing.
1484 
1485 Arguments:
1486   handle       the fopened FILE stream for a normal file
1487                the gzFile pointer when reading is via libz
1488                the BZFILE pointer when reading is via libbz2
1489   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490   filename     the file name or NULL (for errors)
1491   printname    the file name if it is to be printed for each match
1492                or NULL if the file name is not to be printed
1493                it cannot be NULL if filenames[_nomatch]_only is set
1494 
1495 Returns:       0 if there was at least one match
1496                1 otherwise (no matches)
1497                2 if an overlong line is encountered
1498                3 if there is a read error on a .bz2 file
1499 */
1500 
1501 static int
pcregrep(void * handle,int frtype,char * filename,char * printname)1502 pcregrep(void *handle, int frtype, char *filename, char *printname)
1503 {
1504 int rc = 1;
1505 int linenumber = 1;
1506 int lastmatchnumber = 0;
1507 int count = 0;
1508 int filepos = 0;
1509 int offsets[OFFSET_SIZE];
1510 char *lastmatchrestart = NULL;
1511 char *ptr = main_buffer;
1512 char *endptr;
1513 size_t bufflength;
1514 BOOL binary = FALSE;
1515 BOOL endhyphenpending = FALSE;
1516 BOOL input_line_buffered = line_buffered;
1517 FILE *in = NULL;                    /* Ensure initialized */
1518 
1519 #ifdef SUPPORT_LIBZ
1520 gzFile ingz = NULL;
1521 #endif
1522 
1523 #ifdef SUPPORT_LIBBZ2
1524 BZFILE *inbz2 = NULL;
1525 #endif
1526 
1527 
1528 /* Do the first read into the start of the buffer and set up the pointer to end
1529 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531 fail. */
1532 
1533 (void)frtype;
1534 
1535 #ifdef SUPPORT_LIBZ
1536 if (frtype == FR_LIBZ)
1537   {
1538   ingz = (gzFile)handle;
1539   bufflength = gzread (ingz, main_buffer, bufsize);
1540   }
1541 else
1542 #endif
1543 
1544 #ifdef SUPPORT_LIBBZ2
1545 if (frtype == FR_LIBBZ2)
1546   {
1547   inbz2 = (BZFILE *)handle;
1548   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1550   }                                    /* without the cast it is unsigned. */
1551 else
1552 #endif
1553 
1554   {
1555   in = (FILE *)handle;
1556   if (is_file_tty(in)) input_line_buffered = TRUE;
1557   bufflength = input_line_buffered?
1558     read_one_line(main_buffer, bufsize, in) :
1559     fread(main_buffer, 1, bufsize, in);
1560   }
1561 
1562 endptr = main_buffer + bufflength;
1563 
1564 /* Unless binary-files=text, see if we have a binary file. This uses the same
1565 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566 file. */
1567 
1568 if (binary_files != BIN_TEXT)
1569   {
1570   binary =
1571     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572   if (binary && binary_files == BIN_NOMATCH) return 1;
1573   }
1574 
1575 /* Loop while the current pointer is not at the end of the file. For large
1576 files, endptr will be at the end of the buffer when we are in the middle of the
1577 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578 way, the buffer is shifted left and re-filled. */
1579 
1580 while (ptr < endptr)
1581   {
1582   int endlinelength;
1583   int mrc = 0;
1584   int startoffset = 0;
1585   unsigned int options = 0;
1586   BOOL match;
1587   char *matchptr = ptr;
1588   char *t = ptr;
1589   size_t length, linelength;
1590 
1591   /* At this point, ptr is at the start of a line. We need to find the length
1592   of the subject string to pass to pcre_exec(). In multiline mode, it is the
1593   length remainder of the data in the buffer. Otherwise, it is the length of
1594   the next line, excluding the terminating newline. After matching, we always
1595   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1596   option is used for compiling, so that any match is constrained to be in the
1597   first line. */
1598 
1599   t = end_of_line(t, endptr, &endlinelength);
1600   linelength = t - ptr - endlinelength;
1601   length = multiline? (size_t)(endptr - ptr) : linelength;
1602 
1603   /* Check to see if the line we are looking at extends right to the very end
1604   of the buffer without a line terminator. This means the line is too long to
1605   handle. */
1606 
1607   if (endlinelength == 0 && t == main_buffer + bufsize)
1608     {
1609     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1610                     "pcregrep: check the --buffer-size option\n",
1611                     linenumber,
1612                     (filename == NULL)? "" : " of file ",
1613                     (filename == NULL)? "" : filename);
1614     return 2;
1615     }
1616 
1617   /* Extra processing for Jeffrey Friedl's debugging. */
1618 
1619 #ifdef JFRIEDL_DEBUG
1620   if (jfriedl_XT || jfriedl_XR)
1621   {
1622 #     include <sys/time.h>
1623 #     include <time.h>
1624       struct timeval start_time, end_time;
1625       struct timezone dummy;
1626       int i;
1627 
1628       if (jfriedl_XT)
1629       {
1630           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1631           const char *orig = ptr;
1632           ptr = malloc(newlen + 1);
1633           if (!ptr) {
1634                   printf("out of memory");
1635                   pcregrep_exit(2);
1636           }
1637           endptr = ptr;
1638           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1639           for (i = 0; i < jfriedl_XT; i++) {
1640                   strncpy(endptr, orig,  length);
1641                   endptr += length;
1642           }
1643           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1644           length = newlen;
1645       }
1646 
1647       if (gettimeofday(&start_time, &dummy) != 0)
1648               perror("bad gettimeofday");
1649 
1650 
1651       for (i = 0; i < jfriedl_XR; i++)
1652           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1653               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1654 
1655       if (gettimeofday(&end_time, &dummy) != 0)
1656               perror("bad gettimeofday");
1657 
1658       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1659                       -
1660                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1661 
1662       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1663       return 0;
1664   }
1665 #endif
1666 
1667   /* We come back here after a match when show_only_matching is set, in order
1668   to find any further matches in the same line. This applies to
1669   --only-matching, --file-offsets, and --line-offsets. */
1670 
1671   ONLY_MATCHING_RESTART:
1672 
1673   /* Run through all the patterns until one matches or there is an error other
1674   than NOMATCH. This code is in a subroutine so that it can be re-used for
1675   finding subsequent matches when colouring matched lines. After finding one
1676   match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1677   this line. */
1678 
1679   match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1680   options = PCRE_NOTEMPTY;
1681 
1682   /* If it's a match or a not-match (as required), do what's wanted. */
1683 
1684   if (match != invert)
1685     {
1686     BOOL hyphenprinted = FALSE;
1687 
1688     /* We've failed if we want a file that doesn't have any matches. */
1689 
1690     if (filenames == FN_NOMATCH_ONLY) return 1;
1691 
1692     /* Just count if just counting is wanted. */
1693 
1694     if (count_only) count++;
1695 
1696     /* When handling a binary file and binary-files==binary, the "binary"
1697     variable will be set true (it's false in all other cases). In this
1698     situation we just want to output the file name. No need to scan further. */
1699 
1700     else if (binary)
1701       {
1702       fprintf(stdout, "Binary file %s matches\n", filename);
1703       return 0;
1704       }
1705 
1706     /* If all we want is a file name, there is no need to scan any more lines
1707     in the file. */
1708 
1709     else if (filenames == FN_MATCH_ONLY)
1710       {
1711       fprintf(stdout, "%s\n", printname);
1712       return 0;
1713       }
1714 
1715     /* Likewise, if all we want is a yes/no answer. */
1716 
1717     else if (quiet) return 0;
1718 
1719     /* The --only-matching option prints just the substring that matched,
1720     and/or one or more captured portions of it, as long as these strings are
1721     not empty. The --file-offsets and --line-offsets options output offsets for
1722     the matching substring (all three set show_only_matching). None of these
1723     mutually exclusive options prints any context. Afterwards, adjust the start
1724     and then jump back to look for further matches in the same line. If we are
1725     in invert mode, however, nothing is printed and we do not restart - this
1726     could still be useful because the return code is set. */
1727 
1728     else if (show_only_matching)
1729       {
1730       if (!invert)
1731         {
1732         if (printname != NULL) fprintf(stdout, "%s:", printname);
1733         if (number) fprintf(stdout, "%d:", linenumber);
1734 
1735         /* Handle --line-offsets */
1736 
1737         if (line_offsets)
1738           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1739             offsets[1] - offsets[0]);
1740 
1741         /* Handle --file-offsets */
1742 
1743         else if (file_offsets)
1744           fprintf(stdout, "%d,%d\n",
1745             (int)(filepos + matchptr + offsets[0] - ptr),
1746             offsets[1] - offsets[0]);
1747 
1748         /* Handle --only-matching, which may occur many times */
1749 
1750         else
1751           {
1752           BOOL printed = FALSE;
1753           omstr *om;
1754 
1755           for (om = only_matching; om != NULL; om = om->next)
1756             {
1757             int n = om->groupnum;
1758             if (n < mrc)
1759               {
1760               int plen = offsets[2*n + 1] - offsets[2*n];
1761               if (plen > 0)
1762                 {
1763                 if (printed) fprintf(stdout, "%s", om_separator);
1764                 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1765                 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1766                 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1767                 printed = TRUE;
1768                 }
1769               }
1770             }
1771 
1772           if (printed || printname != NULL || number) fprintf(stdout, "\n");
1773           }
1774 
1775         /* Prepare to repeat to find the next match */
1776 
1777         match = FALSE;
1778         if (line_buffered) fflush(stdout);
1779         rc = 0;                      /* Had some success */
1780         startoffset = offsets[1];    /* Restart after the match */
1781         goto ONLY_MATCHING_RESTART;
1782         }
1783       }
1784 
1785     /* This is the default case when none of the above options is set. We print
1786     the matching lines(s), possibly preceded and/or followed by other lines of
1787     context. */
1788 
1789     else
1790       {
1791       /* See if there is a requirement to print some "after" lines from a
1792       previous match. We never print any overlaps. */
1793 
1794       if (after_context > 0 && lastmatchnumber > 0)
1795         {
1796         int ellength;
1797         int linecount = 0;
1798         char *p = lastmatchrestart;
1799 
1800         while (p < ptr && linecount < after_context)
1801           {
1802           p = end_of_line(p, ptr, &ellength);
1803           linecount++;
1804           }
1805 
1806         /* It is important to advance lastmatchrestart during this printing so
1807         that it interacts correctly with any "before" printing below. Print
1808         each line's data using fwrite() in case there are binary zeroes. */
1809 
1810         while (lastmatchrestart < p)
1811           {
1812           char *pp = lastmatchrestart;
1813           if (printname != NULL) fprintf(stdout, "%s-", printname);
1814           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1815           pp = end_of_line(pp, endptr, &ellength);
1816           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1817           lastmatchrestart = pp;
1818           }
1819         if (lastmatchrestart != ptr) hyphenpending = TRUE;
1820         }
1821 
1822       /* If there were non-contiguous lines printed above, insert hyphens. */
1823 
1824       if (hyphenpending)
1825         {
1826         fprintf(stdout, "--\n");
1827         hyphenpending = FALSE;
1828         hyphenprinted = TRUE;
1829         }
1830 
1831       /* See if there is a requirement to print some "before" lines for this
1832       match. Again, don't print overlaps. */
1833 
1834       if (before_context > 0)
1835         {
1836         int linecount = 0;
1837         char *p = ptr;
1838 
1839         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1840                linecount < before_context)
1841           {
1842           linecount++;
1843           p = previous_line(p, main_buffer);
1844           }
1845 
1846         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1847           fprintf(stdout, "--\n");
1848 
1849         while (p < ptr)
1850           {
1851           int ellength;
1852           char *pp = p;
1853           if (printname != NULL) fprintf(stdout, "%s-", printname);
1854           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1855           pp = end_of_line(pp, endptr, &ellength);
1856           FWRITE(p, 1, pp - p, stdout);
1857           p = pp;
1858           }
1859         }
1860 
1861       /* Now print the matching line(s); ensure we set hyphenpending at the end
1862       of the file if any context lines are being output. */
1863 
1864       if (after_context > 0 || before_context > 0)
1865         endhyphenpending = TRUE;
1866 
1867       if (printname != NULL) fprintf(stdout, "%s:", printname);
1868       if (number) fprintf(stdout, "%d:", linenumber);
1869 
1870       /* In multiline mode, we want to print to the end of the line in which
1871       the end of the matched string is found, so we adjust linelength and the
1872       line number appropriately, but only when there actually was a match
1873       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1874       the match will always be before the first newline sequence. */
1875 
1876       if (multiline & !invert)
1877         {
1878         char *endmatch = ptr + offsets[1];
1879         t = ptr;
1880         while (t <= endmatch)
1881           {
1882           t = end_of_line(t, endptr, &endlinelength);
1883           if (t < endmatch) linenumber++; else break;
1884           }
1885         linelength = t - ptr - endlinelength;
1886         }
1887 
1888       /*** NOTE: Use only fwrite() to output the data line, so that binary
1889       zeroes are treated as just another data character. */
1890 
1891       /* This extra option, for Jeffrey Friedl's debugging requirements,
1892       replaces the matched string, or a specific captured string if it exists,
1893       with X. When this happens, colouring is ignored. */
1894 
1895 #ifdef JFRIEDL_DEBUG
1896       if (S_arg >= 0 && S_arg < mrc)
1897         {
1898         int first = S_arg * 2;
1899         int last  = first + 1;
1900         FWRITE(ptr, 1, offsets[first], stdout);
1901         fprintf(stdout, "X");
1902         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1903         }
1904       else
1905 #endif
1906 
1907       /* We have to split the line(s) up if colouring, and search for further
1908       matches, but not of course if the line is a non-match. */
1909 
1910       if (do_colour && !invert)
1911         {
1912         int plength;
1913         FWRITE(ptr, 1, offsets[0], stdout);
1914         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1915         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1916         fprintf(stdout, "%c[00m", 0x1b);
1917         for (;;)
1918           {
1919           startoffset = offsets[1];
1920           if (startoffset >= (int)linelength + endlinelength ||
1921               !match_patterns(matchptr, length, options, startoffset, offsets,
1922                 &mrc))
1923             break;
1924           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1925           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1926           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1927           fprintf(stdout, "%c[00m", 0x1b);
1928           }
1929 
1930         /* In multiline mode, we may have already printed the complete line
1931         and its line-ending characters (if they matched the pattern), so there
1932         may be no more to print. */
1933 
1934         plength = (int)((linelength + endlinelength) - startoffset);
1935         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1936         }
1937 
1938       /* Not colouring; no need to search for further matches */
1939 
1940       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1941       }
1942 
1943     /* End of doing what has to be done for a match. If --line-buffered was
1944     given, flush the output. */
1945 
1946     if (line_buffered) fflush(stdout);
1947     rc = 0;    /* Had some success */
1948 
1949     /* Remember where the last match happened for after_context. We remember
1950     where we are about to restart, and that line's number. */
1951 
1952     lastmatchrestart = ptr + linelength + endlinelength;
1953     lastmatchnumber = linenumber + 1;
1954     }
1955 
1956   /* For a match in multiline inverted mode (which of course did not cause
1957   anything to be printed), we have to move on to the end of the match before
1958   proceeding. */
1959 
1960   if (multiline && invert && match)
1961     {
1962     int ellength;
1963     char *endmatch = ptr + offsets[1];
1964     t = ptr;
1965     while (t < endmatch)
1966       {
1967       t = end_of_line(t, endptr, &ellength);
1968       if (t <= endmatch) linenumber++; else break;
1969       }
1970     endmatch = end_of_line(endmatch, endptr, &ellength);
1971     linelength = endmatch - ptr - ellength;
1972     }
1973 
1974   /* Advance to after the newline and increment the line number. The file
1975   offset to the current line is maintained in filepos. */
1976 
1977   ptr += linelength + endlinelength;
1978   filepos += (int)(linelength + endlinelength);
1979   linenumber++;
1980 
1981   /* If input is line buffered, and the buffer is not yet full, read another
1982   line and add it into the buffer. */
1983 
1984   if (input_line_buffered && bufflength < (size_t)bufsize)
1985     {
1986     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1987     bufflength += add;
1988     endptr += add;
1989     }
1990 
1991   /* If we haven't yet reached the end of the file (the buffer is full), and
1992   the current point is in the top 1/3 of the buffer, slide the buffer down by
1993   1/3 and refill it. Before we do this, if some unprinted "after" lines are
1994   about to be lost, print them. */
1995 
1996   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1997     {
1998     if (after_context > 0 &&
1999         lastmatchnumber > 0 &&
2000         lastmatchrestart < main_buffer + bufthird)
2001       {
2002       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2003       lastmatchnumber = 0;
2004       }
2005 
2006     /* Now do the shuffle */
2007 
2008     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2009     ptr -= bufthird;
2010 
2011 #ifdef SUPPORT_LIBZ
2012     if (frtype == FR_LIBZ)
2013       bufflength = 2*bufthird +
2014         gzread (ingz, main_buffer + 2*bufthird, bufthird);
2015     else
2016 #endif
2017 
2018 #ifdef SUPPORT_LIBBZ2
2019     if (frtype == FR_LIBBZ2)
2020       bufflength = 2*bufthird +
2021         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2022     else
2023 #endif
2024 
2025     bufflength = 2*bufthird +
2026       (input_line_buffered?
2027        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2028        fread(main_buffer + 2*bufthird, 1, bufthird, in));
2029     endptr = main_buffer + bufflength;
2030 
2031     /* Adjust any last match point */
2032 
2033     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2034     }
2035   }     /* Loop through the whole file */
2036 
2037 /* End of file; print final "after" lines if wanted; do_after_lines sets
2038 hyphenpending if it prints something. */
2039 
2040 if (!show_only_matching && !count_only)
2041   {
2042   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2043   hyphenpending |= endhyphenpending;
2044   }
2045 
2046 /* Print the file name if we are looking for those without matches and there
2047 were none. If we found a match, we won't have got this far. */
2048 
2049 if (filenames == FN_NOMATCH_ONLY)
2050   {
2051   fprintf(stdout, "%s\n", printname);
2052   return 0;
2053   }
2054 
2055 /* Print the match count if wanted */
2056 
2057 if (count_only)
2058   {
2059   if (count > 0 || !omit_zero_count)
2060     {
2061     if (printname != NULL && filenames != FN_NONE)
2062       fprintf(stdout, "%s:", printname);
2063     fprintf(stdout, "%d\n", count);
2064     }
2065   }
2066 
2067 return rc;
2068 }
2069 
2070 
2071 
2072 /*************************************************
2073 *     Grep a file or recurse into a directory    *
2074 *************************************************/
2075 
2076 /* Given a path name, if it's a directory, scan all the files if we are
2077 recursing; if it's a file, grep it.
2078 
2079 Arguments:
2080   pathname          the path to investigate
2081   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2082   only_one_at_top   TRUE if the path is the only one at toplevel
2083 
2084 Returns:  -1 the file/directory was skipped
2085            0 if there was at least one match
2086            1 if there were no matches
2087            2 there was some kind of error
2088 
2089 However, file opening failures are suppressed if "silent" is set.
2090 */
2091 
2092 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)2093 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2094 {
2095 int rc = 1;
2096 int frtype;
2097 void *handle;
2098 char *lastcomp;
2099 FILE *in = NULL;           /* Ensure initialized */
2100 
2101 #ifdef SUPPORT_LIBZ
2102 gzFile ingz = NULL;
2103 #endif
2104 
2105 #ifdef SUPPORT_LIBBZ2
2106 BZFILE *inbz2 = NULL;
2107 #endif
2108 
2109 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2110 int pathlen;
2111 #endif
2112 
2113 #if defined NATIVE_ZOS
2114 int zos_type;
2115 FILE *zos_test_file;
2116 #endif
2117 
2118 /* If the file name is "-" we scan stdin */
2119 
2120 if (strcmp(pathname, "-") == 0)
2121   {
2122   return pcregrep(stdin, FR_PLAIN, stdin_name,
2123     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2124       stdin_name : NULL);
2125   }
2126 
2127 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2128 directories, whereas --include and --exclude apply to everything else. The test
2129 is against the final component of the path. */
2130 
2131 lastcomp = strrchr(pathname, FILESEP);
2132 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2133 
2134 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2135 Otherwise, scan the directory and recurse for each path within it. The scanning
2136 code is localized so it can be made system-specific. */
2137 
2138 
2139 /* For z/OS, determine the file type. */
2140 
2141 #if defined NATIVE_ZOS
2142 zos_test_file =  fopen(pathname,"rb");
2143 
2144 if (zos_test_file == NULL)
2145    {
2146    if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2147      pathname, strerror(errno));
2148    return -1;
2149    }
2150 zos_type = identifyzosfiletype (zos_test_file);
2151 fclose (zos_test_file);
2152 
2153 /* Handle a PDS in separate code */
2154 
2155 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2156    {
2157    return travelonpdsdir (pathname, only_one_at_top);
2158    }
2159 
2160 /* Deal with regular files in the normal way below. These types are:
2161    zos_type == __ZOS_PDS_MEMBER
2162    zos_type == __ZOS_PS
2163    zos_type == __ZOS_VSAM_KSDS
2164    zos_type == __ZOS_VSAM_ESDS
2165    zos_type == __ZOS_VSAM_RRDS
2166 */
2167 
2168 /* Handle a z/OS directory using common code. */
2169 
2170 else if (zos_type == __ZOS_HFS)
2171  {
2172 #endif  /* NATIVE_ZOS */
2173 
2174 
2175 /* Handle directories: common code for all OS */
2176 
2177 if (isdirectory(pathname))
2178   {
2179   if (dee_action == dee_SKIP ||
2180       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2181     return -1;
2182 
2183   if (dee_action == dee_RECURSE)
2184     {
2185     char buffer[1024];
2186     char *nextfile;
2187     directory_type *dir = opendirectory(pathname);
2188 
2189     if (dir == NULL)
2190       {
2191       if (!silent)
2192         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2193           strerror(errno));
2194       return 2;
2195       }
2196 
2197     while ((nextfile = readdirectory(dir)) != NULL)
2198       {
2199       int frc;
2200       sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2201       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2202       if (frc > 1) rc = frc;
2203        else if (frc == 0 && rc == 1) rc = 0;
2204       }
2205 
2206     closedirectory(dir);
2207     return rc;
2208     }
2209   }
2210 
2211 #if defined NATIVE_ZOS
2212  }
2213 #endif
2214 
2215 /* If the file is not a directory, check for a regular file, and if it is not,
2216 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2217 exclusion. */
2218 
2219 else if (
2220 #if defined NATIVE_ZOS
2221         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2222 #else  /* all other OS */
2223         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2224 #endif
2225         !test_incexc(lastcomp, include_patterns, exclude_patterns))
2226   return -1;  /* File skipped */
2227 
2228 /* Control reaches here if we have a regular file, or if we have a directory
2229 and recursion or skipping was not requested, or if we have anything else and
2230 skipping was not requested. The scan proceeds. If this is the first and only
2231 argument at top level, we don't show the file name, unless we are only showing
2232 the file name, or the filename was forced (-H). */
2233 
2234 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2235 pathlen = (int)(strlen(pathname));
2236 #endif
2237 
2238 /* Open using zlib if it is supported and the file name ends with .gz. */
2239 
2240 #ifdef SUPPORT_LIBZ
2241 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2242   {
2243   ingz = gzopen(pathname, "rb");
2244   if (ingz == NULL)
2245     {
2246     if (!silent)
2247       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2248         strerror(errno));
2249     return 2;
2250     }
2251   handle = (void *)ingz;
2252   frtype = FR_LIBZ;
2253   }
2254 else
2255 #endif
2256 
2257 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2258 
2259 #ifdef SUPPORT_LIBBZ2
2260 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2261   {
2262   inbz2 = BZ2_bzopen(pathname, "rb");
2263   handle = (void *)inbz2;
2264   frtype = FR_LIBBZ2;
2265   }
2266 else
2267 #endif
2268 
2269 /* Otherwise use plain fopen(). The label is so that we can come back here if
2270 an attempt to read a .bz2 file indicates that it really is a plain file. */
2271 
2272 #ifdef SUPPORT_LIBBZ2
2273 PLAIN_FILE:
2274 #endif
2275   {
2276   in = fopen(pathname, "rb");
2277   handle = (void *)in;
2278   frtype = FR_PLAIN;
2279   }
2280 
2281 /* All the opening methods return errno when they fail. */
2282 
2283 if (handle == NULL)
2284   {
2285   if (!silent)
2286     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2287       strerror(errno));
2288   return 2;
2289   }
2290 
2291 /* Now grep the file */
2292 
2293 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2294   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2295 
2296 /* Close in an appropriate manner. */
2297 
2298 #ifdef SUPPORT_LIBZ
2299 if (frtype == FR_LIBZ)
2300   gzclose(ingz);
2301 else
2302 #endif
2303 
2304 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2305 read failed. If the error indicates that the file isn't in fact bzipped, try
2306 again as a normal file. */
2307 
2308 #ifdef SUPPORT_LIBBZ2
2309 if (frtype == FR_LIBBZ2)
2310   {
2311   if (rc == 3)
2312     {
2313     int errnum;
2314     const char *err = BZ2_bzerror(inbz2, &errnum);
2315     if (errnum == BZ_DATA_ERROR_MAGIC)
2316       {
2317       BZ2_bzclose(inbz2);
2318       goto PLAIN_FILE;
2319       }
2320     else if (!silent)
2321       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2322         pathname, err);
2323     rc = 2;    /* The normal "something went wrong" code */
2324     }
2325   BZ2_bzclose(inbz2);
2326   }
2327 else
2328 #endif
2329 
2330 /* Normal file close */
2331 
2332 fclose(in);
2333 
2334 /* Pass back the yield from pcregrep(). */
2335 
2336 return rc;
2337 }
2338 
2339 
2340 
2341 /*************************************************
2342 *    Handle a single-letter, no data option      *
2343 *************************************************/
2344 
2345 static int
handle_option(int letter,int options)2346 handle_option(int letter, int options)
2347 {
2348 switch(letter)
2349   {
2350   case N_FOFFSETS: file_offsets = TRUE; break;
2351   case N_HELP: help(); pcregrep_exit(0);
2352   case N_LBUFFER: line_buffered = TRUE; break;
2353   case N_LOFFSETS: line_offsets = number = TRUE; break;
2354   case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2355   case 'a': binary_files = BIN_TEXT; break;
2356   case 'c': count_only = TRUE; break;
2357   case 'F': process_options |= PO_FIXED_STRINGS; break;
2358   case 'H': filenames = FN_FORCE; break;
2359   case 'I': binary_files = BIN_NOMATCH; break;
2360   case 'h': filenames = FN_NONE; break;
2361   case 'i': options |= PCRE_CASELESS; break;
2362   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2363   case 'L': filenames = FN_NOMATCH_ONLY; break;
2364   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2365   case 'n': number = TRUE; break;
2366 
2367   case 'o':
2368   only_matching_last = add_number(0, only_matching_last);
2369   if (only_matching == NULL) only_matching = only_matching_last;
2370   break;
2371 
2372   case 'q': quiet = TRUE; break;
2373   case 'r': dee_action = dee_RECURSE; break;
2374   case 's': silent = TRUE; break;
2375   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2376   case 'v': invert = TRUE; break;
2377   case 'w': process_options |= PO_WORD_MATCH; break;
2378   case 'x': process_options |= PO_LINE_MATCH; break;
2379 
2380   case 'V':
2381   fprintf(stdout, "pcregrep version %s\n", pcre_version());
2382   pcregrep_exit(0);
2383   break;
2384 
2385   default:
2386   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2387   pcregrep_exit(usage(2));
2388   }
2389 
2390 return options;
2391 }
2392 
2393 
2394 
2395 
2396 /*************************************************
2397 *          Construct printed ordinal             *
2398 *************************************************/
2399 
/* This turns a number into its printed ordinal: "1st", "2nd", "3rd", "4th",
"11th", "21st", etc. Numbers whose last two digits are 11, 12, or 13 take "th"
even though their final digit is 1, 2, or 3.

Argument:   n    the number

Returns:    pointer to a static buffer containing the text; it is overwritten
            by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra five bytes cover a minus sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 5];
char *p = buffer;
int lasttwo = n % 100;

p += sprintf(p, "%d", n);    /* Leaves p at the terminating zero */

if (lasttwo >= 11 && lasttwo <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
2418 
2419 
2420 
2421 /*************************************************
2422 *          Compile a single pattern              *
2423 *************************************************/
2424 
2425 /* Do nothing if the pattern has already been compiled. This is the case for
2426 include/exclude patterns read from a file.
2427 
2428 When the -F option has been used, each "pattern" may be a list of strings,
2429 separated by line breaks. They will be matched literally. We split such a
2430 string and compile the first substring, inserting an additional block into the
2431 pattern chain.
2432 
2433 Arguments:
2434   p              points to the pattern block
2435   options        the PCRE options
2436   popts          the processing options
2437   fromfile       TRUE if the pattern was read from a file
2438   fromtext       file name or identifying text (e.g. "include")
2439   count          0 if this is the only command line pattern, or
2440                  number of the command line pattern, or
2441                  linenumber for a pattern from a file
2442 
2443 Returns:         TRUE on success, FALSE after an error
2444 */
2445 
2446 static BOOL
compile_pattern(patstr * p,int options,int popts,int fromfile,const char * fromtext,int count)2447 compile_pattern(patstr *p, int options, int popts, int fromfile,
2448   const char *fromtext, int count)
2449 {
2450 char buffer[PATBUFSIZE];
2451 const char *error;
2452 char *ps = p->string;
2453 int patlen = strlen(ps);
2454 int errptr;
2455 
2456 if (p->compiled != NULL) return TRUE;
2457 
2458 if ((popts & PO_FIXED_STRINGS) != 0)
2459   {
2460   int ellength;
2461   char *eop = ps + patlen;
2462   char *pe = end_of_line(ps, eop, &ellength);
2463 
2464   if (ellength != 0)
2465     {
2466     if (add_pattern(pe, p) == NULL) return FALSE;
2467     patlen = (int)(pe - ps - ellength);
2468     }
2469   }
2470 
2471 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2472 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2473 if (p->compiled != NULL) return TRUE;
2474 
2475 /* Handle compile errors */
2476 
2477 errptr -= (int)strlen(prefix[popts]);
2478 if (errptr > patlen) errptr = patlen;
2479 
2480 if (fromfile)
2481   {
2482   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2483     "at offset %d: %s\n", count, fromtext, errptr, error);
2484   }
2485 else
2486   {
2487   if (count == 0)
2488     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2489       fromtext, errptr, error);
2490   else
2491     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2492       ordin(count), fromtext, errptr, error);
2493   }
2494 
2495 return FALSE;
2496 }
2497 
2498 
2499 
2500 /*************************************************
2501 *     Read and compile a file of patterns        *
2502 *************************************************/
2503 
2504 /* This is used for --filelist, --include-from, and --exclude-from.
2505 
2506 Arguments:
2507   name         the name of the file; "-" is stdin
2508   patptr       pointer to the pattern chain anchor
2509   patlastptr   pointer to the last pattern pointer
2510   popts        the process options to pass to pattern_compile()
2511 
2512 Returns:       TRUE if all went well
2513 */
2514 
2515 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr,int popts)2516 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2517 {
2518 int linenumber = 0;
2519 FILE *f;
2520 char *filename;
2521 char buffer[PATBUFSIZE];
2522 
2523 if (strcmp(name, "-") == 0)
2524   {
2525   f = stdin;
2526   filename = stdin_name;
2527   }
2528 else
2529   {
2530   f = fopen(name, "r");
2531   if (f == NULL)
2532     {
2533     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2534     return FALSE;
2535     }
2536   filename = name;
2537   }
2538 
2539 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2540   {
2541   char *s = buffer + (int)strlen(buffer);
2542   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2543   *s = 0;
2544   linenumber++;
2545   if (buffer[0] == 0) continue;   /* Skip blank lines */
2546 
2547   /* Note: this call to add_pattern() puts a pointer to the local variable
2548   "buffer" into the pattern chain. However, that pointer is used only when
2549   compiling the pattern, which happens immediately below, so we flatten it
2550   afterwards, as a precaution against any later code trying to use it. */
2551 
2552   *patlastptr = add_pattern(buffer, *patlastptr);
2553   if (*patlastptr == NULL)
2554     {
2555     if (f != stdin) fclose(f);
2556     return FALSE;
2557     }
2558   if (*patptr == NULL) *patptr = *patlastptr;
2559 
2560   /* This loop is needed because compiling a "pattern" when -F is set may add
2561   on additional literal patterns if the original contains a newline. In the
2562   common case, it never will, because fgets() stops at a newline. However,
2563   the -N option can be used to give pcregrep a different newline setting. */
2564 
2565   for(;;)
2566     {
2567     if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2568         linenumber))
2569       {
2570       if (f != stdin) fclose(f);
2571       return FALSE;
2572       }
2573     (*patlastptr)->string = NULL;            /* Insurance */
2574     if ((*patlastptr)->next == NULL) break;
2575     *patlastptr = (*patlastptr)->next;
2576     }
2577   }
2578 
2579 if (f != stdin) fclose(f);
2580 return TRUE;
2581 }
2582 
2583 
2584 
2585 /*************************************************
2586 *                Main program                    *
2587 *************************************************/
2588 
2589 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2590 
2591 int
main(int argc,char ** argv)2592 main(int argc, char **argv)
2593 {
2594 int i, j;
2595 int rc = 1;
2596 BOOL only_one_at_top;
2597 patstr *cp;
2598 fnstr *fn;
2599 const char *locale_from = "--locale";
2600 const char *error;
2601 
2602 #ifdef SUPPORT_PCREGREP_JIT
2603 pcre_jit_stack *jit_stack = NULL;
2604 #endif
2605 
2606 /* Set the default line ending value from the default in the PCRE library;
2607 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2608 Note that the return values from pcre_config(), though derived from the ASCII
2609 codes, are the same in EBCDIC environments, so we must use the actual values
2610 rather than escapes such as '\r'. */
2611 
2612 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2613 switch(i)
2614   {
2615   default:               newline = (char *)"lf"; break;
2616   case 13:               newline = (char *)"cr"; break;
2617   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2618   case -1:               newline = (char *)"any"; break;
2619   case -2:               newline = (char *)"anycrlf"; break;
2620   }
2621 
2622 /* Process the options */
2623 
2624 for (i = 1; i < argc; i++)
2625   {
2626   option_item *op = NULL;
2627   char *option_data = (char *)"";    /* default to keep compiler happy */
2628   BOOL longop;
2629   BOOL longopwasequals = FALSE;
2630 
2631   if (argv[i][0] != '-') break;
2632 
2633   /* If we hit an argument that is just "-", it may be a reference to STDIN,
2634   but only if we have previously had -e or -f to define the patterns. */
2635 
2636   if (argv[i][1] == 0)
2637     {
2638     if (pattern_files != NULL || patterns != NULL) break;
2639       else pcregrep_exit(usage(2));
2640     }
2641 
2642   /* Handle a long name option, or -- to terminate the options */
2643 
2644   if (argv[i][1] == '-')
2645     {
2646     char *arg = argv[i] + 2;
2647     char *argequals = strchr(arg, '=');
2648 
2649     if (*arg == 0)    /* -- terminates options */
2650       {
2651       i++;
2652       break;                /* out of the options-handling loop */
2653       }
2654 
2655     longop = TRUE;
2656 
2657     /* Some long options have data that follows after =, for example file=name.
2658     Some options have variations in the long name spelling: specifically, we
2659     allow "regexp" because GNU grep allows it, though I personally go along
2660     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2661     These options are entered in the table as "regex(p)". Options can be in
2662     both these categories. */
2663 
2664     for (op = optionlist; op->one_char != 0; op++)
2665       {
2666       char *opbra = strchr(op->long_name, '(');
2667       char *equals = strchr(op->long_name, '=');
2668 
2669       /* Handle options with only one spelling of the name */
2670 
2671       if (opbra == NULL)     /* Does not contain '(' */
2672         {
2673         if (equals == NULL)  /* Not thing=data case */
2674           {
2675           if (strcmp(arg, op->long_name) == 0) break;
2676           }
2677         else                 /* Special case xxx=data */
2678           {
2679           int oplen = (int)(equals - op->long_name);
2680           int arglen = (argequals == NULL)?
2681             (int)strlen(arg) : (int)(argequals - arg);
2682           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2683             {
2684             option_data = arg + arglen;
2685             if (*option_data == '=')
2686               {
2687               option_data++;
2688               longopwasequals = TRUE;
2689               }
2690             break;
2691             }
2692           }
2693         }
2694 
2695       /* Handle options with an alternate spelling of the name */
2696 
2697       else
2698         {
2699         char buff1[24];
2700         char buff2[24];
2701 
2702         int baselen = (int)(opbra - op->long_name);
2703         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2704         int arglen = (argequals == NULL || equals == NULL)?
2705           (int)strlen(arg) : (int)(argequals - arg);
2706 
2707         sprintf(buff1, "%.*s", baselen, op->long_name);
2708         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2709 
2710         if (strncmp(arg, buff1, arglen) == 0 ||
2711            strncmp(arg, buff2, arglen) == 0)
2712           {
2713           if (equals != NULL && argequals != NULL)
2714             {
2715             option_data = argequals;
2716             if (*option_data == '=')
2717               {
2718               option_data++;
2719               longopwasequals = TRUE;
2720               }
2721             }
2722           break;
2723           }
2724         }
2725       }
2726 
2727     if (op->one_char == 0)
2728       {
2729       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2730       pcregrep_exit(usage(2));
2731       }
2732     }
2733 
2734   /* Jeffrey Friedl's debugging harness uses these additional options which
2735   are not in the right form for putting in the option table because they use
2736   only one hyphen, yet are more than one character long. By putting them
2737   separately here, they will not get displayed as part of the help() output,
2738   but I don't think Jeffrey will care about that. */
2739 
2740 #ifdef JFRIEDL_DEBUG
2741   else if (strcmp(argv[i], "-pre") == 0) {
2742           jfriedl_prefix = argv[++i];
2743           continue;
2744   } else if (strcmp(argv[i], "-post") == 0) {
2745           jfriedl_postfix = argv[++i];
2746           continue;
2747   } else if (strcmp(argv[i], "-XT") == 0) {
2748           sscanf(argv[++i], "%d", &jfriedl_XT);
2749           continue;
2750   } else if (strcmp(argv[i], "-XR") == 0) {
2751           sscanf(argv[++i], "%d", &jfriedl_XR);
2752           continue;
2753   }
2754 #endif
2755 
2756 
2757   /* One-char options; many that have no data may be in a single argument; we
2758   continue till we hit the last one or one that needs data. */
2759 
2760   else
2761     {
2762     char *s = argv[i] + 1;
2763     longop = FALSE;
2764 
2765     while (*s != 0)
2766       {
2767       for (op = optionlist; op->one_char != 0; op++)
2768         {
2769         if (*s == op->one_char) break;
2770         }
2771       if (op->one_char == 0)
2772         {
2773         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2774           *s, argv[i]);
2775         pcregrep_exit(usage(2));
2776         }
2777 
2778       option_data = s+1;
2779 
2780       /* Break out if this is the last character in the string; it's handled
2781       below like a single multi-char option. */
2782 
2783       if (*option_data == 0) break;
2784 
2785       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2786       are used for ones that either have a numerical number or defaults, i.e.
2787       the data is optional. If a digit follows, there is data; if not, carry on
2788       with other single-character options in the same string. */
2789 
2790       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2791         {
2792         if (isdigit((unsigned char)s[1])) break;
2793         }
2794       else   /* Check for an option with data */
2795         {
2796         if (op->type != OP_NODATA) break;
2797         }
2798 
2799       /* Handle a single-character option with no data, then loop for the
2800       next character in the string. */
2801 
2802       pcre_options = handle_option(*s++, pcre_options);
2803       }
2804     }
2805 
2806   /* At this point we should have op pointing to a matched option. If the type
2807   is OP_NODATA, it means that there is no data, and the option might set
2808   something in the PCRE options. */
2809 
2810   if (op->type == OP_NODATA)
2811     {
2812     pcre_options = handle_option(op->one_char, pcre_options);
2813     continue;
2814     }
2815 
2816   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2817   either has a value or defaults to something. It cannot have data in a
2818   separate item. At the moment, the only such options are "colo(u)r",
2819   "only-matching", and Jeffrey Friedl's special -S debugging option. */
2820 
2821   if (*option_data == 0 &&
2822       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2823        op->type == OP_OP_NUMBERS))
2824     {
2825     switch (op->one_char)
2826       {
2827       case N_COLOUR:
2828       colour_option = (char *)"auto";
2829       break;
2830 
2831       case 'o':
2832       only_matching_last = add_number(0, only_matching_last);
2833       if (only_matching == NULL) only_matching = only_matching_last;
2834       break;
2835 
2836 #ifdef JFRIEDL_DEBUG
2837       case 'S':
2838       S_arg = 0;
2839       break;
2840 #endif
2841       }
2842     continue;
2843     }
2844 
2845   /* Otherwise, find the data string for the option. */
2846 
2847   if (*option_data == 0)
2848     {
2849     if (i >= argc - 1 || longopwasequals)
2850       {
2851       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2852       pcregrep_exit(usage(2));
2853       }
2854     option_data = argv[++i];
2855     }
2856 
2857   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2858   added to a chain of numbers. */
2859 
2860   if (op->type == OP_OP_NUMBERS)
2861     {
2862     unsigned long int n = decode_number(option_data, op, longop);
2863     omdatastr *omd = (omdatastr *)op->dataptr;
2864     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2865     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2866     }
2867 
2868   /* If the option type is OP_PATLIST, it's the -e option, or one of the
2869   include/exclude options, which can be called multiple times to create lists
2870   of patterns. */
2871 
2872   else if (op->type == OP_PATLIST)
2873     {
2874     patdatastr *pd = (patdatastr *)op->dataptr;
2875     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2876     if (*(pd->lastptr) == NULL) goto EXIT2;
2877     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2878     }
2879 
2880   /* If the option type is OP_FILELIST, it's one of the options that names a
2881   file. */
2882 
2883   else if (op->type == OP_FILELIST)
2884     {
2885     fndatastr *fd = (fndatastr *)op->dataptr;
2886     fn = (fnstr *)malloc(sizeof(fnstr));
2887     if (fn == NULL)
2888       {
2889       fprintf(stderr, "pcregrep: malloc failed\n");
2890       goto EXIT2;
2891       }
2892     fn->next = NULL;
2893     fn->name = option_data;
2894     if (*(fd->anchor) == NULL)
2895       *(fd->anchor) = fn;
2896     else
2897       (*(fd->lastptr))->next = fn;
2898     *(fd->lastptr) = fn;
2899     }
2900 
2901   /* Handle OP_BINFILES */
2902 
2903   else if (op->type == OP_BINFILES)
2904     {
2905     if (strcmp(option_data, "binary") == 0)
2906       binary_files = BIN_BINARY;
2907     else if (strcmp(option_data, "without-match") == 0)
2908       binary_files = BIN_NOMATCH;
2909     else if (strcmp(option_data, "text") == 0)
2910       binary_files = BIN_TEXT;
2911     else
2912       {
2913       fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2914         option_data);
2915       pcregrep_exit(usage(2));
2916       }
2917     }
2918 
2919   /* Otherwise, deal with a single string or numeric data value. */
2920 
2921   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2922            op->type != OP_OP_NUMBER)
2923     {
2924     *((char **)op->dataptr) = option_data;
2925     }
2926   else
2927     {
2928     unsigned long int n = decode_number(option_data, op, longop);
2929     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2930       else *((int *)op->dataptr) = n;
2931     }
2932   }
2933 
2934 /* Options have been decoded. If -C was used, its value is used as a default
2935 for -A and -B. */
2936 
2937 if (both_context > 0)
2938   {
2939   if (after_context == 0) after_context = both_context;
2940   if (before_context == 0) before_context = both_context;
2941   }
2942 
2943 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2944 However, all three set show_only_matching because they display, each in their
2945 own way, only the data that has matched. */
2946 
2947 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2948     (file_offsets && line_offsets))
2949   {
2950   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2951     "and/or --line-offsets\n");
2952   pcregrep_exit(usage(2));
2953   }
2954 
2955 if (only_matching != NULL || file_offsets || line_offsets)
2956   show_only_matching = TRUE;
2957 
2958 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2959 LC_ALL environment variable is set, and if so, use it. */
2960 
2961 if (locale == NULL)
2962   {
2963   locale = getenv("LC_ALL");
2964   locale_from = "LCC_ALL";
2965   }
2966 
2967 if (locale == NULL)
2968   {
2969   locale = getenv("LC_CTYPE");
2970   locale_from = "LC_CTYPE";
2971   }
2972 
2973 /* If a locale is set, use it to generate the tables that PCRE needs. Otherwise,
2974 pcretables==NULL, which causes the use of default tables. */
2975 
2976 if (locale != NULL)
2977   {
2978   if (setlocale(LC_CTYPE, locale) == NULL)
2979     {
2980     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2981       locale, locale_from);
2982     goto EXIT2;
2983     }
2984   pcretables = pcre_maketables();
2985   }
2986 
2987 /* Sort out colouring */
2988 
2989 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2990   {
2991   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2992   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2993   else
2994     {
2995     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2996       colour_option);
2997     goto EXIT2;
2998     }
2999   if (do_colour)
3000     {
3001     char *cs = getenv("PCREGREP_COLOUR");
3002     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3003     if (cs != NULL) colour_string = cs;
3004     }
3005   }
3006 
3007 /* Interpret the newline type; the default settings are Unix-like. */
3008 
3009 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3010   {
3011   pcre_options |= PCRE_NEWLINE_CR;
3012   endlinetype = EL_CR;
3013   }
3014 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3015   {
3016   pcre_options |= PCRE_NEWLINE_LF;
3017   endlinetype = EL_LF;
3018   }
3019 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3020   {
3021   pcre_options |= PCRE_NEWLINE_CRLF;
3022   endlinetype = EL_CRLF;
3023   }
3024 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3025   {
3026   pcre_options |= PCRE_NEWLINE_ANY;
3027   endlinetype = EL_ANY;
3028   }
3029 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3030   {
3031   pcre_options |= PCRE_NEWLINE_ANYCRLF;
3032   endlinetype = EL_ANYCRLF;
3033   }
3034 else
3035   {
3036   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3037   goto EXIT2;
3038   }
3039 
3040 /* Interpret the text values for -d and -D */
3041 
3042 if (dee_option != NULL)
3043   {
3044   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3045   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3046   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3047   else
3048     {
3049     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3050     goto EXIT2;
3051     }
3052   }
3053 
3054 if (DEE_option != NULL)
3055   {
3056   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3057   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3058   else
3059     {
3060     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3061     goto EXIT2;
3062     }
3063   }
3064 
3065 /* Check the values for Jeffrey Friedl's debugging options. */
3066 
3067 #ifdef JFRIEDL_DEBUG
3068 if (S_arg > 9)
3069   {
3070   fprintf(stderr, "pcregrep: bad value for -S option\n");
3071   return 2;
3072   }
3073 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3074   {
3075   if (jfriedl_XT == 0) jfriedl_XT = 1;
3076   if (jfriedl_XR == 0) jfriedl_XR = 1;
3077   }
3078 #endif
3079 
3080 /* Get memory for the main buffer. */
3081 
3082 bufsize = 3*bufthird;
3083 main_buffer = (char *)malloc(bufsize);
3084 
3085 if (main_buffer == NULL)
3086   {
3087   fprintf(stderr, "pcregrep: malloc failed\n");
3088   goto EXIT2;
3089   }
3090 
3091 /* If no patterns were provided by -e, and there are no files provided by -f,
3092 the first argument is the one and only pattern, and it must exist. */
3093 
3094 if (patterns == NULL && pattern_files == NULL)
3095   {
3096   if (i >= argc) return usage(2);
3097   patterns = patterns_last = add_pattern(argv[i++], NULL);
3098   if (patterns == NULL) goto EXIT2;
3099   }
3100 
3101 /* Compile the patterns that were provided on the command line, either by
3102 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3103 after all the command-line options are read so that we know which PCRE options
3104 to use. When -F is used, compile_pattern() may add another block into the
3105 chain, so we must not access the next pointer till after the compile. */
3106 
3107 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3108   {
3109   if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3110        (j == 1 && patterns->next == NULL)? 0 : j))
3111     goto EXIT2;
3112   }
3113 
3114 /* Read and compile the regular expressions that are provided in files. */
3115 
3116 for (fn = pattern_files; fn != NULL; fn = fn->next)
3117   {
3118   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3119     goto EXIT2;
3120   }
3121 
3122 /* Study the regular expressions, as we will be running them many times. If an
3123 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3124 returned, even if studying produces no data. */
3125 
3126 if (match_limit > 0 || match_limit_recursion > 0)
3127   study_options |= PCRE_STUDY_EXTRA_NEEDED;
3128 
3129 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3130 
3131 #ifdef SUPPORT_PCREGREP_JIT
3132 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3133   jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3134 #endif
3135 
3136 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3137   {
3138   cp->hint = pcre_study(cp->compiled, study_options, &error);
3139   if (error != NULL)
3140     {
3141     char s[16];
3142     if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3143     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3144     goto EXIT2;
3145     }
3146 #ifdef SUPPORT_PCREGREP_JIT
3147   if (jit_stack != NULL && cp->hint != NULL)
3148     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3149 #endif
3150   }
3151 
3152 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3153 pcre_extra block for each pattern. There will always be an extra block because
3154 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3155 
3156 for (cp = patterns; cp != NULL; cp = cp->next)
3157   {
3158   if (match_limit > 0)
3159     {
3160     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3161     cp->hint->match_limit = match_limit;
3162     }
3163 
3164   if (match_limit_recursion > 0)
3165     {
3166     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3167     cp->hint->match_limit_recursion = match_limit_recursion;
3168     }
3169   }
3170 
3171 /* If there are include or exclude patterns read from the command line, compile
3172 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3173 0. */
3174 
3175 for (j = 0; j < 4; j++)
3176   {
3177   int k;
3178   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3179     {
3180     if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3181          (k == 1 && cp->next == NULL)? 0 : k))
3182       goto EXIT2;
3183     }
3184   }
3185 
3186 /* Read and compile include/exclude patterns from files. */
3187 
3188 for (fn = include_from; fn != NULL; fn = fn->next)
3189   {
3190   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3191     goto EXIT2;
3192   }
3193 
3194 for (fn = exclude_from; fn != NULL; fn = fn->next)
3195   {
3196   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3197     goto EXIT2;
3198   }
3199 
3200 /* If there are no files that contain lists of files to search, and there are
3201 no file arguments, search stdin, and then exit. */
3202 
3203 if (file_lists == NULL && i >= argc)
3204   {
3205   rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3206     (filenames > FN_DEFAULT)? stdin_name : NULL);
3207   goto EXIT;
3208   }
3209 
3210 /* If any files that contain lists of files to search have been specified,
3211 read them line by line and search the given files. */
3212 
3213 for (fn = file_lists; fn != NULL; fn = fn->next)
3214   {
3215   char buffer[PATBUFSIZE];
3216   FILE *fl;
3217   if (strcmp(fn->name, "-") == 0) fl = stdin; else
3218     {
3219     fl = fopen(fn->name, "rb");
3220     if (fl == NULL)
3221       {
3222       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3223         strerror(errno));
3224       goto EXIT2;
3225       }
3226     }
3227   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3228     {
3229     int frc;
3230     char *end = buffer + (int)strlen(buffer);
3231     while (end > buffer && isspace(end[-1])) end--;
3232     *end = 0;
3233     if (*buffer != 0)
3234       {
3235       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3236       if (frc > 1) rc = frc;
3237         else if (frc == 0 && rc == 1) rc = 0;
3238       }
3239     }
3240   if (fl != stdin) fclose(fl);
3241   }
3242 
3243 /* After handling file-list, work through remaining arguments. Pass in the fact
3244 that there is only one argument at top level - this suppresses the file name if
3245 the argument is not a directory and filenames are not otherwise forced. */
3246 
3247 only_one_at_top = i == argc - 1 && file_lists == NULL;
3248 
3249 for (; i < argc; i++)
3250   {
3251   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3252     only_one_at_top);
3253   if (frc > 1) rc = frc;
3254     else if (frc == 0 && rc == 1) rc = 0;
3255   }
3256 
3257 EXIT:
3258 #ifdef SUPPORT_PCREGREP_JIT
3259 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3260 #endif
3261 
3262 free(main_buffer);
3263 free((void *)pcretables);
3264 
3265 free_pattern_chain(patterns);
3266 free_pattern_chain(include_patterns);
3267 free_pattern_chain(include_dir_patterns);
3268 free_pattern_chain(exclude_patterns);
3269 free_pattern_chain(exclude_dir_patterns);
3270 
3271 free_file_chain(exclude_from);
3272 free_file_chain(include_from);
3273 free_file_chain(pattern_files);
3274 free_file_chain(file_lists);
3275 
3276 while (only_matching != NULL)
3277   {
3278   omstr *this = only_matching;
3279   only_matching = this->next;
3280   free(this);
3281   }
3282 
3283 pcregrep_exit(rc);
3284 
3285 EXIT2:
3286 rc = 2;
3287 goto EXIT;
3288 }
3289 
3290 /* End of pcregrep */
3291