• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2016 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if defined(_WIN32) || defined(WIN32)
62 #include <io.h>                /* For _setmode() */
63 #include <fcntl.h>             /* For _O_BINARY */
64 #endif
65 
66 #ifdef SUPPORT_PCRE2GREP_CALLOUT
67 #include <sys/wait.h>
68 #endif
69 
70 #ifdef HAVE_UNISTD_H
71 #include <unistd.h>
72 #endif
73 
74 #ifdef SUPPORT_LIBZ
75 #include <zlib.h>
76 #endif
77 
78 #ifdef SUPPORT_LIBBZ2
79 #include <bzlib.h>
80 #endif
81 
82 #define PCRE2_CODE_UNIT_WIDTH 8
83 #include "pcre2.h"
84 
85 #define FALSE 0
86 #define TRUE 1
87 
88 typedef int BOOL;
89 
90 #define OFFSET_SIZE 33
91 
92 #if BUFSIZ > 8192
93 #define MAXPATLEN BUFSIZ
94 #else
95 #define MAXPATLEN 8192
96 #endif
97 
98 #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
99 
100 /* Values for the "filenames" variable, which specifies options for file name
101 output. The order is important; it is assumed that a file name is wanted for
102 all values greater than FN_DEFAULT. */
103 
104 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
105 
106 /* File reading styles */
107 
108 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
109 
110 /* Actions for the -d and -D options */
111 
112 enum { dee_READ, dee_SKIP, dee_RECURSE };
113 enum { DEE_READ, DEE_SKIP };
114 
115 /* Actions for special processing options (flag bits) */
116 
117 #define PO_WORD_MATCH     0x0001
118 #define PO_LINE_MATCH     0x0002
119 #define PO_FIXED_STRINGS  0x0004
120 
121 /* Binary file options */
122 
123 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
124 
125 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
126 environments), a warning is issued if the value of fwrite() is ignored.
127 Unfortunately, casting to (void) does not suppress the warning. To get round
128 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
129 apply to fprintf(). */
130 
/* The expansion is wrapped in do { } while (0) so that FWRITE(...); is exactly
one statement. A bare "if (...) {}" expansion breaks when the macro is used as
the body of an if that has an else branch. The empty if on the result of
fwrite() is the "fudge" that stops the unused-result warning. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
132 
133 /* Under Windows, we have to set stdout to be binary, so that it does not
134 convert \r\n at the ends of output lines to \r\r\n. However, that means that
135 any messages written to stdout must have \r\n as their line terminator. This is
136 handled by using STDOUT_NL as the newline string. */
137 
138 #if defined(_WIN32) || defined(WIN32)
139 #define STDOUT_NL  "\r\n"
140 #else
141 #define STDOUT_NL  "\n"
142 #endif
143 
144 
145 
146 /*************************************************
147 *               Global variables                 *
148 *************************************************/
149 
150 /* Jeffrey Friedl has some debugging requirements that are not part of the
151 regular code. */
152 
153 #ifdef JFRIEDL_DEBUG
154 static int S_arg = -1;
155 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
156 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
157 static const char *jfriedl_prefix = "";
158 static const char *jfriedl_postfix = "";
159 #endif
160 
161 static char *colour_string = (char *)"1;31";
162 static char *colour_option = NULL;
163 static char *dee_option = NULL;
164 static char *DEE_option = NULL;
165 static char *locale = NULL;
166 static char *main_buffer = NULL;
167 static char *newline_arg = NULL;
168 static char *om_separator = (char *)"";
169 static char *stdin_name = (char *)"(standard input)";
170 
171 static int after_context = 0;
172 static int before_context = 0;
173 static int binary_files = BIN_BINARY;
174 static int both_context = 0;
175 static int bufthird = PCRE2GREP_BUFSIZE;
176 static int bufsize = 3*PCRE2GREP_BUFSIZE;
177 static int endlinetype;
178 
179 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
180 static int dee_action = dee_SKIP;
181 #else
182 static int dee_action = dee_READ;
183 #endif
184 static int DEE_action = DEE_READ;
185 static int error_count = 0;
186 static int filenames = FN_DEFAULT;
187 
188 #ifdef SUPPORT_PCRE2GREP_JIT
189 static BOOL use_jit = TRUE;
190 #else
191 static BOOL use_jit = FALSE;
192 #endif
193 
194 static const uint8_t *character_tables = NULL;
195 
196 static uint32_t pcre2_options = 0;
197 static uint32_t process_options = 0;
198 static uint32_t match_limit = 0;
199 static uint32_t recursion_limit = 0;
200 
201 static pcre2_compile_context *compile_context;
202 static pcre2_match_context *match_context;
203 static pcre2_match_data *match_data;
204 static PCRE2_SIZE *offsets;
205 
206 static BOOL count_only = FALSE;
207 static BOOL do_colour = FALSE;
208 static BOOL file_offsets = FALSE;
209 static BOOL hyphenpending = FALSE;
210 static BOOL invert = FALSE;
211 static BOOL line_buffered = FALSE;
212 static BOOL line_offsets = FALSE;
213 static BOOL multiline = FALSE;
214 static BOOL number = FALSE;
215 static BOOL omit_zero_count = FALSE;
216 static BOOL resource_error = FALSE;
217 static BOOL quiet = FALSE;
218 static BOOL show_only_matching = FALSE;
219 static BOOL silent = FALSE;
220 static BOOL utf = FALSE;
221 
222 /* Structure for list of --only-matching capturing numbers. */
223 
224 typedef struct omstr {
225   struct omstr *next;
226   int groupnum;
227 } omstr;
228 
229 static omstr *only_matching = NULL;
230 static omstr *only_matching_last = NULL;
231 
232 /* Structure for holding the two variables that describe a number chain. */
233 
234 typedef struct omdatastr {
235   omstr **anchor;
236   omstr **lastptr;
237 } omdatastr;
238 
239 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
240 
241 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
242 
243 typedef struct fnstr {
244   struct fnstr *next;
245   char *name;
246 } fnstr;
247 
248 static fnstr *exclude_from = NULL;
249 static fnstr *exclude_from_last = NULL;
250 static fnstr *include_from = NULL;
251 static fnstr *include_from_last = NULL;
252 
253 static fnstr *file_lists = NULL;
254 static fnstr *file_lists_last = NULL;
255 static fnstr *pattern_files = NULL;
256 static fnstr *pattern_files_last = NULL;
257 
258 /* Structure for holding the two variables that describe a file name chain. */
259 
260 typedef struct fndatastr {
261   fnstr **anchor;
262   fnstr **lastptr;
263 } fndatastr;
264 
265 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
266 static fndatastr include_from_data = { &include_from, &include_from_last };
267 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
268 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
269 
270 /* Structure for pattern and its compiled form; used for matching patterns and
271 also for include/exclude patterns. */
272 
273 typedef struct patstr {
274   struct patstr *next;
275   char *string;
276   pcre2_code *compiled;
277 } patstr;
278 
279 static patstr *patterns = NULL;
280 static patstr *patterns_last = NULL;
281 static patstr *include_patterns = NULL;
282 static patstr *include_patterns_last = NULL;
283 static patstr *exclude_patterns = NULL;
284 static patstr *exclude_patterns_last = NULL;
285 static patstr *include_dir_patterns = NULL;
286 static patstr *include_dir_patterns_last = NULL;
287 static patstr *exclude_dir_patterns = NULL;
288 static patstr *exclude_dir_patterns_last = NULL;
289 
290 /* Structure holding the two variables that describe a pattern chain. A pointer
291 to such structures is used for each appropriate option. */
292 
293 typedef struct patdatastr {
294   patstr **anchor;
295   patstr **lastptr;
296 } patdatastr;
297 
298 static patdatastr match_patdata = { &patterns, &patterns_last };
299 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
300 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
301 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
302 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
303 
304 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
305                                  &include_dir_patterns, &exclude_dir_patterns };
306 
307 static const char *incexname[4] = { "--include", "--exclude",
308                                     "--include-dir", "--exclude-dir" };
309 
310 /* Structure for options and list of them */
311 
312 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
313        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
314 
315 typedef struct option_item {
316   int type;
317   int one_char;
318   void *dataptr;
319   const char *long_name;
320   const char *help_text;
321 } option_item;
322 
323 /* Options without a single-letter equivalent get a negative value. This can be
324 used to identify them. */
325 
326 #define N_COLOUR       (-1)
327 #define N_EXCLUDE      (-2)
328 #define N_EXCLUDE_DIR  (-3)
329 #define N_HELP         (-4)
330 #define N_INCLUDE      (-5)
331 #define N_INCLUDE_DIR  (-6)
332 #define N_LABEL        (-7)
333 #define N_LOCALE       (-8)
334 #define N_NULL         (-9)
335 #define N_LOFFSETS     (-10)
336 #define N_FOFFSETS     (-11)
337 #define N_LBUFFER      (-12)
338 #define N_M_LIMIT      (-13)
339 #define N_M_LIMIT_REC  (-14)
340 #define N_BUFSIZE      (-15)
341 #define N_NOJIT        (-16)
342 #define N_FILE_LIST    (-17)
343 #define N_BINARY_FILES (-18)
344 #define N_EXCLUDE_FROM (-19)
345 #define N_INCLUDE_FROM (-20)
346 #define N_OM_SEPARATOR (-21)
347 
348 static option_item optionlist[] = {
349   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
350   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
351   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
352   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
353   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
354   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
355   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
356   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
357   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
358   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
359   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
360   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
361   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
362   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
363   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
364   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
365   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
366   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
367   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
368   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
369   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
370   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
371 #ifdef SUPPORT_PCRE2GREP_JIT
372   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
373 #else
374   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
375 #endif
376   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
377   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
378   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
379   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
380   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
381   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
382   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
383   { OP_U32NUMBER,  N_M_LIMIT_REC, &recursion_limit, "recursion-limit=number", "set PCRE match recursion limit option" },
384   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
385   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
386   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
387   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
388   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
389   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
390   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
391   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
392   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
393   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
394   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
395   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
396   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
397 #ifdef JFRIEDL_DEBUG
398   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
399 #endif
400   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
401   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
402   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
403   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
404   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
405   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
406   { OP_NODATA,    0,        NULL,               NULL,            NULL }
407 };
408 
409 /* Table of names for newline types. Must be kept in step with the definitions
410 of PCRE2_NEWLINE_xx in pcre2.h. */
411 
412 static const char *newlines[] = {
413   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
414 
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Note that the PATBUFSIZE macro (MAXPATLEN + 10)
assumes the longest prefix+suffix is 10 characters; if anything longer is
added, it must be adjusted. */
421 
422 static const char *prefix[] = {
423   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
424 
425 static const char *suffix[] = {
426   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
427 
428 /* UTF-8 tables - used only when the newline setting is "any". */
429 
430 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
431 
432 const char utf8_table4[] = {
433   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
434   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
435   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
436   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
437 
438 
439 
440 /*************************************************
441 *         Case-independent string compare        *
442 *************************************************/
443 
/* Compare two zero-terminated strings, ignoring case as defined by tolower().

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal ignoring case,
            -1 if the first differing character of str1 is the smaller,
            +1 if it is the larger
*/

static int
strcmpic(const char *str1, const char *str2)
{
while (*str1 != '\0' || *str2 != '\0')
  {
  /* The <ctype.h> functions require an argument that is representable as an
  unsigned char (or EOF). Passing a plain char that happens to be negative is
  undefined behaviour, so convert first. */
  int c1 = tolower((unsigned char)*str1++);
  int c2 = tolower((unsigned char)*str2++);

  /* If one string is shorter, its terminating zero compares unequal here, so
  the pointers are never advanced beyond the first terminator reached. */
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
456 
457 
458 
459 /*************************************************
460 *         Exit from the program                  *
461 *************************************************/
462 
463 /* If there has been a resource error, give a suitable message.
464 
465 Argument:  the return code
466 Returns:   does not return
467 */
468 
469 static void
pcre2grep_exit(int rc)470 pcre2grep_exit(int rc)
471 {
472 if (resource_error)
473   {
474   fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
475     "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
476     PCRE2_ERROR_RECURSIONLIMIT);
477   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
478   }
479 exit(rc);
480 }
481 
482 
483 /*************************************************
484 *          Add item to chain of patterns         *
485 *************************************************/
486 
487 /* Used to add an item onto a chain, or just return an unconnected item if the
488 "after" argument is NULL.
489 
490 Arguments:
491   s          pattern string to add
492   after      if not NULL points to item to insert after
493 
494 Returns:     new pattern block or NULL on error
495 */
496 
497 static patstr *
add_pattern(char * s,patstr * after)498 add_pattern(char *s, patstr *after)
499 {
500 patstr *p = (patstr *)malloc(sizeof(patstr));
501 if (p == NULL)
502   {
503   fprintf(stderr, "pcre2grep: malloc failed\n");
504   pcre2grep_exit(2);
505   }
506 if (strlen(s) > MAXPATLEN)
507   {
508   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
509     MAXPATLEN);
510   free(p);
511   return NULL;
512   }
513 p->next = NULL;
514 p->string = s;
515 p->compiled = NULL;
516 
517 if (after != NULL)
518   {
519   p->next = after->next;
520   after->next = p;
521   }
522 return p;
523 }
524 
525 
526 /*************************************************
527 *           Free chain of patterns               *
528 *************************************************/
529 
530 /* Used for several chains of patterns.
531 
532 Argument: pointer to start of chain
533 Returns:  nothing
534 */
535 
536 static void
free_pattern_chain(patstr * pc)537 free_pattern_chain(patstr *pc)
538 {
539 while (pc != NULL)
540   {
541   patstr *p = pc;
542   pc = p->next;
543   if (p->compiled != NULL) pcre2_code_free(p->compiled);
544   free(p);
545   }
546 }
547 
548 
549 /*************************************************
550 *           Free chain of file names             *
551 *************************************************/
552 
553 /*
554 Argument: pointer to start of chain
555 Returns:  nothing
556 */
557 
558 static void
free_file_chain(fnstr * fn)559 free_file_chain(fnstr *fn)
560 {
561 while (fn != NULL)
562   {
563   fnstr *f = fn;
564   fn = f->next;
565   free(f);
566   }
567 }
568 
569 
570 /*************************************************
571 *            OS-specific functions               *
572 *************************************************/
573 
574 /* These functions are defined so that they can be made system specific.
575 At present there are versions for Unix-style environments, Windows, native
576 z/OS, and "no support". */
577 
578 
579 /************* Directory scanning Unix-style and z/OS ***********/
580 
581 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
582 #include <sys/types.h>
583 #include <sys/stat.h>
584 #include <dirent.h>
585 
586 #if defined NATIVE_ZOS
587 /************* Directory and PDS/E scanning for z/OS ***********/
588 /************* z/OS looks mostly like Unix with USS ************/
589 /* However, z/OS needs the #include statements in this header */
590 #include "pcrzosfs.h"
591 /* That header is not included in the main PCRE distribution because
592    other apparatus is needed to compile pcre2grep for z/OS. The header
593    can be found in the special z/OS distribution, which is available
594    from www.zaconsultants.net or from www.cbttape.org. */
595 #endif
596 
597 typedef DIR directory_type;
598 #define FILESEP '/'
599 
/* Test whether a path names a directory.

Argument:  the path to test
Returns:   non-zero if stat() succeeds and reports a directory; otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat sb;

/* A path that cannot be stat()ed is reported as "not a directory", in the
expectation that the later attempt to open it as a file will fail. */

if (stat(filename, &sb) >= 0) return S_ISDIR(sb.st_mode);
return 0;
}
608 
609 static directory_type *
opendirectory(char * filename)610 opendirectory(char *filename)
611 {
612 return opendir(filename);
613 }
614 
615 static char *
readdirectory(directory_type * dir)616 readdirectory(directory_type *dir)
617 {
618 for (;;)
619   {
620   struct dirent *dent = readdir(dir);
621   if (dent == NULL) return NULL;
622   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
623     return dent->d_name;
624   }
625 /* Control never reaches here */
626 }
627 
628 static void
closedirectory(directory_type * dir)629 closedirectory(directory_type *dir)
630 {
631 closedir(dir);
632 }
633 
634 
635 /************* Test for regular file, Unix-style **********/
636 
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   non-zero if stat() reports a regular file, or if stat() fails;
           otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat sb;

/* A path that cannot be stat()ed is reported as regular, in the expectation
that the later attempt to open it as a file will fail. */

if (stat(filename, &sb) >= 0) return S_ISREG(sb.st_mode);
return 1;
}
645 
646 
647 #if defined NATIVE_ZOS
648 /************* Test for a terminal in z/OS **********/
649 /* isatty() does not work in a TSO environment, so always give FALSE.*/
650 
651 static BOOL
is_stdout_tty(void)652 is_stdout_tty(void)
653 {
654 return FALSE;
655 }
656 
657 static BOOL
is_file_tty(FILE * f)658 is_file_tty(FILE *f)
659 {
660 return FALSE;
661 }
662 
663 
664 /************* Test for a terminal, Unix-style **********/
665 
666 #else
667 static BOOL
is_stdout_tty(void)668 is_stdout_tty(void)
669 {
670 return isatty(fileno(stdout));
671 }
672 
673 static BOOL
is_file_tty(FILE * f)674 is_file_tty(FILE *f)
675 {
676 return isatty(fileno(f));
677 }
678 #endif
679 
680 /* End of Unix-style or native z/OS environment functions. */
681 
682 
683 /************* Directory scanning in Windows ***********/
684 
685 /* I (Philip Hazel) have no means of testing this code. It was contributed by
686 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
687 when it did not exist. David Byron added a patch that moved the #include of
688 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
689 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
690 undefined when it is indeed undefined. */
691 
692 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
693 
694 #ifndef STRICT
695 # define STRICT
696 #endif
697 #ifndef WIN32_LEAN_AND_MEAN
698 # define WIN32_LEAN_AND_MEAN
699 #endif
700 
701 #include <windows.h>
702 
703 #ifndef INVALID_FILE_ATTRIBUTES
704 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
705 #endif
706 
707 typedef struct directory_type
708 {
709 HANDLE handle;
710 BOOL first;
711 WIN32_FIND_DATA data;
712 } directory_type;
713 
714 #define FILESEP '/'
715 
716 int
isdirectory(char * filename)717 isdirectory(char *filename)
718 {
719 DWORD attr = GetFileAttributes(filename);
720 if (attr == INVALID_FILE_ATTRIBUTES)
721   return 0;
722 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
723 }
724 
725 directory_type *
opendirectory(char * filename)726 opendirectory(char *filename)
727 {
728 size_t len;
729 char *pattern;
730 directory_type *dir;
731 DWORD err;
732 len = strlen(filename);
733 pattern = (char *)malloc(len + 3);
734 dir = (directory_type *)malloc(sizeof(*dir));
735 if ((pattern == NULL) || (dir == NULL))
736   {
737   fprintf(stderr, "pcre2grep: malloc failed\n");
738   pcre2grep_exit(2);
739   }
740 memcpy(pattern, filename, len);
741 memcpy(&(pattern[len]), "\\*", 3);
742 dir->handle = FindFirstFile(pattern, &(dir->data));
743 if (dir->handle != INVALID_HANDLE_VALUE)
744   {
745   free(pattern);
746   dir->first = TRUE;
747   return dir;
748   }
749 err = GetLastError();
750 free(pattern);
751 free(dir);
752 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
753 return NULL;
754 }
755 
756 char *
readdirectory(directory_type * dir)757 readdirectory(directory_type *dir)
758 {
759 for (;;)
760   {
761   if (!dir->first)
762     {
763     if (!FindNextFile(dir->handle, &(dir->data)))
764       return NULL;
765     }
766   else
767     {
768     dir->first = FALSE;
769     }
770   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
771     return dir->data.cFileName;
772   }
773 #ifndef _MSC_VER
774 return NULL;   /* Keep compiler happy; never executed */
775 #endif
776 }
777 
778 void
closedirectory(directory_type * dir)779 closedirectory(directory_type *dir)
780 {
781 FindClose(dir->handle);
782 free(dir);
783 }
784 
785 
786 /************* Test for regular file in Windows **********/
787 
788 /* I don't know how to do this, or if it can be done; assume all paths are
789 regular if they are not directories. */
790 
/* Windows version: any path that isdirectory() does not report as a directory
is treated as a regular file. */

int isregfile(char *filename)
{
int is_dir = isdirectory(filename);
return !is_dir;
}
795 
796 
797 /************* Test for a terminal in Windows **********/
798 
799 /* I don't know how to do this; assume never */
800 
801 static BOOL
is_stdout_tty(void)802 is_stdout_tty(void)
803 {
804 return FALSE;
805 }
806 
807 static BOOL
is_file_tty(FILE * f)808 is_file_tty(FILE *f)
809 {
810 return FALSE;
811 }
812 
813 /* End of Windows functions */
814 
815 
816 /************* Directory scanning when we can't do it ***********/
817 
818 /* The type is void, and apart from isdirectory(), the functions do nothing. */
819 
820 #else
821 
822 #define FILESEP 0
823 typedef void directory_type;
824 
/* No directory support: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
opendirectory(char * filename)826 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* No directory support: there is never an entry to return. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}
/* No directory support: closing does nothing. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
829 
830 
831 /************* Test for regular file when we can't do it **********/
832 
833 /* Assume all files are regular. */
834 
/* No file-type support: every path is reported as a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
836 
837 
838 /************* Test for a terminal when we can't do it **********/
839 
840 static BOOL
is_stdout_tty(void)841 is_stdout_tty(void)
842 {
843 return FALSE;
844 }
845 
846 static BOOL
is_file_tty(FILE * f)847 is_file_tty(FILE *f)
848 {
849 return FALSE;
850 }
851 
852 #endif  /* End of system-specific functions */
853 
854 
855 
856 #ifndef HAVE_STRERROR
857 /*************************************************
858 *     Provide strerror() for non-ANSI libraries  *
859 *************************************************/
860 
861 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
862 in their libraries, but can provide the same facility by this simple
863 alternative function. */
864 
865 extern int   sys_nerr;
866 extern char *sys_errlist[];
867 
868 char *
strerror(int n)869 strerror(int n)
870 {
871 if (n < 0 || n >= sys_nerr) return "unknown error number";
872 return sys_errlist[n];
873 }
874 #endif /* HAVE_STRERROR */
875 
876 
877 
878 /*************************************************
879 *                Usage function                  *
880 *************************************************/
881 
882 static int
usage(int rc)883 usage(int rc)
884 {
885 option_item *op;
886 fprintf(stderr, "Usage: pcre2grep [-");
887 for (op = optionlist; op->one_char != 0; op++)
888   {
889   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
890   }
891 fprintf(stderr, "] [long options] [pattern] [files]\n");
892 fprintf(stderr, "Type `pcre2grep --help' for more information and the long "
893   "options.\n");
894 return rc;
895 }
896 
897 
898 
899 /*************************************************
900 *                Help function                   *
901 *************************************************/
902 
903 static void
help(void)904 help(void)
905 {
906 option_item *op;
907 
908 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
909 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
910 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
911 
912 #ifdef SUPPORT_PCRE2GREP_CALLOUT
913 printf("Callout scripts in patterns are supported." STDOUT_NL);
914 #else
915 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
916 #endif
917 
918 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
919 
920 #ifdef SUPPORT_LIBZ
921 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
922 #endif
923 
924 #ifdef SUPPORT_LIBBZ2
925 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
926 #endif
927 
928 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
929 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
930 #else
931 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
932 #endif
933 
934 printf("Example: pcre2grep -i 'hello.*world' menu.h main.c" STDOUT_NL STDOUT_NL);
935 printf("Options:" STDOUT_NL);
936 
937 for (op = optionlist; op->one_char != 0; op++)
938   {
939   int n;
940   char s[4];
941 
942   if (op->one_char > 0 && (op->long_name)[0] == 0)
943     n = 31 - printf("  -%c", op->one_char);
944   else
945     {
946     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
947       else strcpy(s, "   ");
948     n = 31 - printf("  %s --%s", s, op->long_name);
949     }
950 
951   if (n < 1) n = 1;
952   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
953   }
954 
955 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL);
956 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
957 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
958 printf("space is removed and blank lines are ignored." STDOUT_NL);
959 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
960 
961 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
962 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
963 }
964 
965 
966 
967 /*************************************************
968 *            Test exclude/includes               *
969 *************************************************/
970 
971 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
972 there are no includes, the path must match an include pattern.
973 
974 Arguments:
975   path      the path to be matched
976   ip        the chain of include patterns
977   ep        the chain of exclude patterns
978 
979 Returns:    TRUE if the path is not excluded
980 */
981 
982 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)983 test_incexc(char *path, patstr *ip, patstr *ep)
984 {
985 int plen = strlen((const char *)path);
986 
987 for (; ep != NULL; ep = ep->next)
988   {
989   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
990     return FALSE;
991   }
992 
993 if (ip == NULL) return TRUE;
994 
995 for (; ip != NULL; ip = ip->next)
996   {
997   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
998     return TRUE;
999   }
1000 
1001 return FALSE;
1002 }
1003 
1004 
1005 
1006 /*************************************************
1007 *         Decode integer argument value          *
1008 *************************************************/
1009 
1010 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1011 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1012 just keep it simple.
1013 
1014 Arguments:
1015   option_data   the option data string
1016   op            the option item (for error messages)
1017   longop        TRUE if option given in long form
1018 
1019 Returns:        a long integer
1020 */
1021 
1022 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1023 decode_number(char *option_data, option_item *op, BOOL longop)
1024 {
1025 unsigned long int n = 0;
1026 char *endptr = option_data;
1027 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1028 while (isdigit((unsigned char)(*endptr)))
1029   n = n * 10 + (int)(*endptr++ - '0');
1030 if (toupper(*endptr) == 'K')
1031   {
1032   n *= 1024;
1033   endptr++;
1034   }
1035 else if (toupper(*endptr) == 'M')
1036   {
1037   n *= 1024*1024;
1038   endptr++;
1039   }
1040 
1041 if (*endptr != 0)   /* Error */
1042   {
1043   if (longop)
1044     {
1045     char *equals = strchr(op->long_name, '=');
1046     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1047       (int)(equals - op->long_name);
1048     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1049       option_data, nlen, op->long_name);
1050     }
1051   else
1052     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1053       option_data, op->one_char);
1054   pcre2grep_exit(usage(2));
1055   }
1056 
1057 return n;
1058 }
1059 
1060 
1061 
1062 /*************************************************
1063 *       Add item to a chain of numbers           *
1064 *************************************************/
1065 
1066 /* Used to add an item onto a chain, or just return an unconnected item if the
1067 "after" argument is NULL.
1068 
1069 Arguments:
1070   n          the number to add
1071   after      if not NULL points to item to insert after
1072 
1073 Returns:     new number block
1074 */
1075 
1076 static omstr *
add_number(int n,omstr * after)1077 add_number(int n, omstr *after)
1078 {
1079 omstr *om = (omstr *)malloc(sizeof(omstr));
1080 
1081 if (om == NULL)
1082   {
1083   fprintf(stderr, "pcre2grep: malloc failed\n");
1084   pcre2grep_exit(2);
1085   }
1086 om->next = NULL;
1087 om->groupnum = n;
1088 
1089 if (after != NULL)
1090   {
1091   om->next = after->next;
1092   after->next = om;
1093   }
1094 return om;
1095 }
1096 
1097 
1098 
1099 /*************************************************
1100 *            Read one line of input              *
1101 *************************************************/
1102 
1103 /* Normally, input is read using fread() into a large buffer, so many lines may
1104 be read at once. However, doing this for tty input means that no output appears
1105 until a lot of input has been typed. Instead, tty input is handled line by
1106 line. We cannot use fgets() for this, because it does not stop at a binary
1107 zero, and therefore there is no way of telling how many characters it has read,
1108 because there may be binary zeros embedded in the data.
1109 
1110 Arguments:
1111   buffer     the buffer to read into
1112   length     the maximum number of characters to read
1113   f          the file
1114 
1115 Returns:     the number of characters read, zero at end of file
1116 */
1117 
static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before the read, so that a zero or negative length
neither stores into the buffer nor consumes a character from the stream.
Characters are fetched one at a time with fgetc() so that binary zeros in the
data are counted like any other byte. Reading stops after a newline (which is
stored), when the buffer is full, or at end of file. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }

return yield;
}
1130 
1131 
1132 
1133 /*************************************************
1134 *             Find end of line                   *
1135 *************************************************/
1136 
1137 /* The length of the endline sequence that is found is set via lenptr. This may
1138 be zero at the very end of the file if there is no line-ending sequence there.
1139 
1140 Arguments:
1141   p         current position in line
1142   endptr    end of available data
1143   lenptr    where to put the length of the eol sequence
1144 
1145 Returns:    pointer after the last byte of the line,
1146             including the newline byte(s)
1147 */
1148 
1149 static char *
end_of_line(char * p,char * endptr,int * lenptr)1150 end_of_line(char *p, char *endptr, int *lenptr)
1151 {
1152 switch(endlinetype)
1153   {
1154   default:      /* Just in case */
1155   case PCRE2_NEWLINE_LF:
1156   while (p < endptr && *p != '\n') p++;
1157   if (p < endptr)
1158     {
1159     *lenptr = 1;
1160     return p + 1;
1161     }
1162   *lenptr = 0;
1163   return endptr;
1164 
1165   case PCRE2_NEWLINE_CR:
1166   while (p < endptr && *p != '\r') p++;
1167   if (p < endptr)
1168     {
1169     *lenptr = 1;
1170     return p + 1;
1171     }
1172   *lenptr = 0;
1173   return endptr;
1174 
1175   case PCRE2_NEWLINE_CRLF:
1176   for (;;)
1177     {
1178     while (p < endptr && *p != '\r') p++;
1179     if (++p >= endptr)
1180       {
1181       *lenptr = 0;
1182       return endptr;
1183       }
1184     if (*p == '\n')
1185       {
1186       *lenptr = 2;
1187       return p + 1;
1188       }
1189     }
1190   break;
1191 
1192   case PCRE2_NEWLINE_ANYCRLF:
1193   while (p < endptr)
1194     {
1195     int extra = 0;
1196     register int c = *((unsigned char *)p);
1197 
1198     if (utf && c >= 0xc0)
1199       {
1200       int gcii, gcss;
1201       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1202       gcss = 6*extra;
1203       c = (c & utf8_table3[extra]) << gcss;
1204       for (gcii = 1; gcii <= extra; gcii++)
1205         {
1206         gcss -= 6;
1207         c |= (p[gcii] & 0x3f) << gcss;
1208         }
1209       }
1210 
1211     p += 1 + extra;
1212 
1213     switch (c)
1214       {
1215       case '\n':
1216       *lenptr = 1;
1217       return p;
1218 
1219       case '\r':
1220       if (p < endptr && *p == '\n')
1221         {
1222         *lenptr = 2;
1223         p++;
1224         }
1225       else *lenptr = 1;
1226       return p;
1227 
1228       default:
1229       break;
1230       }
1231     }   /* End of loop for ANYCRLF case */
1232 
1233   *lenptr = 0;  /* Must have hit the end */
1234   return endptr;
1235 
1236   case PCRE2_NEWLINE_ANY:
1237   while (p < endptr)
1238     {
1239     int extra = 0;
1240     register int c = *((unsigned char *)p);
1241 
1242     if (utf && c >= 0xc0)
1243       {
1244       int gcii, gcss;
1245       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1246       gcss = 6*extra;
1247       c = (c & utf8_table3[extra]) << gcss;
1248       for (gcii = 1; gcii <= extra; gcii++)
1249         {
1250         gcss -= 6;
1251         c |= (p[gcii] & 0x3f) << gcss;
1252         }
1253       }
1254 
1255     p += 1 + extra;
1256 
1257     switch (c)
1258       {
1259       case '\n':    /* LF */
1260       case '\v':    /* VT */
1261       case '\f':    /* FF */
1262       *lenptr = 1;
1263       return p;
1264 
1265       case '\r':    /* CR */
1266       if (p < endptr && *p == '\n')
1267         {
1268         *lenptr = 2;
1269         p++;
1270         }
1271       else *lenptr = 1;
1272       return p;
1273 
1274 #ifndef EBCDIC
1275       case 0x85:    /* Unicode NEL */
1276       *lenptr = utf? 2 : 1;
1277       return p;
1278 
1279       case 0x2028:  /* Unicode LS */
1280       case 0x2029:  /* Unicode PS */
1281       *lenptr = 3;
1282       return p;
1283 #endif  /* Not EBCDIC */
1284 
1285       default:
1286       break;
1287       }
1288     }   /* End of loop for ANY case */
1289 
1290   *lenptr = 0;  /* Must have hit the end */
1291   return endptr;
1292   }     /* End of overall switch */
1293 }
1294 
1295 
1296 
1297 /*************************************************
1298 *         Find start of previous line            *
1299 *************************************************/
1300 
1301 /* This is called when looking back for before lines to print.
1302 
1303 Arguments:
1304   p         start of the subsequent line
1305   startptr  start of available data
1306 
1307 Returns:    pointer to the start of the previous line
1308 */
1309 
1310 static char *
previous_line(char * p,char * startptr)1311 previous_line(char *p, char *startptr)
1312 {
1313 switch(endlinetype)
1314   {
1315   default:      /* Just in case */
1316   case PCRE2_NEWLINE_LF:
1317   p--;
1318   while (p > startptr && p[-1] != '\n') p--;
1319   return p;
1320 
1321   case PCRE2_NEWLINE_CR:
1322   p--;
1323   while (p > startptr && p[-1] != '\n') p--;
1324   return p;
1325 
1326   case PCRE2_NEWLINE_CRLF:
1327   for (;;)
1328     {
1329     p -= 2;
1330     while (p > startptr && p[-1] != '\n') p--;
1331     if (p <= startptr + 1 || p[-2] == '\r') return p;
1332     }
1333   /* Control can never get here */
1334 
1335   case PCRE2_NEWLINE_ANY:
1336   case PCRE2_NEWLINE_ANYCRLF:
1337   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1338   if (utf) while ((*p & 0xc0) == 0x80) p--;
1339 
1340   while (p > startptr)
1341     {
1342     register unsigned int c;
1343     char *pp = p - 1;
1344 
1345     if (utf)
1346       {
1347       int extra = 0;
1348       while ((*pp & 0xc0) == 0x80) pp--;
1349       c = *((unsigned char *)pp);
1350       if (c >= 0xc0)
1351         {
1352         int gcii, gcss;
1353         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1354         gcss = 6*extra;
1355         c = (c & utf8_table3[extra]) << gcss;
1356         for (gcii = 1; gcii <= extra; gcii++)
1357           {
1358           gcss -= 6;
1359           c |= (pp[gcii] & 0x3f) << gcss;
1360           }
1361         }
1362       }
1363     else c = *((unsigned char *)pp);
1364 
1365     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1366       {
1367       case '\n':    /* LF */
1368       case '\r':    /* CR */
1369       return p;
1370 
1371       default:
1372       break;
1373       }
1374 
1375     else switch (c)
1376       {
1377       case '\n':    /* LF */
1378       case '\v':    /* VT */
1379       case '\f':    /* FF */
1380       case '\r':    /* CR */
1381 #ifndef EBCDIE
1382       case 0x85:    /* Unicode NEL */
1383       case 0x2028:  /* Unicode LS */
1384       case 0x2029:  /* Unicode PS */
1385 #endif  /* Not EBCDIC */
1386       return p;
1387 
1388       default:
1389       break;
1390       }
1391 
1392     p = pp;  /* Back one character */
1393     }        /* End of loop for ANY case */
1394 
1395   return startptr;  /* Hit start of data */
1396   }     /* End of overall switch */
1397 }
1398 
1399 
1400 
1401 
1402 
1403 /*************************************************
1404 *       Print the previous "after" lines         *
1405 *************************************************/
1406 
1407 /* This is called if we are about to lose said lines because of buffer filling,
1408 and at the end of the file. The data in the line is written using fwrite() so
1409 that a binary zero does not terminate it.
1410 
1411 Arguments:
1412   lastmatchnumber   the number of the last matching line, plus one
1413   lastmatchrestart  where we restarted after the last match
1414   endptr            end of available data
1415   printname         filename for printing
1416 
1417 Returns:            nothing
1418 */
1419 
1420 static void
do_after_lines(int lastmatchnumber,char * lastmatchrestart,char * endptr,char * printname)1421 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1422   char *printname)
1423 {
1424 if (after_context > 0 && lastmatchnumber > 0)
1425   {
1426   int count = 0;
1427   while (lastmatchrestart < endptr && count++ < after_context)
1428     {
1429     int ellength;
1430     char *pp = lastmatchrestart;
1431     if (printname != NULL) fprintf(stdout, "%s-", printname);
1432     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1433     pp = end_of_line(pp, endptr, &ellength);
1434     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1435     lastmatchrestart = pp;
1436     }
1437   hyphenpending = TRUE;
1438   }
1439 }
1440 
1441 
1442 
1443 /*************************************************
1444 *   Apply patterns to subject till one matches   *
1445 *************************************************/
1446 
1447 /* This function is called to run through all patterns, looking for a match. It
1448 is used multiple times for the same subject when colouring is enabled, in order
1449 to find all possible matches.
1450 
1451 Arguments:
1452   matchptr     the start of the subject
1453   length       the length of the subject to match
1454   options      options for pcre2_match()
1455   startoffset  where to start matching
1456   mrc          address of where to put the result of pcre2_match()
1457 
1458 Returns:      TRUE if there was a match
1459               FALSE if there was no match
1460               invert if there was a non-fatal error
1461 */
1462 
1463 static BOOL
match_patterns(char * matchptr,size_t length,unsigned int options,size_t startoffset,int * mrc)1464 match_patterns(char *matchptr, size_t length, unsigned int options,
1465   size_t startoffset, int *mrc)
1466 {
1467 int i;
1468 size_t slen = length;
1469 patstr *p = patterns;
1470 const char *msg = "this text:\n\n";
1471 
1472 if (slen > 200)
1473   {
1474   slen = 200;
1475   msg = "text that starts:\n\n";
1476   }
1477 for (i = 1; p != NULL; p = p->next, i++)
1478   {
1479   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1480     startoffset, options, match_data, match_context);
1481   if (*mrc >= 0) return TRUE;
1482   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1483   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1484   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1485   fprintf(stderr, "%s", msg);
1486   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1487   fprintf(stderr, "\n\n");
1488   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_RECURSIONLIMIT ||
1489       *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1490     resource_error = TRUE;
1491   if (error_count++ > 20)
1492     {
1493     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1494     pcre2grep_exit(2);
1495     }
1496   return invert;    /* No more matching; don't show the line again */
1497   }
1498 
1499 return FALSE;  /* No match, no errors */
1500 }
1501 
1502 
1503 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1504 
1505 /*************************************************
1506 *        Parse and execute callout scripts       *
1507 *************************************************/
1508 
1509 /* This function parses a callout string block and executes the
1510 program specified by the string. The string is a list of substrings
1511 separated by pipe characters. The first substring represents the
1512 executable name, and the following substrings specify the arguments:
1513 
1514   program_name|param1|param2|...
1515 
1516 Any substring (including the program name) can contain escape sequences
1517 started by the dollar character. The escape sequences are substituted as
1518 follows:
1519 
1520   $<digits> or ${<digits>} is replaced by the captured substring of the given
1521   decimal number, which must be greater than zero. If the number is greater
1522   than the number of capturing substrings, or if the capture is unset, the
1523   replacement is empty.
1524 
1525   Any other character is substituted by itself. E.g: $$ is replaced by a single
1526   dollar or $| replaced by a pipe character.
1527 
1528 Example:
1529 
1530   echo -e "abcde\n12345" | pcre2grep \
1531     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
1532 
1533   Output:
1534 
1535     Arg1: [a] [bcd] [d] Arg2: |a| ()
1536     abcde
1537     Arg1: [1] [234] [4] Arg2: |1| ()
1538     12345
1539 
1540 Arguments:
1541   calloutptr   the callout block
1542 
1543 Returns:       0 if the callout is ignored or the program's status is zero, else 1
1544 */
1545 
1546 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)1547 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
1548 {
1549 PCRE2_SIZE length = calloutptr->callout_string_length;
1550 PCRE2_SPTR string = calloutptr->callout_string;
1551 PCRE2_SPTR subject = calloutptr->subject;
1552 PCRE2_SIZE *ovector = calloutptr->offset_vector;
1553 PCRE2_SIZE capture_top = calloutptr->capture_top;
1554 PCRE2_SIZE argsvectorlen = 2;
1555 PCRE2_SIZE argslen = 1;
1556 char *args;
1557 char *argsptr;
1558 char **argsvector;
1559 char **argsvectorptr;
1560 pid_t pid;
1561 int result = 0;
1562 
1563 (void)unused;   /* Avoid compiler warning */
1564 
1565 /* Only callout with strings are supported. */
1566 if (string == NULL || length == 0) return 0;
1567 
1568 /* Checking syntax and compute the number of string fragments. Callout strings
1569 are ignored in case of a syntax error. */
1570 
1571 while (length > 0)
1572   {
1573   if (*string == '|')
1574     {
1575     argsvectorlen++;
1576 
1577     /* Maximum 10000 arguments allowed. */
1578     if (argsvectorlen > 10000) return 0;
1579     }
1580   else if (*string == '$')
1581     {
1582     PCRE2_SIZE capture_id = 0;
1583 
1584     string++;
1585     length--;
1586 
1587     /* Syntax error: a character must be present after $. */
1588     if (length == 0) return 0;
1589 
1590     if (*string >= '1' && *string <= '9')
1591       {
1592       do
1593         {
1594         /* Maximum capture id is 65535. */
1595         if (capture_id <= 65535)
1596           capture_id = capture_id * 10 + (*string - '0');
1597 
1598         string++;
1599         length--;
1600         }
1601       while (length > 0 && *string >= '0' && *string <= '9');
1602 
1603       /* To negate the effect of string++ below. */
1604       string--;
1605       length++;
1606       }
1607     else if (*string == '{')
1608       {
1609       /* Must be a decimal number in parenthesis, e.g: (5) or (38) */
1610       string++;
1611       length--;
1612 
1613       /* Syntax error: a decimal number required. */
1614       if (length == 0) return 0;
1615       if (*string < '1' || *string > '9') return 0;
1616 
1617       do
1618         {
1619         /* Maximum capture id is 65535. */
1620         if (capture_id <= 65535)
1621           capture_id = capture_id * 10 + (*string - '0');
1622 
1623         string++;
1624         length--;
1625 
1626         /* Syntax error: no more characters */
1627         if (length == 0) return 0;
1628         }
1629       while (*string >= '0' && *string <= '9');
1630 
1631       /* Syntax error: close paren is missing. */
1632       if (*string != '}') return 0;
1633       }
1634 
1635     if (capture_id > 0)
1636       {
1637       if (capture_id < capture_top)
1638         {
1639         capture_id *= 2;
1640         argslen += ovector[capture_id + 1] - ovector[capture_id];
1641         }
1642 
1643       /* To negate the effect of argslen++ below. */
1644       argslen--;
1645       }
1646     }
1647 
1648   string++;
1649   length--;
1650   argslen++;
1651   }
1652 
1653 args = (char*)malloc(argslen);
1654 if (args == NULL) return 0;
1655 
1656 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
1657 if (argsvector == NULL)
1658   {
1659   free(args);
1660   return 0;
1661   }
1662 
1663 argsptr = args;
1664 argsvectorptr = argsvector;
1665 
1666 *argsvectorptr++ = argsptr;
1667 
1668 length = calloutptr->callout_string_length;
1669 string = calloutptr->callout_string;
1670 
1671 while (length > 0)
1672   {
1673   if (*string == '|')
1674     {
1675     *argsptr++ = '\0';
1676     *argsvectorptr++ = argsptr;
1677     }
1678   else if (*string == '$')
1679     {
1680     string++;
1681     length--;
1682 
1683     if ((*string >= '1' && *string <= '9') || *string == '{')
1684       {
1685       PCRE2_SIZE capture_id = 0;
1686 
1687       if (*string != '{')
1688         {
1689         do
1690           {
1691           /* Maximum capture id is 65535. */
1692           if (capture_id <= 65535)
1693             capture_id = capture_id * 10 + (*string - '0');
1694 
1695           string++;
1696           length--;
1697           }
1698         while (length > 0 && *string >= '0' && *string <= '9');
1699 
1700         /* To negate the effect of string++ below. */
1701         string--;
1702         length++;
1703         }
1704       else
1705         {
1706         string++;
1707         length--;
1708 
1709         do
1710           {
1711           /* Maximum capture id is 65535. */
1712           if (capture_id <= 65535)
1713             capture_id = capture_id * 10 + (*string - '0');
1714 
1715           string++;
1716           length--;
1717           }
1718         while (*string != '}');
1719         }
1720 
1721         if (capture_id < capture_top)
1722           {
1723           PCRE2_SIZE capturesize;
1724           capture_id *= 2;
1725 
1726           capturesize = ovector[capture_id + 1] - ovector[capture_id];
1727           memcpy(argsptr, subject + ovector[capture_id], capturesize);
1728           argsptr += capturesize;
1729           }
1730       }
1731     else
1732       {
1733       *argsptr++ = *string;
1734       }
1735     }
1736   else
1737     {
1738     *argsptr++ = *string;
1739     }
1740 
1741   string++;
1742   length--;
1743   }
1744 
1745 *argsptr++ = '\0';
1746 *argsvectorptr = NULL;
1747 
1748 pid = fork();
1749 
1750 if (pid == 0)
1751   {
1752   (void)execv(argsvector[0], argsvector);
1753   /* Control gets here if there is an error, e.g. a non-existent program */
1754   exit(1);
1755   }
1756 else if (pid > 0)
1757   (void)waitpid(pid, &result, 0);
1758 
1759 free(args);
1760 free(argsvector);
1761 
1762 /* Currently negative return values are not supported, only zero (match
1763 continues) or non-zero (match fails). */
1764 
1765 return result != 0;
1766 }
1767 
1768 #endif
1769 
1770 
1771 
1772 /*************************************************
1773 *            Grep an individual file             *
1774 *************************************************/
1775 
1776 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1777 times the value of bufthird. The matching point is never allowed to stray into
1778 the top third of the buffer, thus keeping more of the file available for
1779 context printing or for multiline scanning. For large files, the pointer will
1780 be in the middle third most of the time, so the bottom third is available for
1781 "before" context printing.
1782 
1783 Arguments:
1784   handle       the fopened FILE stream for a normal file
1785                the gzFile pointer when reading is via libz
1786                the BZFILE pointer when reading is via libbz2
1787   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1788   filename     the file name or NULL (for errors)
1789   printname    the file name if it is to be printed for each match
1790                or NULL if the file name is not to be printed
1791                it cannot be NULL if filenames[_nomatch]_only is set
1792 
1793 Returns:       0 if there was at least one match
1794                1 otherwise (no matches)
1795                2 if an overlong line is encountered
1796                3 if there is a read error on a .bz2 file
1797 */
1798 
1799 static int
pcre2grep(void * handle,int frtype,char * filename,char * printname)1800 pcre2grep(void *handle, int frtype, char *filename, char *printname)
1801 {
1802 int rc = 1;
1803 int linenumber = 1;
1804 int lastmatchnumber = 0;
1805 int count = 0;
1806 int filepos = 0;
1807 char *lastmatchrestart = NULL;
1808 char *ptr = main_buffer;
1809 char *endptr;
1810 size_t bufflength;
1811 BOOL binary = FALSE;
1812 BOOL endhyphenpending = FALSE;
1813 BOOL input_line_buffered = line_buffered;
1814 FILE *in = NULL;                    /* Ensure initialized */
1815 
1816 #ifdef SUPPORT_LIBZ
1817 gzFile ingz = NULL;
1818 #endif
1819 
1820 #ifdef SUPPORT_LIBBZ2
1821 BZFILE *inbz2 = NULL;
1822 #endif
1823 
1824 
1825 /* Do the first read into the start of the buffer and set up the pointer to end
1826 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1827 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1828 fail. */
1829 
1830 (void)frtype;
1831 
1832 #ifdef SUPPORT_LIBZ
1833 if (frtype == FR_LIBZ)
1834   {
1835   ingz = (gzFile)handle;
1836   bufflength = gzread (ingz, main_buffer, bufsize);
1837   }
1838 else
1839 #endif
1840 
1841 #ifdef SUPPORT_LIBBZ2
1842 if (frtype == FR_LIBBZ2)
1843   {
1844   inbz2 = (BZFILE *)handle;
1845   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1846   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1847   }                                    /* without the cast it is unsigned. */
1848 else
1849 #endif
1850 
1851   {
1852   in = (FILE *)handle;
1853   if (is_file_tty(in)) input_line_buffered = TRUE;
1854   bufflength = input_line_buffered?
1855     read_one_line(main_buffer, bufsize, in) :
1856     fread(main_buffer, 1, bufsize, in);
1857   }
1858 
1859 endptr = main_buffer + bufflength;
1860 
1861 /* Unless binary-files=text, see if we have a binary file. This uses the same
1862 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1863 file. */
1864 
1865 if (binary_files != BIN_TEXT)
1866   {
1867   binary =
1868     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1869   if (binary && binary_files == BIN_NOMATCH) return 1;
1870   }
1871 
1872 /* Loop while the current pointer is not at the end of the file. For large
1873 files, endptr will be at the end of the buffer when we are in the middle of the
1874 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1875 way, the buffer is shifted left and re-filled. */
1876 
1877 while (ptr < endptr)
1878   {
1879   int endlinelength;
1880   int mrc = 0;
1881   unsigned int options = 0;
1882   BOOL match;
1883   char *matchptr = ptr;
1884   char *t = ptr;
1885   size_t length, linelength;
1886   size_t startoffset = 0;
1887 
1888   /* At this point, ptr is at the start of a line. We need to find the length
1889   of the subject string to pass to pcre2_match(). In multiline mode, it is the
1890   length remainder of the data in the buffer. Otherwise, it is the length of
1891   the next line, excluding the terminating newline. After matching, we always
1892   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
1893   option is used for compiling, so that any match is constrained to be in the
1894   first line. */
1895 
1896   t = end_of_line(t, endptr, &endlinelength);
1897   linelength = t - ptr - endlinelength;
1898   length = multiline? (size_t)(endptr - ptr) : linelength;
1899 
1900   /* Check to see if the line we are looking at extends right to the very end
1901   of the buffer without a line terminator. This means the line is too long to
1902   handle. */
1903 
1904   if (endlinelength == 0 && t == main_buffer + bufsize)
1905     {
1906     fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n"
1907                     "pcre2grep: the buffer size is %d\n"
1908                     "pcre2grep: use the --buffer-size option to change it\n",
1909                     linenumber,
1910                     (filename == NULL)? "" : " of file ",
1911                     (filename == NULL)? "" : filename,
1912                     bufthird);
1913     return 2;
1914     }
1915 
1916   /* Extra processing for Jeffrey Friedl's debugging. */
1917 
1918 #ifdef JFRIEDL_DEBUG
1919   if (jfriedl_XT || jfriedl_XR)
1920   {
1921 #     include <sys/time.h>
1922 #     include <time.h>
1923       struct timeval start_time, end_time;
1924       struct timezone dummy;
1925       int i;
1926 
1927       if (jfriedl_XT)
1928       {
1929           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1930           const char *orig = ptr;
1931           ptr = malloc(newlen + 1);
1932           if (!ptr) {
1933                   printf("out of memory");
1934                   pcre2grep_exit(2);
1935           }
1936           endptr = ptr;
1937           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1938           for (i = 0; i < jfriedl_XT; i++) {
1939                   strncpy(endptr, orig,  length);
1940                   endptr += length;
1941           }
1942           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1943           length = newlen;
1944       }
1945 
1946       if (gettimeofday(&start_time, &dummy) != 0)
1947               perror("bad gettimeofday");
1948 
1949 
1950       for (i = 0; i < jfriedl_XR; i++)
1951           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1952               PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1953 
1954       if (gettimeofday(&end_time, &dummy) != 0)
1955               perror("bad gettimeofday");
1956 
1957       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1958                       -
1959                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1960 
1961       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1962       return 0;
1963   }
1964 #endif
1965 
1966   /* We come back here after a match when show_only_matching is set, in order
1967   to find any further matches in the same line. This applies to
1968   --only-matching, --file-offsets, and --line-offsets. */
1969 
1970   ONLY_MATCHING_RESTART:
1971 
1972   /* Run through all the patterns until one matches or there is an error other
1973   than NOMATCH. This code is in a subroutine so that it can be re-used for
1974   finding subsequent matches when colouring matched lines. After finding one
1975   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
1976   this line. */
1977 
1978   match = match_patterns(matchptr, length, options, startoffset, &mrc);
1979   options = PCRE2_NOTEMPTY;
1980 
1981   /* If it's a match or a not-match (as required), do what's wanted. */
1982 
1983   if (match != invert)
1984     {
1985     BOOL hyphenprinted = FALSE;
1986 
1987     /* We've failed if we want a file that doesn't have any matches. */
1988 
1989     if (filenames == FN_NOMATCH_ONLY) return 1;
1990 
1991     /* If all we want is a yes/no answer, we can return immediately. */
1992 
1993     if (quiet) return 0;
1994 
1995     /* Just count if just counting is wanted. */
1996 
1997     else if (count_only) count++;
1998 
1999     /* When handling a binary file and binary-files==binary, the "binary"
2000     variable will be set true (it's false in all other cases). In this
2001     situation we just want to output the file name. No need to scan further. */
2002 
2003     else if (binary)
2004       {
2005       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2006       return 0;
2007       }
2008 
2009     /* Likewise, if all we want is a file name, there is no need to scan any
2010     more lines in the file. */
2011 
2012     else if (filenames == FN_MATCH_ONLY)
2013       {
2014       fprintf(stdout, "%s" STDOUT_NL, printname);
2015       return 0;
2016       }
2017 
2018     /* The --only-matching option prints just the substring that matched,
2019     and/or one or more captured portions of it, as long as these strings are
2020     not empty. The --file-offsets and --line-offsets options output offsets for
2021     the matching substring (all three set show_only_matching). None of these
2022     mutually exclusive options prints any context. Afterwards, adjust the start
2023     and then jump back to look for further matches in the same line. If we are
2024     in invert mode, however, nothing is printed and we do not restart - this
2025     could still be useful because the return code is set. */
2026 
2027     else if (show_only_matching)
2028       {
2029       if (!invert)
2030         {
2031         size_t oldstartoffset;
2032 
2033         if (printname != NULL) fprintf(stdout, "%s:", printname);
2034         if (number) fprintf(stdout, "%d:", linenumber);
2035 
2036         /* Handle --line-offsets */
2037 
2038         if (line_offsets)
2039           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr),
2040             (int)(offsets[1] - offsets[0]));
2041 
2042         /* Handle --file-offsets */
2043 
2044         else if (file_offsets)
2045           fprintf(stdout, "%d,%d" STDOUT_NL,
2046             (int)(filepos + matchptr + offsets[0] - ptr),
2047             (int)(offsets[1] - offsets[0]));
2048 
2049         /* Handle --only-matching, which may occur many times */
2050 
2051         else
2052           {
2053           BOOL printed = FALSE;
2054           omstr *om;
2055 
2056           for (om = only_matching; om != NULL; om = om->next)
2057             {
2058             int n = om->groupnum;
2059             if (n < mrc)
2060               {
2061               int plen = offsets[2*n + 1] - offsets[2*n];
2062               if (plen > 0)
2063                 {
2064                 if (printed) fprintf(stdout, "%s", om_separator);
2065                 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
2066                 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
2067                 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
2068                 printed = TRUE;
2069                 }
2070               }
2071             }
2072 
2073           if (printed || printname != NULL || number)
2074             fprintf(stdout, STDOUT_NL);
2075           }
2076 
2077         /* Prepare to repeat to find the next match in the line. */
2078 
2079         match = FALSE;
2080         if (line_buffered) fflush(stdout);
2081         rc = 0;                      /* Had some success */
2082 
2083         /* If the current match ended past the end of the line (only possible
2084         in multiline mode), we are done with this line. */
2085 
2086         if (offsets[1] > linelength) goto END_ONE_MATCH;
2087 
2088         /* If the pattern contained a lookbehind that included \K, it is
2089         possible that the end of the match might be at or before the actual
2090         starting offset we have just used. In this case, start one character
2091         further on. */
2092 
2093         startoffset = offsets[1];    /* Restart after the match */
2094         oldstartoffset = pcre2_get_startchar(match_data);
2095         if (startoffset <= oldstartoffset)
2096           {
2097           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2098           startoffset = oldstartoffset + 1;
2099           if (utf)
2100             while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
2101           }
2102         goto ONLY_MATCHING_RESTART;
2103         }
2104       }
2105 
2106     /* This is the default case when none of the above options is set. We print
2107     the matching lines(s), possibly preceded and/or followed by other lines of
2108     context. */
2109 
2110     else
2111       {
2112       /* See if there is a requirement to print some "after" lines from a
2113       previous match. We never print any overlaps. */
2114 
2115       if (after_context > 0 && lastmatchnumber > 0)
2116         {
2117         int ellength;
2118         int linecount = 0;
2119         char *p = lastmatchrestart;
2120 
2121         while (p < ptr && linecount < after_context)
2122           {
2123           p = end_of_line(p, ptr, &ellength);
2124           linecount++;
2125           }
2126 
2127         /* It is important to advance lastmatchrestart during this printing so
2128         that it interacts correctly with any "before" printing below. Print
2129         each line's data using fwrite() in case there are binary zeroes. */
2130 
2131         while (lastmatchrestart < p)
2132           {
2133           char *pp = lastmatchrestart;
2134           if (printname != NULL) fprintf(stdout, "%s-", printname);
2135           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
2136           pp = end_of_line(pp, endptr, &ellength);
2137           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2138           lastmatchrestart = pp;
2139           }
2140         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2141         }
2142 
2143       /* If there were non-contiguous lines printed above, insert hyphens. */
2144 
2145       if (hyphenpending)
2146         {
2147         fprintf(stdout, "--" STDOUT_NL);
2148         hyphenpending = FALSE;
2149         hyphenprinted = TRUE;
2150         }
2151 
2152       /* See if there is a requirement to print some "before" lines for this
2153       match. Again, don't print overlaps. */
2154 
2155       if (before_context > 0)
2156         {
2157         int linecount = 0;
2158         char *p = ptr;
2159 
2160         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2161                linecount < before_context)
2162           {
2163           linecount++;
2164           p = previous_line(p, main_buffer);
2165           }
2166 
2167         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2168           fprintf(stdout, "--" STDOUT_NL);
2169 
2170         while (p < ptr)
2171           {
2172           int ellength;
2173           char *pp = p;
2174           if (printname != NULL) fprintf(stdout, "%s-", printname);
2175           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
2176           pp = end_of_line(pp, endptr, &ellength);
2177           FWRITE(p, 1, pp - p, stdout);
2178           p = pp;
2179           }
2180         }
2181 
2182       /* Now print the matching line(s); ensure we set hyphenpending at the end
2183       of the file if any context lines are being output. */
2184 
2185       if (after_context > 0 || before_context > 0)
2186         endhyphenpending = TRUE;
2187 
2188       if (printname != NULL) fprintf(stdout, "%s:", printname);
2189       if (number) fprintf(stdout, "%d:", linenumber);
2190 
2191       /* In multiline mode, we want to print to the end of the line in which
2192       the end of the matched string is found, so we adjust linelength and the
2193       line number appropriately, but only when there actually was a match
2194       (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
2195       the match will always be before the first newline sequence. */
2196 
2197       if (multiline & !invert)
2198         {
2199         char *endmatch = ptr + offsets[1];
2200         t = ptr;
2201         while (t <= endmatch)
2202           {
2203           t = end_of_line(t, endptr, &endlinelength);
2204           if (t < endmatch) linenumber++; else break;
2205           }
2206         linelength = t - ptr - endlinelength;
2207         }
2208 
2209       /*** NOTE: Use only fwrite() to output the data line, so that binary
2210       zeroes are treated as just another data character. */
2211 
2212       /* This extra option, for Jeffrey Friedl's debugging requirements,
2213       replaces the matched string, or a specific captured string if it exists,
2214       with X. When this happens, colouring is ignored. */
2215 
2216 #ifdef JFRIEDL_DEBUG
2217       if (S_arg >= 0 && S_arg < mrc)
2218         {
2219         int first = S_arg * 2;
2220         int last  = first + 1;
2221         FWRITE(ptr, 1, offsets[first], stdout);
2222         fprintf(stdout, "X");
2223         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2224         }
2225       else
2226 #endif
2227 
2228       /* We have to split the line(s) up if colouring, and search for further
2229       matches, but not of course if the line is a non-match. */
2230 
2231       if (do_colour && !invert)
2232         {
2233         int plength;
2234         FWRITE(ptr, 1, offsets[0], stdout);
2235         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
2236         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
2237         fprintf(stdout, "%c[00m", 0x1b);
2238         for (;;)
2239           {
2240           startoffset = offsets[1];
2241           if (startoffset >= linelength + endlinelength ||
2242               !match_patterns(matchptr, length, options, startoffset, &mrc))
2243             break;
2244           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
2245           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
2246           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
2247           fprintf(stdout, "%c[00m", 0x1b);
2248           }
2249 
2250         /* In multiline mode, we may have already printed the complete line
2251         and its line-ending characters (if they matched the pattern), so there
2252         may be no more to print. */
2253 
2254         plength = (int)((linelength + endlinelength) - startoffset);
2255         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
2256         }
2257 
2258       /* Not colouring; no need to search for further matches */
2259 
2260       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
2261       }
2262 
2263     /* End of doing what has to be done for a match. If --line-buffered was
2264     given, flush the output. */
2265 
2266     if (line_buffered) fflush(stdout);
2267     rc = 0;    /* Had some success */
2268 
2269     /* Remember where the last match happened for after_context. We remember
2270     where we are about to restart, and that line's number. */
2271 
2272     lastmatchrestart = ptr + linelength + endlinelength;
2273     lastmatchnumber = linenumber + 1;
2274     }
2275 
2276   /* For a match in multiline inverted mode (which of course did not cause
2277   anything to be printed), we have to move on to the end of the match before
2278   proceeding. */
2279 
2280   if (multiline && invert && match)
2281     {
2282     int ellength;
2283     char *endmatch = ptr + offsets[1];
2284     t = ptr;
2285     while (t < endmatch)
2286       {
2287       t = end_of_line(t, endptr, &ellength);
2288       if (t <= endmatch) linenumber++; else break;
2289       }
2290     endmatch = end_of_line(endmatch, endptr, &ellength);
2291     linelength = endmatch - ptr - ellength;
2292     }
2293 
2294   /* Advance to after the newline and increment the line number. The file
2295   offset to the current line is maintained in filepos. */
2296 
2297   END_ONE_MATCH:
2298   ptr += linelength + endlinelength;
2299   filepos += (int)(linelength + endlinelength);
2300   linenumber++;
2301 
2302   /* If input is line buffered, and the buffer is not yet full, read another
2303   line and add it into the buffer. */
2304 
2305   if (input_line_buffered && bufflength < (size_t)bufsize)
2306     {
2307     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2308     bufflength += add;
2309     endptr += add;
2310     }
2311 
2312   /* If we haven't yet reached the end of the file (the buffer is full), and
2313   the current point is in the top 1/3 of the buffer, slide the buffer down by
2314   1/3 and refill it. Before we do this, if some unprinted "after" lines are
2315   about to be lost, print them. */
2316 
2317   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2318     {
2319     if (after_context > 0 &&
2320         lastmatchnumber > 0 &&
2321         lastmatchrestart < main_buffer + bufthird)
2322       {
2323       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2324       lastmatchnumber = 0;
2325       }
2326 
2327     /* Now do the shuffle */
2328 
2329     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2330     ptr -= bufthird;
2331 
2332 #ifdef SUPPORT_LIBZ
2333     if (frtype == FR_LIBZ)
2334       bufflength = 2*bufthird +
2335         gzread (ingz, main_buffer + 2*bufthird, bufthird);
2336     else
2337 #endif
2338 
2339 #ifdef SUPPORT_LIBBZ2
2340     if (frtype == FR_LIBBZ2)
2341       bufflength = 2*bufthird +
2342         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2343     else
2344 #endif
2345 
2346     bufflength = 2*bufthird +
2347       (input_line_buffered?
2348        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2349        fread(main_buffer + 2*bufthird, 1, bufthird, in));
2350     endptr = main_buffer + bufflength;
2351 
2352     /* Adjust any last match point */
2353 
2354     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2355     }
2356   }     /* Loop through the whole file */
2357 
2358 /* End of file; print final "after" lines if wanted; do_after_lines sets
2359 hyphenpending if it prints something. */
2360 
2361 if (!show_only_matching && !count_only)
2362   {
2363   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2364   hyphenpending |= endhyphenpending;
2365   }
2366 
2367 /* Print the file name if we are looking for those without matches and there
2368 were none. If we found a match, we won't have got this far. */
2369 
2370 if (filenames == FN_NOMATCH_ONLY)
2371   {
2372   fprintf(stdout, "%s" STDOUT_NL, printname);
2373   return 0;
2374   }
2375 
2376 /* Print the match count if wanted */
2377 
2378 if (count_only && !quiet)
2379   {
2380   if (count > 0 || !omit_zero_count)
2381     {
2382     if (printname != NULL && filenames != FN_NONE)
2383       fprintf(stdout, "%s:", printname);
2384     fprintf(stdout, "%d" STDOUT_NL, count);
2385     }
2386   }
2387 
2388 return rc;
2389 }
2390 
2391 
2392 
2393 /*************************************************
2394 *     Grep a file or recurse into a directory    *
2395 *************************************************/
2396 
2397 /* Given a path name, if it's a directory, scan all the files if we are
2398 recursing; if it's a file, grep it.
2399 
2400 Arguments:
2401   pathname          the path to investigate
2402   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2403   only_one_at_top   TRUE if the path is the only one at toplevel
2404 
2405 Returns:  -1 the file/directory was skipped
2406            0 if there was at least one match
2407            1 if there were no matches
2408            2 there was some kind of error
2409 
2410 However, file opening failures are suppressed if "silent" is set.
2411 */
2412 
2413 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)2414 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2415 {
2416 int rc = 1;
2417 int frtype;
2418 void *handle;
2419 char *lastcomp;
2420 FILE *in = NULL;           /* Ensure initialized */
2421 
2422 #ifdef SUPPORT_LIBZ
2423 gzFile ingz = NULL;
2424 #endif
2425 
2426 #ifdef SUPPORT_LIBBZ2
2427 BZFILE *inbz2 = NULL;
2428 #endif
2429 
2430 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2431 int pathlen;
2432 #endif
2433 
2434 #if defined NATIVE_ZOS
2435 int zos_type;
2436 FILE *zos_test_file;
2437 #endif
2438 
2439 /* If the file name is "-" we scan stdin */
2440 
2441 if (strcmp(pathname, "-") == 0)
2442   {
2443   return pcre2grep(stdin, FR_PLAIN, stdin_name,
2444     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2445       stdin_name : NULL);
2446   }
2447 
2448 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2449 directories, whereas --include and --exclude apply to everything else. The test
2450 is against the final component of the path. */
2451 
2452 lastcomp = strrchr(pathname, FILESEP);
2453 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2454 
2455 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2456 Otherwise, scan the directory and recurse for each path within it. The scanning
2457 code is localized so it can be made system-specific. */
2458 
2459 
2460 /* For z/OS, determine the file type. */
2461 
2462 #if defined NATIVE_ZOS
2463 zos_test_file =  fopen(pathname,"rb");
2464 
2465 if (zos_test_file == NULL)
2466    {
2467    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
2468      pathname, strerror(errno));
2469    return -1;
2470    }
2471 zos_type = identifyzosfiletype (zos_test_file);
2472 fclose (zos_test_file);
2473 
2474 /* Handle a PDS in separate code */
2475 
2476 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2477    {
2478    return travelonpdsdir (pathname, only_one_at_top);
2479    }
2480 
2481 /* Deal with regular files in the normal way below. These types are:
2482    zos_type == __ZOS_PDS_MEMBER
2483    zos_type == __ZOS_PS
2484    zos_type == __ZOS_VSAM_KSDS
2485    zos_type == __ZOS_VSAM_ESDS
2486    zos_type == __ZOS_VSAM_RRDS
2487 */
2488 
2489 /* Handle a z/OS directory using common code. */
2490 
2491 else if (zos_type == __ZOS_HFS)
2492  {
2493 #endif  /* NATIVE_ZOS */
2494 
2495 
2496 /* Handle directories: common code for all OS */
2497 
2498 if (isdirectory(pathname))
2499   {
2500   if (dee_action == dee_SKIP ||
2501       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2502     return -1;
2503 
2504   if (dee_action == dee_RECURSE)
2505     {
2506     char buffer[1024];
2507     char *nextfile;
2508     directory_type *dir = opendirectory(pathname);
2509 
2510     if (dir == NULL)
2511       {
2512       if (!silent)
2513         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
2514           strerror(errno));
2515       return 2;
2516       }
2517 
2518     while ((nextfile = readdirectory(dir)) != NULL)
2519       {
2520       int frc;
2521       sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2522       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2523       if (frc > 1) rc = frc;
2524        else if (frc == 0 && rc == 1) rc = 0;
2525       }
2526 
2527     closedirectory(dir);
2528     return rc;
2529     }
2530   }
2531 
2532 #if defined NATIVE_ZOS
2533  }
2534 #endif
2535 
2536 /* If the file is not a directory, check for a regular file, and if it is not,
2537 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2538 exclusion. */
2539 
2540 else if (
2541 #if defined NATIVE_ZOS
2542         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2543 #else  /* all other OS */
2544         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2545 #endif
2546         !test_incexc(lastcomp, include_patterns, exclude_patterns))
2547   return -1;  /* File skipped */
2548 
2549 /* Control reaches here if we have a regular file, or if we have a directory
2550 and recursion or skipping was not requested, or if we have anything else and
2551 skipping was not requested. The scan proceeds. If this is the first and only
2552 argument at top level, we don't show the file name, unless we are only showing
2553 the file name, or the filename was forced (-H). */
2554 
2555 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2556 pathlen = (int)(strlen(pathname));
2557 #endif
2558 
2559 /* Open using zlib if it is supported and the file name ends with .gz. */
2560 
2561 #ifdef SUPPORT_LIBZ
2562 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2563   {
2564   ingz = gzopen(pathname, "rb");
2565   if (ingz == NULL)
2566     {
2567     if (!silent)
2568       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
2569         strerror(errno));
2570     return 2;
2571     }
2572   handle = (void *)ingz;
2573   frtype = FR_LIBZ;
2574   }
2575 else
2576 #endif
2577 
2578 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2579 
2580 #ifdef SUPPORT_LIBBZ2
2581 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2582   {
2583   inbz2 = BZ2_bzopen(pathname, "rb");
2584   handle = (void *)inbz2;
2585   frtype = FR_LIBBZ2;
2586   }
2587 else
2588 #endif
2589 
2590 /* Otherwise use plain fopen(). The label is so that we can come back here if
2591 an attempt to read a .bz2 file indicates that it really is a plain file. */
2592 
2593 #ifdef SUPPORT_LIBBZ2
2594 PLAIN_FILE:
2595 #endif
2596   {
2597   in = fopen(pathname, "rb");
2598   handle = (void *)in;
2599   frtype = FR_PLAIN;
2600   }
2601 
2602 /* All the opening methods return errno when they fail. */
2603 
2604 if (handle == NULL)
2605   {
2606   if (!silent)
2607     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
2608       strerror(errno));
2609   return 2;
2610   }
2611 
2612 /* Now grep the file */
2613 
2614 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2615   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2616 
2617 /* Close in an appropriate manner. */
2618 
2619 #ifdef SUPPORT_LIBZ
2620 if (frtype == FR_LIBZ)
2621   gzclose(ingz);
2622 else
2623 #endif
2624 
2625 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2626 read failed. If the error indicates that the file isn't in fact bzipped, try
2627 again as a normal file. */
2628 
2629 #ifdef SUPPORT_LIBBZ2
2630 if (frtype == FR_LIBBZ2)
2631   {
2632   if (rc == 3)
2633     {
2634     int errnum;
2635     const char *err = BZ2_bzerror(inbz2, &errnum);
2636     if (errnum == BZ_DATA_ERROR_MAGIC)
2637       {
2638       BZ2_bzclose(inbz2);
2639       goto PLAIN_FILE;
2640       }
2641     else if (!silent)
2642       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
2643         pathname, err);
2644     rc = 2;    /* The normal "something went wrong" code */
2645     }
2646   BZ2_bzclose(inbz2);
2647   }
2648 else
2649 #endif
2650 
2651 /* Normal file close */
2652 
2653 fclose(in);
2654 
2655 /* Pass back the yield from pcre2grep(). */
2656 
2657 return rc;
2658 }
2659 
2660 
2661 
2662 /*************************************************
2663 *    Handle a single-letter, no data option      *
2664 *************************************************/
2665 
2666 static int
handle_option(int letter,int options)2667 handle_option(int letter, int options)
2668 {
2669 switch(letter)
2670   {
2671   case N_FOFFSETS: file_offsets = TRUE; break;
2672   case N_HELP: help(); pcre2grep_exit(0);
2673   case N_LBUFFER: line_buffered = TRUE; break;
2674   case N_LOFFSETS: line_offsets = number = TRUE; break;
2675   case N_NOJIT: use_jit = FALSE; break;
2676   case 'a': binary_files = BIN_TEXT; break;
2677   case 'c': count_only = TRUE; break;
2678   case 'F': process_options |= PO_FIXED_STRINGS; break;
2679   case 'H': filenames = FN_FORCE; break;
2680   case 'I': binary_files = BIN_NOMATCH; break;
2681   case 'h': filenames = FN_NONE; break;
2682   case 'i': options |= PCRE2_CASELESS; break;
2683   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2684   case 'L': filenames = FN_NOMATCH_ONLY; break;
2685   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
2686   case 'n': number = TRUE; break;
2687 
2688   case 'o':
2689   only_matching_last = add_number(0, only_matching_last);
2690   if (only_matching == NULL) only_matching = only_matching_last;
2691   break;
2692 
2693   case 'q': quiet = TRUE; break;
2694   case 'r': dee_action = dee_RECURSE; break;
2695   case 's': silent = TRUE; break;
2696   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
2697   case 'v': invert = TRUE; break;
2698   case 'w': process_options |= PO_WORD_MATCH; break;
2699   case 'x': process_options |= PO_LINE_MATCH; break;
2700 
2701   case 'V':
2702     {
2703     unsigned char buffer[128];
2704     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
2705     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
2706     }
2707   pcre2grep_exit(0);
2708   break;
2709 
2710   default:
2711   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
2712   pcre2grep_exit(usage(2));
2713   }
2714 
2715 return options;
2716 }
2717 
2718 
2719 
2720 
2721 /*************************************************
2722 *          Construct printed ordinal             *
2723 *************************************************/
2724 
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12, or 13 take "th" ("11th", "112th"), as does any
number whose last digit is not 1, 2, or 3.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; it is overwritten
            by the next call
*/

static char *
ordin(int n)
{
static char buffer[14];    /* Longest is "-2147483648th" plus the zero */
const char *ending = "th";
int lasttwo = n % 100;

/* Only a final 1, 2, or 3 outside the teens changes the ending. */

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
2743 
2744 
2745 
2746 /*************************************************
2747 *          Compile a single pattern              *
2748 *************************************************/
2749 
2750 /* Do nothing if the pattern has already been compiled. This is the case for
2751 include/exclude patterns read from a file.
2752 
2753 When the -F option has been used, each "pattern" may be a list of strings,
2754 separated by line breaks. They will be matched literally. We split such a
2755 string and compile the first substring, inserting an additional block into the
2756 pattern chain.
2757 
2758 Arguments:
2759   p              points to the pattern block
2760   options        the PCRE options
2761   popts          the processing options
2762   fromfile       TRUE if the pattern was read from a file
2763   fromtext       file name or identifying text (e.g. "include")
2764   count          0 if this is the only command line pattern, or
2765                  number of the command line pattern, or
2766                  linenumber for a pattern from a file
2767 
2768 Returns:         TRUE on success, FALSE after an error
2769 */
2770 
2771 static BOOL
compile_pattern(patstr * p,int options,int popts,int fromfile,const char * fromtext,int count)2772 compile_pattern(patstr *p, int options, int popts, int fromfile,
2773   const char *fromtext, int count)
2774 {
2775 unsigned char buffer[PATBUFSIZE];
2776 PCRE2_SIZE erroffset;
2777 char *ps = p->string;
2778 unsigned int patlen = strlen(ps);
2779 int errcode;
2780 
2781 if (p->compiled != NULL) return TRUE;
2782 
2783 if ((popts & PO_FIXED_STRINGS) != 0)
2784   {
2785   int ellength;
2786   char *eop = ps + patlen;
2787   char *pe = end_of_line(ps, eop, &ellength);
2788 
2789   if (ellength != 0)
2790     {
2791     if (add_pattern(pe, p) == NULL) return FALSE;
2792     patlen = (int)(pe - ps - ellength);
2793     }
2794   }
2795 
2796 sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2797 p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
2798   &erroffset, compile_context);
2799 
2800 /* Handle successful compile */
2801 
2802 if (p->compiled != NULL)
2803   {
2804 #ifdef SUPPORT_PCRE2GREP_JIT
2805   if (use_jit)
2806     {
2807     errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
2808     if (errcode == 0) return TRUE;
2809     erroffset = PCRE2_SIZE_MAX;     /* Will get reduced to patlen below */
2810     }
2811   else
2812 #endif
2813   return TRUE;
2814   }
2815 
2816 /* Handle compile and JIT compile errors */
2817 
2818 erroffset -= (int)strlen(prefix[popts]);
2819 if (erroffset > patlen) erroffset = patlen;
2820 pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
2821 
2822 if (fromfile)
2823   {
2824   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
2825     "at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
2826   }
2827 else
2828   {
2829   if (count == 0)
2830     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
2831       fromtext, (int)erroffset, buffer);
2832   else
2833     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
2834       ordin(count), fromtext, (int)erroffset, buffer);
2835   }
2836 
2837 return FALSE;
2838 }
2839 
2840 
2841 
2842 /*************************************************
2843 *     Read and compile a file of patterns        *
2844 *************************************************/
2845 
2846 /* This is used for --filelist, --include-from, and --exclude-from.
2847 
2848 Arguments:
2849   name         the name of the file; "-" is stdin
2850   patptr       pointer to the pattern chain anchor
2851   patlastptr   pointer to the last pattern pointer
2852   popts        the process options to pass to pattern_compile()
2853 
2854 Returns:       TRUE if all went well
2855 */
2856 
2857 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr,int popts)2858 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2859 {
2860 int linenumber = 0;
2861 FILE *f;
2862 char *filename;
2863 char buffer[PATBUFSIZE];
2864 
2865 if (strcmp(name, "-") == 0)
2866   {
2867   f = stdin;
2868   filename = stdin_name;
2869   }
2870 else
2871   {
2872   f = fopen(name, "r");
2873   if (f == NULL)
2874     {
2875     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
2876     return FALSE;
2877     }
2878   filename = name;
2879   }
2880 
2881 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2882   {
2883   char *s = buffer + (int)strlen(buffer);
2884   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2885   *s = 0;
2886   linenumber++;
2887   if (buffer[0] == 0) continue;   /* Skip blank lines */
2888 
2889   /* Note: this call to add_pattern() puts a pointer to the local variable
2890   "buffer" into the pattern chain. However, that pointer is used only when
2891   compiling the pattern, which happens immediately below, so we flatten it
2892   afterwards, as a precaution against any later code trying to use it. */
2893 
2894   *patlastptr = add_pattern(buffer, *patlastptr);
2895   if (*patlastptr == NULL)
2896     {
2897     if (f != stdin) fclose(f);
2898     return FALSE;
2899     }
2900   if (*patptr == NULL) *patptr = *patlastptr;
2901 
2902   /* This loop is needed because compiling a "pattern" when -F is set may add
2903   on additional literal patterns if the original contains a newline. In the
2904   common case, it never will, because fgets() stops at a newline. However,
2905   the -N option can be used to give pcre2grep a different newline setting. */
2906 
2907   for(;;)
2908     {
2909     if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
2910         linenumber))
2911       {
2912       if (f != stdin) fclose(f);
2913       return FALSE;
2914       }
2915     (*patlastptr)->string = NULL;            /* Insurance */
2916     if ((*patlastptr)->next == NULL) break;
2917     *patlastptr = (*patlastptr)->next;
2918     }
2919   }
2920 
2921 if (f != stdin) fclose(f);
2922 return TRUE;
2923 }
2924 
2925 
2926 
2927 /*************************************************
2928 *                Main program                    *
2929 *************************************************/
2930 
2931 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2932 
2933 int
main(int argc,char ** argv)2934 main(int argc, char **argv)
2935 {
2936 int i, j;
2937 int rc = 1;
2938 BOOL only_one_at_top;
2939 patstr *cp;
2940 fnstr *fn;
2941 const char *locale_from = "--locale";
2942 
2943 #ifdef SUPPORT_PCRE2GREP_JIT
2944 pcre2_jit_stack *jit_stack = NULL;
2945 #endif
2946 
2947 /* In Windows, stdout is set up as a text stream, which means that \n is
2948 converted to \r\n. This causes output lines that are copied from the input to
2949 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
2950 that stdout is a binary stream. Note that this means all other output to stdout
2951 must use STDOUT_NL to terminate lines. */
2952 
2953 #if defined(_WIN32) || defined(WIN32)
2954 _setmode( _fileno(stdout), _O_BINARY);
2955 #endif
2956 
2957 /* Set up a default compile and match contexts and a match data block. */
2958 
2959 compile_context = pcre2_compile_context_create(NULL);
2960 match_context = pcre2_match_context_create(NULL);
2961 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
2962 offsets = pcre2_get_ovector_pointer(match_data);
2963 
2964 /* If string (script) callouts are supported, set up the callout processing
2965 function. */
2966 
2967 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2968 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
2969 #endif
2970 
2971 /* Process the options */
2972 
2973 for (i = 1; i < argc; i++)
2974   {
2975   option_item *op = NULL;
2976   char *option_data = (char *)"";    /* default to keep compiler happy */
2977   BOOL longop;
2978   BOOL longopwasequals = FALSE;
2979 
2980   if (argv[i][0] != '-') break;
2981 
2982   /* If we hit an argument that is just "-", it may be a reference to STDIN,
2983   but only if we have previously had -e or -f to define the patterns. */
2984 
2985   if (argv[i][1] == 0)
2986     {
2987     if (pattern_files != NULL || patterns != NULL) break;
2988       else pcre2grep_exit(usage(2));
2989     }
2990 
2991   /* Handle a long name option, or -- to terminate the options */
2992 
2993   if (argv[i][1] == '-')
2994     {
2995     char *arg = argv[i] + 2;
2996     char *argequals = strchr(arg, '=');
2997 
2998     if (*arg == 0)    /* -- terminates options */
2999       {
3000       i++;
3001       break;                /* out of the options-handling loop */
3002       }
3003 
3004     longop = TRUE;
3005 
3006     /* Some long options have data that follows after =, for example file=name.
3007     Some options have variations in the long name spelling: specifically, we
3008     allow "regexp" because GNU grep allows it, though I personally go along
3009     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3010     These options are entered in the table as "regex(p)". Options can be in
3011     both these categories. */
3012 
3013     for (op = optionlist; op->one_char != 0; op++)
3014       {
3015       char *opbra = strchr(op->long_name, '(');
3016       char *equals = strchr(op->long_name, '=');
3017 
3018       /* Handle options with only one spelling of the name */
3019 
3020       if (opbra == NULL)     /* Does not contain '(' */
3021         {
3022         if (equals == NULL)  /* Not thing=data case */
3023           {
3024           if (strcmp(arg, op->long_name) == 0) break;
3025           }
3026         else                 /* Special case xxx=data */
3027           {
3028           int oplen = (int)(equals - op->long_name);
3029           int arglen = (argequals == NULL)?
3030             (int)strlen(arg) : (int)(argequals - arg);
3031           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3032             {
3033             option_data = arg + arglen;
3034             if (*option_data == '=')
3035               {
3036               option_data++;
3037               longopwasequals = TRUE;
3038               }
3039             break;
3040             }
3041           }
3042         }
3043 
3044       /* Handle options with an alternate spelling of the name */
3045 
3046       else
3047         {
3048         char buff1[24];
3049         char buff2[24];
3050 
3051         int baselen = (int)(opbra - op->long_name);
3052         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3053         int arglen = (argequals == NULL || equals == NULL)?
3054           (int)strlen(arg) : (int)(argequals - arg);
3055 
3056         sprintf(buff1, "%.*s", baselen, op->long_name);
3057         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
3058 
3059         if (strncmp(arg, buff1, arglen) == 0 ||
3060            strncmp(arg, buff2, arglen) == 0)
3061           {
3062           if (equals != NULL && argequals != NULL)
3063             {
3064             option_data = argequals;
3065             if (*option_data == '=')
3066               {
3067               option_data++;
3068               longopwasequals = TRUE;
3069               }
3070             }
3071           break;
3072           }
3073         }
3074       }
3075 
3076     if (op->one_char == 0)
3077       {
3078       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3079       pcre2grep_exit(usage(2));
3080       }
3081     }
3082 
3083   /* Jeffrey Friedl's debugging harness uses these additional options which
3084   are not in the right form for putting in the option table because they use
3085   only one hyphen, yet are more than one character long. By putting them
3086   separately here, they will not get displayed as part of the help() output,
3087   but I don't think Jeffrey will care about that. */
3088 
3089 #ifdef JFRIEDL_DEBUG
3090   else if (strcmp(argv[i], "-pre") == 0) {
3091           jfriedl_prefix = argv[++i];
3092           continue;
3093   } else if (strcmp(argv[i], "-post") == 0) {
3094           jfriedl_postfix = argv[++i];
3095           continue;
3096   } else if (strcmp(argv[i], "-XT") == 0) {
3097           sscanf(argv[++i], "%d", &jfriedl_XT);
3098           continue;
3099   } else if (strcmp(argv[i], "-XR") == 0) {
3100           sscanf(argv[++i], "%d", &jfriedl_XR);
3101           continue;
3102   }
3103 #endif
3104 
3105 
3106   /* One-char options; many that have no data may be in a single argument; we
3107   continue till we hit the last one or one that needs data. */
3108 
3109   else
3110     {
3111     char *s = argv[i] + 1;
3112     longop = FALSE;
3113 
3114     while (*s != 0)
3115       {
3116       for (op = optionlist; op->one_char != 0; op++)
3117         {
3118         if (*s == op->one_char) break;
3119         }
3120       if (op->one_char == 0)
3121         {
3122         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3123           *s, argv[i]);
3124         pcre2grep_exit(usage(2));
3125         }
3126 
3127       option_data = s+1;
3128 
3129       /* Break out if this is the last character in the string; it's handled
3130       below like a single multi-char option. */
3131 
3132       if (*option_data == 0) break;
3133 
3134       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3135       are used for ones that either have a numerical number or defaults, i.e.
3136       the data is optional. If a digit follows, there is data; if not, carry on
3137       with other single-character options in the same string. */
3138 
3139       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3140         {
3141         if (isdigit((unsigned char)s[1])) break;
3142         }
3143       else   /* Check for an option with data */
3144         {
3145         if (op->type != OP_NODATA) break;
3146         }
3147 
3148       /* Handle a single-character option with no data, then loop for the
3149       next character in the string. */
3150 
3151       pcre2_options = handle_option(*s++, pcre2_options);
3152       }
3153     }
3154 
3155   /* At this point we should have op pointing to a matched option. If the type
3156   is NO_DATA, it means that there is no data, and the option might set
3157   something in the PCRE options. */
3158 
3159   if (op->type == OP_NODATA)
3160     {
3161     pcre2_options = handle_option(op->one_char, pcre2_options);
3162     continue;
3163     }
3164 
3165   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3166   either has a value or defaults to something. It cannot have data in a
3167   separate item. At the moment, the only such options are "colo(u)r",
3168   "only-matching", and Jeffrey Friedl's special -S debugging option. */
3169 
3170   if (*option_data == 0 &&
3171       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3172        op->type == OP_OP_NUMBERS))
3173     {
3174     switch (op->one_char)
3175       {
3176       case N_COLOUR:
3177       colour_option = (char *)"auto";
3178       break;
3179 
3180       case 'o':
3181       only_matching_last = add_number(0, only_matching_last);
3182       if (only_matching == NULL) only_matching = only_matching_last;
3183       break;
3184 
3185 #ifdef JFRIEDL_DEBUG
3186       case 'S':
3187       S_arg = 0;
3188       break;
3189 #endif
3190       }
3191     continue;
3192     }
3193 
3194   /* Otherwise, find the data string for the option. */
3195 
3196   if (*option_data == 0)
3197     {
3198     if (i >= argc - 1 || longopwasequals)
3199       {
3200       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3201       pcre2grep_exit(usage(2));
3202       }
3203     option_data = argv[++i];
3204     }
3205 
3206   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3207   added to a chain of numbers. */
3208 
3209   if (op->type == OP_OP_NUMBERS)
3210     {
3211     unsigned long int n = decode_number(option_data, op, longop);
3212     omdatastr *omd = (omdatastr *)op->dataptr;
3213     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3214     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3215     }
3216 
3217   /* If the option type is OP_PATLIST, it's the -e option, or one of the
3218   include/exclude options, which can be called multiple times to create lists
3219   of patterns. */
3220 
3221   else if (op->type == OP_PATLIST)
3222     {
3223     patdatastr *pd = (patdatastr *)op->dataptr;
3224     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
3225     if (*(pd->lastptr) == NULL) goto EXIT2;
3226     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3227     }
3228 
3229   /* If the option type is OP_FILELIST, it's one of the options that names a
3230   file. */
3231 
3232   else if (op->type == OP_FILELIST)
3233     {
3234     fndatastr *fd = (fndatastr *)op->dataptr;
3235     fn = (fnstr *)malloc(sizeof(fnstr));
3236     if (fn == NULL)
3237       {
3238       fprintf(stderr, "pcre2grep: malloc failed\n");
3239       goto EXIT2;
3240       }
3241     fn->next = NULL;
3242     fn->name = option_data;
3243     if (*(fd->anchor) == NULL)
3244       *(fd->anchor) = fn;
3245     else
3246       (*(fd->lastptr))->next = fn;
3247     *(fd->lastptr) = fn;
3248     }
3249 
3250   /* Handle OP_BINARY_FILES */
3251 
3252   else if (op->type == OP_BINFILES)
3253     {
3254     if (strcmp(option_data, "binary") == 0)
3255       binary_files = BIN_BINARY;
3256     else if (strcmp(option_data, "without-match") == 0)
3257       binary_files = BIN_NOMATCH;
3258     else if (strcmp(option_data, "text") == 0)
3259       binary_files = BIN_TEXT;
3260     else
3261       {
3262       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3263         option_data);
3264       pcre2grep_exit(usage(2));
3265       }
3266     }
3267 
3268   /* Otherwise, deal with a single string or numeric data value. */
3269 
3270   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3271            op->type != OP_OP_NUMBER)
3272     {
3273     *((char **)op->dataptr) = option_data;
3274     }
3275   else
3276     {
3277     unsigned long int n = decode_number(option_data, op, longop);
3278     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3279       else *((int *)op->dataptr) = n;
3280     }
3281   }
3282 
3283 /* Options have been decoded. If -C was used, its value is used as a default
3284 for -A and -B. */
3285 
3286 if (both_context > 0)
3287   {
3288   if (after_context == 0) after_context = both_context;
3289   if (before_context == 0) before_context = both_context;
3290   }
3291 
3292 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
3293 However, all three set show_only_matching because they display, each in their
3294 own way, only the data that has matched. */
3295 
3296 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
3297     (file_offsets && line_offsets))
3298   {
3299   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
3300     "and/or --line-offsets\n");
3301   pcre2grep_exit(usage(2));
3302   }
3303 
3304 /* Put limits into the match data block. */
3305 
3306 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
3307 if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
3308 
3309 if (only_matching != NULL || file_offsets || line_offsets)
3310   show_only_matching = TRUE;
3311 
3312 /* If a locale has not been provided as an option, see if the LC_CTYPE or
3313 LC_ALL environment variable is set, and if so, use it. */
3314 
3315 if (locale == NULL)
3316   {
3317   locale = getenv("LC_ALL");
3318   locale_from = "LCC_ALL";
3319   }
3320 
3321 if (locale == NULL)
3322   {
3323   locale = getenv("LC_CTYPE");
3324   locale_from = "LC_CTYPE";
3325   }
3326 
3327 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
3328 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
3329 
3330 if (locale != NULL)
3331   {
3332   if (setlocale(LC_CTYPE, locale) == NULL)
3333     {
3334     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
3335       locale, locale_from);
3336     goto EXIT2;
3337     }
3338   character_tables = pcre2_maketables(NULL);
3339   pcre2_set_character_tables(compile_context, character_tables);
3340   }
3341 
3342 /* Sort out colouring */
3343 
3344 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3345   {
3346   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
3347   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3348   else
3349     {
3350     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
3351       colour_option);
3352     goto EXIT2;
3353     }
3354   if (do_colour)
3355     {
3356     char *cs = getenv("PCRE2GREP_COLOUR");
3357     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
3358     if (cs != NULL) colour_string = cs;
3359     }
3360   }
3361 
3362 /* Sort out a newline setting. */
3363 
3364 if (newline_arg != NULL)
3365   {
3366   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
3367        endlinetype++)
3368     {
3369     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
3370     }
3371   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
3372     pcre2_set_newline(compile_context, endlinetype);
3373   else
3374     {
3375     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
3376       newline_arg);
3377     goto EXIT2;
3378     }
3379   }
3380 
3381 /* Find default newline convention */
3382 
3383 else
3384   {
3385   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
3386   }
3387 
3388 /* Interpret the text values for -d and -D */
3389 
3390 if (dee_option != NULL)
3391   {
3392   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3393   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3394   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3395   else
3396     {
3397     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
3398     goto EXIT2;
3399     }
3400   }
3401 
3402 if (DEE_option != NULL)
3403   {
3404   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3405   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3406   else
3407     {
3408     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
3409     goto EXIT2;
3410     }
3411   }
3412 
3413 /* Check the values for Jeffrey Friedl's debugging options. */
3414 
3415 #ifdef JFRIEDL_DEBUG
3416 if (S_arg > 9)
3417   {
3418   fprintf(stderr, "pcre2grep: bad value for -S option\n");
3419   return 2;
3420   }
3421 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3422   {
3423   if (jfriedl_XT == 0) jfriedl_XT = 1;
3424   if (jfriedl_XR == 0) jfriedl_XR = 1;
3425   }
3426 #endif
3427 
3428 /* Get memory for the main buffer. */
3429 
3430 bufsize = 3*bufthird;
3431 main_buffer = (char *)malloc(bufsize);
3432 
3433 if (main_buffer == NULL)
3434   {
3435   fprintf(stderr, "pcre2grep: malloc failed\n");
3436   goto EXIT2;
3437   }
3438 
3439 /* If no patterns were provided by -e, and there are no files provided by -f,
3440 the first argument is the one and only pattern, and it must exist. */
3441 
3442 if (patterns == NULL && pattern_files == NULL)
3443   {
3444   if (i >= argc) return usage(2);
3445   patterns = patterns_last = add_pattern(argv[i++], NULL);
3446   if (patterns == NULL) goto EXIT2;
3447   }
3448 
3449 /* Compile the patterns that were provided on the command line, either by
3450 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3451 after all the command-line options are read so that we know which PCRE options
3452 to use. When -F is used, compile_pattern() may add another block into the
3453 chain, so we must not access the next pointer till after the compile. */
3454 
3455 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3456   {
3457   if (!compile_pattern(cp, pcre2_options, process_options, FALSE, "command-line",
3458        (j == 1 && patterns->next == NULL)? 0 : j))
3459     goto EXIT2;
3460   }
3461 
3462 /* Read and compile the regular expressions that are provided in files. */
3463 
3464 for (fn = pattern_files; fn != NULL; fn = fn->next)
3465   {
3466   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3467     goto EXIT2;
3468   }
3469 
3470 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3471 
3472 #ifdef SUPPORT_PCRE2GREP_JIT
3473 if (use_jit)
3474   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
3475 #endif
3476 
3477 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3478   {
3479 #ifdef SUPPORT_PCRE2GREP_JIT
3480   if (jit_stack != NULL && cp->compiled != NULL)
3481     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
3482 #endif
3483   }
3484 
3485 /* If there are include or exclude patterns read from the command line, compile
3486 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3487 0. */
3488 
3489 for (j = 0; j < 4; j++)
3490   {
3491   int k;
3492   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3493     {
3494     if (!compile_pattern(cp, pcre2_options, 0, FALSE, incexname[j],
3495          (k == 1 && cp->next == NULL)? 0 : k))
3496       goto EXIT2;
3497     }
3498   }
3499 
3500 /* Read and compile include/exclude patterns from files. */
3501 
3502 for (fn = include_from; fn != NULL; fn = fn->next)
3503   {
3504   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3505     goto EXIT2;
3506   }
3507 
3508 for (fn = exclude_from; fn != NULL; fn = fn->next)
3509   {
3510   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3511     goto EXIT2;
3512   }
3513 
3514 /* If there are no files that contain lists of files to search, and there are
3515 no file arguments, search stdin, and then exit. */
3516 
3517 if (file_lists == NULL && i >= argc)
3518   {
3519   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
3520     (filenames > FN_DEFAULT)? stdin_name : NULL);
3521   goto EXIT;
3522   }
3523 
3524 /* If any files that contains a list of files to search have been specified,
3525 read them line by line and search the given files. */
3526 
3527 for (fn = file_lists; fn != NULL; fn = fn->next)
3528   {
3529   char buffer[PATBUFSIZE];
3530   FILE *fl;
3531   if (strcmp(fn->name, "-") == 0) fl = stdin; else
3532     {
3533     fl = fopen(fn->name, "rb");
3534     if (fl == NULL)
3535       {
3536       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
3537         strerror(errno));
3538       goto EXIT2;
3539       }
3540     }
3541   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3542     {
3543     int frc;
3544     char *end = buffer + (int)strlen(buffer);
3545     while (end > buffer && isspace(end[-1])) end--;
3546     *end = 0;
3547     if (*buffer != 0)
3548       {
3549       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3550       if (frc > 1) rc = frc;
3551         else if (frc == 0 && rc == 1) rc = 0;
3552       }
3553     }
3554   if (fl != stdin) fclose(fl);
3555   }
3556 
3557 /* After handling file-list, work through remaining arguments. Pass in the fact
3558 that there is only one argument at top level - this suppresses the file name if
3559 the argument is not a directory and filenames are not otherwise forced. */
3560 
3561 only_one_at_top = i == argc - 1 && file_lists == NULL;
3562 
3563 for (; i < argc; i++)
3564   {
3565   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3566     only_one_at_top);
3567   if (frc > 1) rc = frc;
3568     else if (frc == 0 && rc == 1) rc = 0;
3569   }
3570 
3571 EXIT:
3572 #ifdef SUPPORT_PCRE2GREP_JIT
3573 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
3574 #endif
3575 
3576 free(main_buffer);
3577 free((void *)character_tables);
3578 
3579 pcre2_compile_context_free(compile_context);
3580 pcre2_match_context_free(match_context);
3581 pcre2_match_data_free(match_data);
3582 
3583 free_pattern_chain(patterns);
3584 free_pattern_chain(include_patterns);
3585 free_pattern_chain(include_dir_patterns);
3586 free_pattern_chain(exclude_patterns);
3587 free_pattern_chain(exclude_dir_patterns);
3588 
3589 free_file_chain(exclude_from);
3590 free_file_chain(include_from);
3591 free_file_chain(pattern_files);
3592 free_file_chain(file_lists);
3593 
3594 while (only_matching != NULL)
3595   {
3596   omstr *this = only_matching;
3597   only_matching = this->next;
3598   free(this);
3599   }
3600 
3601 pcre2grep_exit(rc);
3602 
3603 EXIT2:
3604 rc = 2;
3605 goto EXIT;
3606 }
3607 
3608 /* End of pcre2grep */
3609