• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2018 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
/* Some CMake configurations still define WIN32 under Cygwin; undo that. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h>                /* For _setmode() */
79 #include <fcntl.h>             /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
113 #define FALSE 0
114 #define TRUE 1
115 
116 typedef int BOOL;
117 
118 #define OFFSET_SIZE 33
119 
120 #if BUFSIZ > 8192
121 #define MAXPATLEN BUFSIZ
122 #else
123 #define MAXPATLEN 8192
124 #endif
125 
126 #define FNBUFSIZ 2048
127 #define ERRBUFSIZ 256
128 
129 /* Values for the "filenames" variable, which specifies options for file name
130 output. The order is important; it is assumed that a file name is wanted for
131 all values greater than FN_DEFAULT. */
132 
133 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
134 
135 /* File reading styles */
136 
137 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
138 
139 /* Actions for the -d and -D options */
140 
141 enum { dee_READ, dee_SKIP, dee_RECURSE };
142 enum { DEE_READ, DEE_SKIP };
143 
144 /* Actions for special processing options (flag bits) */
145 
146 #define PO_WORD_MATCH     0x0001
147 #define PO_LINE_MATCH     0x0002
148 #define PO_FIXED_STRINGS  0x0004
149 
150 /* Binary file options */
151 
152 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
153 
154 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
155 environments), a warning is issued if the value of fwrite() is ignored.
156 Unfortunately, casting to (void) does not suppress the warning. To get round
157 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
158 apply to fprintf(). */
159 
160 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
161 
162 /* Under Windows, we have to set stdout to be binary, so that it does not
163 convert \r\n at the ends of output lines to \r\r\n. However, that means that
164 any messages written to stdout must have \r\n as their line terminator. This is
165 handled by using STDOUT_NL as the newline string. We also use a normal double
166 quote for the example, as single quotes aren't usually available. */
167 
168 #ifdef WIN32
169 #define STDOUT_NL  "\r\n"
170 #define QUOT       "\""
171 #else
172 #define STDOUT_NL  "\n"
173 #define QUOT       "'"
174 #endif
175 
176 
177 
178 /*************************************************
179 *               Global variables                 *
180 *************************************************/
181 
182 /* Jeffrey Friedl has some debugging requirements that are not part of the
183 regular code. */
184 
185 #ifdef JFRIEDL_DEBUG
186 static int S_arg = -1;
187 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
188 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
189 static const char *jfriedl_prefix = "";
190 static const char *jfriedl_postfix = "";
191 #endif
192 
193 static const char *colour_string = "1;31";
194 static const char *colour_option = NULL;
195 static const char *dee_option = NULL;
196 static const char *DEE_option = NULL;
197 static const char *locale = NULL;
198 static const char *newline_arg = NULL;
199 static const char *om_separator = NULL;
200 static const char *stdin_name = "(standard input)";
201 static const char *output_text = NULL;
202 
203 static char *main_buffer = NULL;
204 
205 static int after_context = 0;
206 static int before_context = 0;
207 static int binary_files = BIN_BINARY;
208 static int both_context = 0;
209 static int bufthird = PCRE2GREP_BUFSIZE;
210 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
211 static int bufsize = 3*PCRE2GREP_BUFSIZE;
212 static int endlinetype;
213 
214 static unsigned long int total_count = 0;
215 static unsigned long int counts_printed = 0;
216 
217 #ifdef WIN32
218 static int dee_action = dee_SKIP;
219 #else
220 static int dee_action = dee_READ;
221 #endif
222 
223 static int DEE_action = DEE_READ;
224 static int error_count = 0;
225 static int filenames = FN_DEFAULT;
226 
227 #ifdef SUPPORT_PCRE2GREP_JIT
228 static BOOL use_jit = TRUE;
229 #else
230 static BOOL use_jit = FALSE;
231 #endif
232 
233 static const uint8_t *character_tables = NULL;
234 
235 static uint32_t pcre2_options = 0;
236 static uint32_t extra_options = 0;
237 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
238 static uint32_t match_limit = 0;
239 static uint32_t depth_limit = 0;
240 
241 static pcre2_compile_context *compile_context;
242 static pcre2_match_context *match_context;
243 static pcre2_match_data *match_data;
244 static PCRE2_SIZE *offsets;
245 
246 static BOOL count_only = FALSE;
247 static BOOL do_colour = FALSE;
248 #ifdef WIN32
249 static BOOL do_ansi = FALSE;
250 #endif
251 static BOOL file_offsets = FALSE;
252 static BOOL hyphenpending = FALSE;
253 static BOOL invert = FALSE;
254 static BOOL line_buffered = FALSE;
255 static BOOL line_offsets = FALSE;
256 static BOOL multiline = FALSE;
257 static BOOL number = FALSE;
258 static BOOL omit_zero_count = FALSE;
259 static BOOL resource_error = FALSE;
260 static BOOL quiet = FALSE;
261 static BOOL show_total_count = FALSE;
262 static BOOL silent = FALSE;
263 static BOOL utf = FALSE;
264 
265 /* Structure for list of --only-matching capturing numbers. */
266 
267 typedef struct omstr {
268   struct omstr *next;
269   int groupnum;
270 } omstr;
271 
272 static omstr *only_matching = NULL;
273 static omstr *only_matching_last = NULL;
274 static int only_matching_count;
275 
276 /* Structure for holding the two variables that describe a number chain. */
277 
278 typedef struct omdatastr {
279   omstr **anchor;
280   omstr **lastptr;
281 } omdatastr;
282 
283 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
284 
285 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
286 
287 typedef struct fnstr {
288   struct fnstr *next;
289   char *name;
290 } fnstr;
291 
292 static fnstr *exclude_from = NULL;
293 static fnstr *exclude_from_last = NULL;
294 static fnstr *include_from = NULL;
295 static fnstr *include_from_last = NULL;
296 
297 static fnstr *file_lists = NULL;
298 static fnstr *file_lists_last = NULL;
299 static fnstr *pattern_files = NULL;
300 static fnstr *pattern_files_last = NULL;
301 
302 /* Structure for holding the two variables that describe a file name chain. */
303 
304 typedef struct fndatastr {
305   fnstr **anchor;
306   fnstr **lastptr;
307 } fndatastr;
308 
309 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
310 static fndatastr include_from_data = { &include_from, &include_from_last };
311 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
312 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
313 
314 /* Structure for pattern and its compiled form; used for matching patterns and
315 also for include/exclude patterns. */
316 
317 typedef struct patstr {
318   struct patstr *next;
319   char *string;
320   PCRE2_SIZE length;
321   pcre2_code *compiled;
322 } patstr;
323 
324 static patstr *patterns = NULL;
325 static patstr *patterns_last = NULL;
326 static patstr *include_patterns = NULL;
327 static patstr *include_patterns_last = NULL;
328 static patstr *exclude_patterns = NULL;
329 static patstr *exclude_patterns_last = NULL;
330 static patstr *include_dir_patterns = NULL;
331 static patstr *include_dir_patterns_last = NULL;
332 static patstr *exclude_dir_patterns = NULL;
333 static patstr *exclude_dir_patterns_last = NULL;
334 
335 /* Structure holding the two variables that describe a pattern chain. A pointer
336 to such structures is used for each appropriate option. */
337 
338 typedef struct patdatastr {
339   patstr **anchor;
340   patstr **lastptr;
341 } patdatastr;
342 
343 static patdatastr match_patdata = { &patterns, &patterns_last };
344 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
345 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
346 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
347 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
348 
349 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
350                                  &include_dir_patterns, &exclude_dir_patterns };
351 
352 static const char *incexname[4] = { "--include", "--exclude",
353                                     "--include-dir", "--exclude-dir" };
354 
355 /* Structure for options and list of them */
356 
357 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
358        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
359 
360 typedef struct option_item {
361   int type;
362   int one_char;
363   void *dataptr;
364   const char *long_name;
365   const char *help_text;
366 } option_item;
367 
368 /* Options without a single-letter equivalent get a negative value. This can be
369 used to identify them. */
370 
371 #define N_COLOUR       (-1)
372 #define N_EXCLUDE      (-2)
373 #define N_EXCLUDE_DIR  (-3)
374 #define N_HELP         (-4)
375 #define N_INCLUDE      (-5)
376 #define N_INCLUDE_DIR  (-6)
377 #define N_LABEL        (-7)
378 #define N_LOCALE       (-8)
379 #define N_NULL         (-9)
380 #define N_LOFFSETS     (-10)
381 #define N_FOFFSETS     (-11)
382 #define N_LBUFFER      (-12)
383 #define N_H_LIMIT      (-13)
384 #define N_M_LIMIT      (-14)
385 #define N_M_LIMIT_DEP  (-15)
386 #define N_BUFSIZE      (-16)
387 #define N_NOJIT        (-17)
388 #define N_FILE_LIST    (-18)
389 #define N_BINARY_FILES (-19)
390 #define N_EXCLUDE_FROM (-20)
391 #define N_INCLUDE_FROM (-21)
392 #define N_OM_SEPARATOR (-22)
393 #define N_MAX_BUFSIZE  (-23)
394 
395 static option_item optionlist[] = {
396   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
397   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
398   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
399   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
400   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
401   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
402   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
403   { OP_NUMBER,     N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
404   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
405   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
406   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
407   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
408   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
409   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
410   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
411   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
412   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
413   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
414   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
415   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
416   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
417   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
418   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
419   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
420   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
421   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
422   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
423   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
424   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
425   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
426   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
427   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
428   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
429   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
430   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
431   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
432 #ifdef SUPPORT_PCRE2GREP_JIT
433   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
434 #else
435   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
436 #endif
437   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
438   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
439   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
440   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
441   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
442   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
443   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
444   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
445   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
446   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
447   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
448 #ifdef JFRIEDL_DEBUG
449   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
450 #endif
451   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
452   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
453   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
454   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
455   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
456   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
457   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
458   { OP_NODATA,    0,        NULL,               NULL,            NULL }
459 };
460 
461 /* Table of names for newline types. Must be kept in step with the definitions
462 of PCRE2_NEWLINE_xx in pcre2.h. */
463 
464 static const char *newlines[] = {
465   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
466 
467 /* UTF-8 tables - used only when the newline setting is "any". */
468 
469 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
470 
471 const char utf8_table4[] = {
472   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
473   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
474   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
475   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
476 
477 
478 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
479 /*************************************************
480 *    Emulated memmove() for systems without it   *
481 *************************************************/
482 
483 /* This function can make use of bcopy() if it is available. Otherwise do it by
484 steam, as there are some non-Unix environments that lack both memmove() and
485 bcopy(). */
486 
/* Copy n bytes from s to d, allowing the two regions to overlap.

Arguments:
  d          destination
  s          source
  n          number of bytes to copy

Returns:     d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* Destination lies above the source: copy backwards, starting at the end,
  so that source bytes are read before they can be overwritten. */
  unsigned char *tail = to + n;
  from += n;
  while (tail != to) *(--tail) = *(--from);
  }
else
  {
  /* Destination is at or below the source: a forward copy is safe. */
  unsigned char *out = to;
  size_t remaining = n;
  while (remaining-- > 0) *out++ = *from++;
  }

return d;
#endif   /* not HAVE_BCOPY */
}
511 #undef memmove
512 #define memmove(d,s,n) emulated_memmove(d,s,n)
513 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
514 
515 
516 /*************************************************
517 *         Case-independent string compare        *
518 *************************************************/
519 
/* Compare two zero-terminated strings, treating characters that tolower()
maps to the same value as equal.

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal ignoring case
            -1 if, at the first difference, str1's character is the smaller
            +1 if, at the first difference, str1's character is the larger
*/

static int
strcmpic(const char *str1, const char *str2)
{
while (*str1 != '\0' || *str2 != '\0')
  {
  /* Convert via unsigned char: passing a negative plain char to tolower() is
  undefined behaviour. When one string is exhausted its terminating zero
  compares unequal to the other's character, so we return before either
  pointer is dereferenced past its terminator. */
  int c1 = tolower((unsigned char)*str1++);
  int c2 = tolower((unsigned char)*str2++);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
532 
533 
534 /*************************************************
535 *         Parse GREP_COLORS                      *
536 *************************************************/
537 
538 /* Extract ms or mt from GREP_COLORS.
539 
540 Argument:  the string, possibly NULL
541 Returns:   the value of ms or mt, or NULL if neither present
542 */
543 
/* The result lives in a static buffer that is overwritten by the next call.
Values longer than the buffer are silently truncated. */

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *value;
size_t used = 0;

if (gc == NULL) return NULL;

/* "ms=" is looked for first; "mt=" is used only when "ms=" is absent. */
value = strstr(gc, "ms=");
if (value == NULL)
  {
  value = strstr(gc, "mt=");
  if (value == NULL) return NULL;
  }

/* Copy what follows the '=' up to a colon or the end of the string. */
for (value += 3; used < sizeof(seq) - 1; value++)
  {
  if (*value == ':' || *value == 0) break;
  seq[used++] = *value;
  }
seq[used] = 0;
return seq;
}
561 
562 
563 /*************************************************
564 *         Exit from the program                  *
565 *************************************************/
566 
567 /* If there has been a resource error, give a suitable message.
568 
569 Argument:  the return code
570 Returns:   does not return
571 */
572 
573 static void
pcre2grep_exit(int rc)574 pcre2grep_exit(int rc)
575 {
576 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
577 status of 1, which is not helpful. To help with this problem, define a symbol
578 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
579 therein. */
580 
581 #ifdef __VMS
582   char val_buf[4];
583   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
584   $DESCRIPTOR(sym_val, val_buf);
585   sprintf(val_buf, "%d", rc);
586   sym_val.dsc$w_length = strlen(val_buf);
587   lib$set_symbol(&sym_nam, &sym_val);
588 #endif
589 
590 if (resource_error)
591   {
592   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
593     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
594     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
595   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
596   }
597 exit(rc);
598 }
599 
600 
601 /*************************************************
602 *          Add item to chain of patterns         *
603 *************************************************/
604 
605 /* Used to add an item onto a chain, or just return an unconnected item if the
606 "after" argument is NULL.
607 
608 Arguments:
609   s          pattern string to add
610   patlen     length of pattern
611   after      if not NULL points to item to insert after
612 
613 Returns:     new pattern block or NULL on error
614 */
615 
616 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)617 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
618 {
619 patstr *p = (patstr *)malloc(sizeof(patstr));
620 if (p == NULL)
621   {
622   fprintf(stderr, "pcre2grep: malloc failed\n");
623   pcre2grep_exit(2);
624   }
625 if (patlen > MAXPATLEN)
626   {
627   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
628     MAXPATLEN);
629   free(p);
630   return NULL;
631   }
632 p->next = NULL;
633 p->string = s;
634 p->length = patlen;
635 p->compiled = NULL;
636 
637 if (after != NULL)
638   {
639   p->next = after->next;
640   after->next = p;
641   }
642 return p;
643 }
644 
645 
646 /*************************************************
647 *           Free chain of patterns               *
648 *************************************************/
649 
650 /* Used for several chains of patterns.
651 
652 Argument: pointer to start of chain
653 Returns:  nothing
654 */
655 
656 static void
free_pattern_chain(patstr * pc)657 free_pattern_chain(patstr *pc)
658 {
659 while (pc != NULL)
660   {
661   patstr *p = pc;
662   pc = p->next;
663   if (p->compiled != NULL) pcre2_code_free(p->compiled);
664   free(p);
665   }
666 }
667 
668 
669 /*************************************************
670 *           Free chain of file names             *
671 *************************************************/
672 
673 /*
674 Argument: pointer to start of chain
675 Returns:  nothing
676 */
677 
678 static void
free_file_chain(fnstr * fn)679 free_file_chain(fnstr *fn)
680 {
681 while (fn != NULL)
682   {
683   fnstr *f = fn;
684   fn = f->next;
685   free(f);
686   }
687 }
688 
689 
690 /*************************************************
691 *            OS-specific functions               *
692 *************************************************/
693 
694 /* These definitions are needed in all Windows environments, even those where
695 Unix-style directory scanning can be used (see below). */
696 
697 #ifdef WIN32
698 
699 #ifndef STRICT
700 # define STRICT
701 #endif
702 #ifndef WIN32_LEAN_AND_MEAN
703 # define WIN32_LEAN_AND_MEAN
704 #endif
705 
706 #include <windows.h>
707 
708 #define iswild(name) (strpbrk(name, "*?") != NULL)
709 
/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 of a 3-bit
colour number are exchanged and bit 1 is left alone. The argument is fully
parenthesized so that any expression may be passed; note that it is evaluated
three times, so it must not have side effects. */
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
712 
713 static HANDLE hstdout;
714 static CONSOLE_SCREEN_BUFFER_INFO csbi;
715 static WORD match_colour;
716 
717 static WORD
decode_ANSI_colour(const char * cs)718 decode_ANSI_colour(const char *cs)
719 {
720 WORD result = csbi.wAttributes;
721 while (*cs)
722   {
723   if (isdigit(*cs))
724     {
725     int code = atoi(cs);
726     if (code == 1) result |= 0x08;
727     else if (code == 4) result |= 0x8000;
728     else if (code == 5) result |= 0x80;
729     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
730     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
731     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
732     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
733     /* aixterm high intensity colour codes */
734     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
735     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
736 
737     while (isdigit(*cs)) cs++;
738     }
739   if (*cs) cs++;
740   }
741 return result;
742 }
743 
744 
745 static void
init_colour_output()746 init_colour_output()
747 {
748 if (do_colour)
749   {
750   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
751   /* This fails when redirected to con; try again if so. */
752   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
753     {
754     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
755       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
756     GetConsoleScreenBufferInfo(hcon, &csbi);
757     CloseHandle(hcon);
758     }
759   match_colour = decode_ANSI_colour(colour_string);
760   /* No valid colour found - turn off colouring */
761   if (!match_colour) do_colour = FALSE;
762   }
763 }
764 
765 #endif  /* WIN32 */
766 
767 
768 /* The following sets of functions are defined so that they can be made system
769 specific. At present there are versions for Unix-style environments, Windows,
770 native z/OS, and "no support". */
771 
772 
773 /************* Directory scanning Unix-style and z/OS ***********/
774 
775 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
776 #include <sys/types.h>
777 #include <sys/stat.h>
778 #include <dirent.h>
779 
780 #if defined NATIVE_ZOS
781 /************* Directory and PDS/E scanning for z/OS ***********/
782 /************* z/OS looks mostly like Unix with USS ************/
783 /* However, z/OS needs the #include statements in this header */
784 #include "pcrzosfs.h"
785 /* That header is not included in the main PCRE distribution because
786    other apparatus is needed to compile pcre2grep for z/OS. The header
787    can be found in the special z/OS distribution, which is available
788    from www.zaconsultants.net or from www.cbttape.org. */
789 #endif
790 
791 typedef DIR directory_type;
792 #define FILESEP '/'
793 
/* Test whether a path names a directory.

Argument:  the path
Returns:   non-zero if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat info;
int rc = stat(filename, &info);

/* A failing stat() is reported as "not a directory", in the expectation that
opening as a file will fail. */

return (rc < 0)? 0 : S_ISDIR(info.st_mode);
}
802 
803 static directory_type *
opendirectory(char * filename)804 opendirectory(char *filename)
805 {
806 return opendir(filename);
807 }
808 
809 static char *
readdirectory(directory_type * dir)810 readdirectory(directory_type *dir)
811 {
812 for (;;)
813   {
814   struct dirent *dent = readdir(dir);
815   if (dent == NULL) return NULL;
816   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
817     return dent->d_name;
818   }
819 /* Control never reaches here */
820 }
821 
822 static void
closedirectory(directory_type * dir)823 closedirectory(directory_type *dir)
824 {
825 closedir(dir);
826 }
827 
828 
829 /************* Test for regular file, Unix-style **********/
830 
/* Test whether a path names a regular file.

Argument:  the path
Returns:   non-zero if stat() reports a regular file, 0 if it reports
           anything else, and 1 if stat() itself fails
*/

static int
isregfile(char *filename)
{
struct stat info;
int rc = stat(filename, &info);

/* A failing stat() is reported as "regular", in the expectation that opening
as a file will fail. */

return (rc < 0)? 1 : S_ISREG(info.st_mode);
}
839 
840 
841 #if defined NATIVE_ZOS
842 /************* Test for a terminal in z/OS **********/
843 /* isatty() does not work in a TSO environment, so always give FALSE.*/
844 
845 static BOOL
is_stdout_tty(void)846 is_stdout_tty(void)
847 {
848 return FALSE;
849 }
850 
851 static BOOL
is_file_tty(FILE * f)852 is_file_tty(FILE *f)
853 {
854 return FALSE;
855 }
856 
857 
858 /************* Test for a terminal, Unix-style **********/
859 
860 #else
861 static BOOL
is_stdout_tty(void)862 is_stdout_tty(void)
863 {
864 return isatty(fileno(stdout));
865 }
866 
867 static BOOL
is_file_tty(FILE * f)868 is_file_tty(FILE *f)
869 {
870 return isatty(fileno(f));
871 }
872 #endif
873 
874 
875 /************* Print optionally coloured match Unix-style and z/OS **********/
876 
877 static void
print_match(const void * buf,int length)878 print_match(const void *buf, int length)
879 {
880 if (length == 0) return;
881 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
882 FWRITE_IGNORE(buf, 1, length, stdout);
883 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
884 }
885 
886 /* End of Unix-style or native z/OS environment functions. */
887 
888 
889 /************* Directory scanning in Windows ***********/
890 
891 /* I (Philip Hazel) have no means of testing this code. It was contributed by
892 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
893 when it did not exist. David Byron added a patch that moved the #include of
894 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
895 */
896 
897 #elif defined WIN32
898 
899 #ifndef INVALID_FILE_ATTRIBUTES
900 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
901 #endif
902 
903 typedef struct directory_type
904 {
905 HANDLE handle;
906 BOOL first;
907 WIN32_FIND_DATA data;
908 } directory_type;
909 
910 #define FILESEP '/'
911 
912 int
isdirectory(char * filename)913 isdirectory(char *filename)
914 {
915 DWORD attr = GetFileAttributes(filename);
916 if (attr == INVALID_FILE_ATTRIBUTES)
917   return 0;
918 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
919 }
920 
921 directory_type *
opendirectory(char * filename)922 opendirectory(char *filename)
923 {
924 size_t len;
925 char *pattern;
926 directory_type *dir;
927 DWORD err;
928 len = strlen(filename);
929 pattern = (char *)malloc(len + 3);
930 dir = (directory_type *)malloc(sizeof(*dir));
931 if ((pattern == NULL) || (dir == NULL))
932   {
933   fprintf(stderr, "pcre2grep: malloc failed\n");
934   pcre2grep_exit(2);
935   }
936 memcpy(pattern, filename, len);
937 if (iswild(filename))
938   pattern[len] = 0;
939 else
940   memcpy(&(pattern[len]), "\\*", 3);
941 dir->handle = FindFirstFile(pattern, &(dir->data));
942 if (dir->handle != INVALID_HANDLE_VALUE)
943   {
944   free(pattern);
945   dir->first = TRUE;
946   return dir;
947   }
948 err = GetLastError();
949 free(pattern);
950 free(dir);
951 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
952 return NULL;
953 }
954 
955 char *
readdirectory(directory_type * dir)956 readdirectory(directory_type *dir)
957 {
958 for (;;)
959   {
960   if (!dir->first)
961     {
962     if (!FindNextFile(dir->handle, &(dir->data)))
963       return NULL;
964     }
965   else
966     {
967     dir->first = FALSE;
968     }
969   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
970     return dir->data.cFileName;
971   }
972 #ifndef _MSC_VER
973 return NULL;   /* Keep compiler happy; never executed */
974 #endif
975 }
976 
977 void
closedirectory(directory_type * dir)978 closedirectory(directory_type *dir)
979 {
980 FindClose(dir->handle);
981 free(dir);
982 }
983 
984 
985 /************* Test for regular file in Windows **********/
986 
987 /* I don't know how to do this, or if it can be done; assume all paths are
988 regular if they are not directories. */
989 
/* Windows: anything that isdirectory() does not recognize as a directory is
taken to be a regular file. The result is 1 or 0. */

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
994 
995 
996 /************* Test for a terminal in Windows **********/
997 
998 static BOOL
is_stdout_tty(void)999 is_stdout_tty(void)
1000 {
1001 return _isatty(_fileno(stdout));
1002 }
1003 
1004 static BOOL
is_file_tty(FILE * f)1005 is_file_tty(FILE *f)
1006 {
1007 return _isatty(_fileno(f));
1008 }
1009 
1010 
1011 /************* Print optionally coloured match in Windows **********/
1012 
1013 static void
print_match(const void * buf,int length)1014 print_match(const void *buf, int length)
1015 {
1016 if (length == 0) return;
1017 if (do_colour)
1018   {
1019   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1020     else SetConsoleTextAttribute(hstdout, match_colour);
1021   }
1022 FWRITE_IGNORE(buf, 1, length, stdout);
1023 if (do_colour)
1024   {
1025   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1026     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1027   }
1028 }
1029 
1030 /* End of Windows functions */
1031 
1032 
1033 /************* Directory scanning when we can't do it ***********/
1034 
1035 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1036 
1037 #else
1038 
1039 #define FILESEP 0
1040 typedef void directory_type;
1041 
/* No directory support on this system: nothing is ever a directory. */
int isdirectory(char *filename)
{
(void)filename;
return 0;
}
opendirectory(char * filename)1043 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* No directory support on this system: there is never an entry to return. */
char *readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}
/* No directory support on this system: closing does nothing. */
void closedirectory(directory_type *dir)
{
(void)dir;
}
1046 
1047 
1048 /************* Test for regular file when we can't do it **********/
1049 
1050 /* Assume all files are regular. */
1051 
/* No way of testing on this system: every path is taken to be a regular file. */
int isregfile(char *filename)
{
(void)filename;
return 1;
}
1053 
1054 
1055 /************* Test for a terminal when we can't do it **********/
1056 
1057 static BOOL
is_stdout_tty(void)1058 is_stdout_tty(void)
1059 {
1060 return FALSE;
1061 }
1062 
1063 static BOOL
is_file_tty(FILE * f)1064 is_file_tty(FILE *f)
1065 {
1066 return FALSE;
1067 }
1068 
1069 
1070 /************* Print optionally coloured match when we can't do it **********/
1071 
/* Fallback with no colour support: write length bytes from buf to stdout
unadorned. Nothing is written for an empty match. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1078 
1079 #endif  /* End of system-specific functions */
1080 
1081 
1082 
1083 #ifndef HAVE_STRERROR
1084 /*************************************************
1085 *     Provide strerror() for non-ANSI libraries  *
1086 *************************************************/
1087 
1088 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1089 in their libraries, but can provide the same facility by this simple
1090 alternative function. */
1091 
1092 extern int   sys_nerr;
1093 extern char *sys_errlist[];
1094 
1095 char *
strerror(int n)1096 strerror(int n)
1097 {
1098 if (n < 0 || n >= sys_nerr) return "unknown error number";
1099 return sys_errlist[n];
1100 }
1101 #endif /* HAVE_STRERROR */
1102 
1103 
1104 
1105 /*************************************************
1106 *                Usage function                  *
1107 *************************************************/
1108 
1109 static int
usage(int rc)1110 usage(int rc)
1111 {
1112 option_item *op;
1113 fprintf(stderr, "Usage: pcre2grep [-");
1114 for (op = optionlist; op->one_char != 0; op++)
1115   {
1116   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1117   }
1118 fprintf(stderr, "] [long options] [pattern] [files]\n");
1119 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1120   "options.\n");
1121 return rc;
1122 }
1123 
1124 
1125 
1126 /*************************************************
1127 *                Help function                   *
1128 *************************************************/
1129 
1130 static void
help(void)1131 help(void)
1132 {
1133 option_item *op;
1134 
1135 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1136 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1137 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1138 
1139 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1140 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1141 printf("All callout scripts in patterns are supported." STDOUT_NL);
1142 #else
1143 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1144 #endif
1145 #else
1146 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1147 #endif
1148 
1149 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1150 
1151 #ifdef SUPPORT_LIBZ
1152 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1153 #endif
1154 
1155 #ifdef SUPPORT_LIBBZ2
1156 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1157 #endif
1158 
1159 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1160 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1161 #else
1162 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1163 #endif
1164 
1165 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1166 printf("Options:" STDOUT_NL);
1167 
1168 for (op = optionlist; op->one_char != 0; op++)
1169   {
1170   int n;
1171   char s[4];
1172 
1173   if (op->one_char > 0 && (op->long_name)[0] == 0)
1174     n = 31 - printf("  -%c", op->one_char);
1175   else
1176     {
1177     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1178       else strcpy(s, "   ");
1179     n = 31 - printf("  %s --%s", s, op->long_name);
1180     }
1181 
1182   if (n < 1) n = 1;
1183   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1184   }
1185 
1186 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1187 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1188 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1189 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1190 printf("space is removed and blank lines are ignored." STDOUT_NL);
1191 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1192 
1193 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1194 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1195 }
1196 
1197 
1198 
1199 /*************************************************
1200 *            Test exclude/includes               *
1201 *************************************************/
1202 
1203 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1204 there are no includes, the path must match an include pattern.
1205 
1206 Arguments:
1207   path      the path to be matched
1208   ip        the chain of include patterns
1209   ep        the chain of exclude patterns
1210 
1211 Returns:    TRUE if the path is not excluded
1212 */
1213 
1214 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1215 test_incexc(char *path, patstr *ip, patstr *ep)
1216 {
1217 int plen = strlen((const char *)path);
1218 
1219 for (; ep != NULL; ep = ep->next)
1220   {
1221   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1222     return FALSE;
1223   }
1224 
1225 if (ip == NULL) return TRUE;
1226 
1227 for (; ip != NULL; ip = ip->next)
1228   {
1229   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1230     return TRUE;
1231   }
1232 
1233 return FALSE;
1234 }
1235 
1236 
1237 
1238 /*************************************************
1239 *         Decode integer argument value          *
1240 *************************************************/
1241 
1242 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1243 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1244 just keep it simple.
1245 
1246 Arguments:
1247   option_data   the option data string
1248   op            the option item (for error messages)
1249   longop        TRUE if option given in long form
1250 
1251 Returns:        a long integer
1252 */
1253 
1254 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1255 decode_number(char *option_data, option_item *op, BOOL longop)
1256 {
1257 unsigned long int n = 0;
1258 char *endptr = option_data;
1259 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1260 while (isdigit((unsigned char)(*endptr)))
1261   n = n * 10 + (int)(*endptr++ - '0');
1262 if (toupper(*endptr) == 'K')
1263   {
1264   n *= 1024;
1265   endptr++;
1266   }
1267 else if (toupper(*endptr) == 'M')
1268   {
1269   n *= 1024*1024;
1270   endptr++;
1271   }
1272 
1273 if (*endptr != 0)   /* Error */
1274   {
1275   if (longop)
1276     {
1277     char *equals = strchr(op->long_name, '=');
1278     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1279       (int)(equals - op->long_name);
1280     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1281       option_data, nlen, op->long_name);
1282     }
1283   else
1284     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1285       option_data, op->one_char);
1286   pcre2grep_exit(usage(2));
1287   }
1288 
1289 return n;
1290 }
1291 
1292 
1293 
1294 /*************************************************
1295 *       Add item to a chain of numbers           *
1296 *************************************************/
1297 
1298 /* Used to add an item onto a chain, or just return an unconnected item if the
1299 "after" argument is NULL.
1300 
1301 Arguments:
1302   n          the number to add
1303   after      if not NULL points to item to insert after
1304 
1305 Returns:     new number block
1306 */
1307 
1308 static omstr *
add_number(int n,omstr * after)1309 add_number(int n, omstr *after)
1310 {
1311 omstr *om = (omstr *)malloc(sizeof(omstr));
1312 
1313 if (om == NULL)
1314   {
1315   fprintf(stderr, "pcre2grep: malloc failed\n");
1316   pcre2grep_exit(2);
1317   }
1318 om->next = NULL;
1319 om->groupnum = n;
1320 
1321 if (after != NULL)
1322   {
1323   om->next = after->next;
1324   after->next = om;
1325   }
1326 return om;
1327 }
1328 
1329 
1330 
1331 /*************************************************
1332 *            Read one line of input              *
1333 *************************************************/
1334 
1335 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1336 BZ2_read) into a large buffer, so many lines may be read at once. However,
1337 doing this for tty input means that no output appears until a lot of input has
1338 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1339 for this, because it does not stop at a binary zero, and therefore there is no
1340 way of telling how many characters it has read, because there may be binary
1341 zeros embedded in the data. This function is also used for reading patterns
1342 from files (the -f option).
1343 
1344 Arguments:
1345   buffer     the buffer to read into
1346   length     the maximum number of characters to read
1347   f          the file
1348 
1349 Returns:     the number of characters read, zero at end of file
1350 */
1351 
1352 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1353 read_one_line(char *buffer, int length, FILE *f)
1354 {
1355 int c;
1356 int yield = 0;
1357 while ((c = fgetc(f)) != EOF)
1358   {
1359   buffer[yield++] = c;
1360   if (c == '\n' || yield >= length) break;
1361   }
1362 return yield;
1363 }
1364 
1365 
1366 
1367 /*************************************************
1368 *             Find end of line                   *
1369 *************************************************/
1370 
1371 /* The length of the endline sequence that is found is set via lenptr. This may
1372 be zero at the very end of the file if there is no line-ending sequence there.
1373 
1374 Arguments:
1375   p         current position in line
1376   endptr    end of available data
1377   lenptr    where to put the length of the eol sequence
1378 
1379 Returns:    pointer after the last byte of the line,
1380             including the newline byte(s)
1381 */
1382 
1383 static char *
end_of_line(char * p,char * endptr,int * lenptr)1384 end_of_line(char *p, char *endptr, int *lenptr)
1385 {
1386 switch(endlinetype)
1387   {
1388   default:      /* Just in case */
1389   case PCRE2_NEWLINE_LF:
1390   while (p < endptr && *p != '\n') p++;
1391   if (p < endptr)
1392     {
1393     *lenptr = 1;
1394     return p + 1;
1395     }
1396   *lenptr = 0;
1397   return endptr;
1398 
1399   case PCRE2_NEWLINE_CR:
1400   while (p < endptr && *p != '\r') p++;
1401   if (p < endptr)
1402     {
1403     *lenptr = 1;
1404     return p + 1;
1405     }
1406   *lenptr = 0;
1407   return endptr;
1408 
1409   case PCRE2_NEWLINE_NUL:
1410   while (p < endptr && *p != '\0') p++;
1411   if (p < endptr)
1412     {
1413     *lenptr = 1;
1414     return p + 1;
1415     }
1416   *lenptr = 0;
1417   return endptr;
1418 
1419   case PCRE2_NEWLINE_CRLF:
1420   for (;;)
1421     {
1422     while (p < endptr && *p != '\r') p++;
1423     if (++p >= endptr)
1424       {
1425       *lenptr = 0;
1426       return endptr;
1427       }
1428     if (*p == '\n')
1429       {
1430       *lenptr = 2;
1431       return p + 1;
1432       }
1433     }
1434   break;
1435 
1436   case PCRE2_NEWLINE_ANYCRLF:
1437   while (p < endptr)
1438     {
1439     int extra = 0;
1440     int c = *((unsigned char *)p);
1441 
1442     if (utf && c >= 0xc0)
1443       {
1444       int gcii, gcss;
1445       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1446       gcss = 6*extra;
1447       c = (c & utf8_table3[extra]) << gcss;
1448       for (gcii = 1; gcii <= extra; gcii++)
1449         {
1450         gcss -= 6;
1451         c |= (p[gcii] & 0x3f) << gcss;
1452         }
1453       }
1454 
1455     p += 1 + extra;
1456 
1457     switch (c)
1458       {
1459       case '\n':
1460       *lenptr = 1;
1461       return p;
1462 
1463       case '\r':
1464       if (p < endptr && *p == '\n')
1465         {
1466         *lenptr = 2;
1467         p++;
1468         }
1469       else *lenptr = 1;
1470       return p;
1471 
1472       default:
1473       break;
1474       }
1475     }   /* End of loop for ANYCRLF case */
1476 
1477   *lenptr = 0;  /* Must have hit the end */
1478   return endptr;
1479 
1480   case PCRE2_NEWLINE_ANY:
1481   while (p < endptr)
1482     {
1483     int extra = 0;
1484     int c = *((unsigned char *)p);
1485 
1486     if (utf && c >= 0xc0)
1487       {
1488       int gcii, gcss;
1489       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1490       gcss = 6*extra;
1491       c = (c & utf8_table3[extra]) << gcss;
1492       for (gcii = 1; gcii <= extra; gcii++)
1493         {
1494         gcss -= 6;
1495         c |= (p[gcii] & 0x3f) << gcss;
1496         }
1497       }
1498 
1499     p += 1 + extra;
1500 
1501     switch (c)
1502       {
1503       case '\n':    /* LF */
1504       case '\v':    /* VT */
1505       case '\f':    /* FF */
1506       *lenptr = 1;
1507       return p;
1508 
1509       case '\r':    /* CR */
1510       if (p < endptr && *p == '\n')
1511         {
1512         *lenptr = 2;
1513         p++;
1514         }
1515       else *lenptr = 1;
1516       return p;
1517 
1518 #ifndef EBCDIC
1519       case 0x85:    /* Unicode NEL */
1520       *lenptr = utf? 2 : 1;
1521       return p;
1522 
1523       case 0x2028:  /* Unicode LS */
1524       case 0x2029:  /* Unicode PS */
1525       *lenptr = 3;
1526       return p;
1527 #endif  /* Not EBCDIC */
1528 
1529       default:
1530       break;
1531       }
1532     }   /* End of loop for ANY case */
1533 
1534   *lenptr = 0;  /* Must have hit the end */
1535   return endptr;
1536   }     /* End of overall switch */
1537 }
1538 
1539 
1540 
1541 /*************************************************
1542 *         Find start of previous line            *
1543 *************************************************/
1544 
1545 /* This is called when looking back for before lines to print.
1546 
1547 Arguments:
1548   p         start of the subsequent line
1549   startptr  start of available data
1550 
1551 Returns:    pointer to the start of the previous line
1552 */
1553 
1554 static char *
previous_line(char * p,char * startptr)1555 previous_line(char *p, char *startptr)
1556 {
1557 switch(endlinetype)
1558   {
1559   default:      /* Just in case */
1560   case PCRE2_NEWLINE_LF:
1561   p--;
1562   while (p > startptr && p[-1] != '\n') p--;
1563   return p;
1564 
1565   case PCRE2_NEWLINE_CR:
1566   p--;
1567   while (p > startptr && p[-1] != '\n') p--;
1568   return p;
1569 
1570   case PCRE2_NEWLINE_NUL:
1571   p--;
1572   while (p > startptr && p[-1] != '\0') p--;
1573   return p;
1574 
1575   case PCRE2_NEWLINE_CRLF:
1576   for (;;)
1577     {
1578     p -= 2;
1579     while (p > startptr && p[-1] != '\n') p--;
1580     if (p <= startptr + 1 || p[-2] == '\r') return p;
1581     }
1582   /* Control can never get here */
1583 
1584   case PCRE2_NEWLINE_ANY:
1585   case PCRE2_NEWLINE_ANYCRLF:
1586   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1587   if (utf) while ((*p & 0xc0) == 0x80) p--;
1588 
1589   while (p > startptr)
1590     {
1591     unsigned int c;
1592     char *pp = p - 1;
1593 
1594     if (utf)
1595       {
1596       int extra = 0;
1597       while ((*pp & 0xc0) == 0x80) pp--;
1598       c = *((unsigned char *)pp);
1599       if (c >= 0xc0)
1600         {
1601         int gcii, gcss;
1602         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1603         gcss = 6*extra;
1604         c = (c & utf8_table3[extra]) << gcss;
1605         for (gcii = 1; gcii <= extra; gcii++)
1606           {
1607           gcss -= 6;
1608           c |= (pp[gcii] & 0x3f) << gcss;
1609           }
1610         }
1611       }
1612     else c = *((unsigned char *)pp);
1613 
1614     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1615       {
1616       case '\n':    /* LF */
1617       case '\r':    /* CR */
1618       return p;
1619 
1620       default:
1621       break;
1622       }
1623 
1624     else switch (c)
1625       {
1626       case '\n':    /* LF */
1627       case '\v':    /* VT */
1628       case '\f':    /* FF */
1629       case '\r':    /* CR */
1630 #ifndef EBCDIC
1631       case 0x85:    /* Unicode NEL */
1632       case 0x2028:  /* Unicode LS */
1633       case 0x2029:  /* Unicode PS */
1634 #endif  /* Not EBCDIC */
1635       return p;
1636 
1637       default:
1638       break;
1639       }
1640 
1641     p = pp;  /* Back one character */
1642     }        /* End of loop for ANY case */
1643 
1644   return startptr;  /* Hit start of data */
1645   }     /* End of overall switch */
1646 }
1647 
1648 
1649 
1650 /*************************************************
1651 *       Print the previous "after" lines         *
1652 *************************************************/
1653 
1654 /* This is called if we are about to lose said lines because of buffer filling,
1655 and at the end of the file. The data in the line is written using fwrite() so
1656 that a binary zero does not terminate it.
1657 
1658 Arguments:
1659   lastmatchnumber   the number of the last matching line, plus one
1660   lastmatchrestart  where we restarted after the last match
1661   endptr            end of available data
1662   printname         filename for printing
1663 
1664 Returns:            nothing
1665 */
1666 
1667 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1668 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1669   char *endptr, const char *printname)
1670 {
1671 if (after_context > 0 && lastmatchnumber > 0)
1672   {
1673   int count = 0;
1674   while (lastmatchrestart < endptr && count < after_context)
1675     {
1676     int ellength;
1677     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1678     if (ellength == 0 && pp == main_buffer + bufsize) break;
1679     if (printname != NULL) fprintf(stdout, "%s-", printname);
1680     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1681     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1682     lastmatchrestart = pp;
1683     count++;
1684     }
1685   if (count > 0) hyphenpending = TRUE;
1686   }
1687 }
1688 
1689 
1690 
1691 /*************************************************
1692 *   Apply patterns to subject till one matches   *
1693 *************************************************/
1694 
1695 /* This function is called to run through all patterns, looking for a match. It
1696 is used multiple times for the same subject when colouring is enabled, in order
1697 to find all possible matches.
1698 
1699 Arguments:
1700   matchptr     the start of the subject
1701   length       the length of the subject to match
1702   options      options for pcre2_match()
1703   startoffset  where to start matching
1704   mrc          address of where to put the result of pcre2_match()
1705 
1706 Returns:      TRUE if there was a match
1707               FALSE if there was no match
1708               invert if there was a non-fatal error
1709 */
1710 
1711 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1712 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1713   PCRE2_SIZE startoffset, int *mrc)
1714 {
1715 int i;
1716 PCRE2_SIZE slen = length;
1717 patstr *p = patterns;
1718 const char *msg = "this text:\n\n";
1719 
1720 if (slen > 200)
1721   {
1722   slen = 200;
1723   msg = "text that starts:\n\n";
1724   }
1725 for (i = 1; p != NULL; p = p->next, i++)
1726   {
1727   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1728     startoffset, options, match_data, match_context);
1729   if (*mrc >= 0) return TRUE;
1730   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1731   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1732   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1733   fprintf(stderr, "%s", msg);
1734   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1735   fprintf(stderr, "\n\n");
1736   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1737       *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1738     resource_error = TRUE;
1739   if (error_count++ > 20)
1740     {
1741     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1742     pcre2grep_exit(2);
1743     }
1744   return invert;    /* No more matching; don't show the line again */
1745   }
1746 
1747 return FALSE;  /* No match, no errors */
1748 }
1749 
1750 
1751 /*************************************************
1752 *          Check output text for errors          *
1753 *************************************************/
1754 
1755 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)1756 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
1757 {
1758 PCRE2_SPTR begin = string;
1759 for (; *string != 0; string++)
1760   {
1761   if (*string == '$')
1762     {
1763     PCRE2_SIZE capture_id = 0;
1764     BOOL brace = FALSE;
1765 
1766     string++;
1767 
1768     /* Syntax error: a character must be present after $. */
1769     if (*string == 0)
1770       {
1771       if (!callout)
1772         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1773           (int)(string - begin), "no character after $");
1774       return FALSE;
1775       }
1776 
1777     if (*string == '{')
1778       {
1779       /* Must be a decimal number in braces, e.g: {5} or {38} */
1780       string++;
1781 
1782       brace = TRUE;
1783       }
1784 
1785     if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1786       {
1787       do
1788         {
1789         /* Maximum capture id is 65535. */
1790         if (capture_id <= 65535)
1791           capture_id = capture_id * 10 + (*string - '0');
1792 
1793         string++;
1794         }
1795       while (*string >= '0' && *string <= '9');
1796 
1797       if (brace)
1798         {
1799         /* Syntax error: closing brace is missing. */
1800         if (*string != '}')
1801           {
1802           if (!callout)
1803             fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1804               (int)(string - begin), "missing closing brace");
1805           return FALSE;
1806           }
1807         }
1808       else
1809         {
1810         /* To negate the effect of the for. */
1811         string--;
1812         }
1813       }
1814     else if (brace)
1815       {
1816       /* Syntax error: a decimal number required. */
1817       if (!callout)
1818         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1819           (int)(string - begin), "decimal number expected");
1820       return FALSE;
1821       }
1822     else if (*string == 'o')
1823       {
1824       string++;
1825 
1826       if (*string < '0' || *string > '7')
1827         {
1828         /* Syntax error: an octal number required. */
1829         if (!callout)
1830           fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1831             (int)(string - begin), "octal number expected");
1832         return FALSE;
1833         }
1834       }
1835     else if (*string == 'x')
1836       {
1837       string++;
1838 
1839       if (!isxdigit((unsigned char)*string))
1840         {
1841         /* Syntax error: a hexdecimal number required. */
1842         if (!callout)
1843           fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1844             (int)(string - begin), "hexadecimal number expected");
1845         return FALSE;
1846         }
1847       }
1848     }
1849   }
1850 
1851   return TRUE;
1852 }
1853 
1854 
1855 /*************************************************
1856 *              Display output text               *
1857 *************************************************/
1858 
1859 /* Display the output text, which is assumed to have already been syntax
1860 checked. Output may contain escape sequences started by the dollar sign. The
1861 escape sequences are substituted as follows:
1862 
1863   $<digits> or ${<digits>} is replaced by the captured substring of the given
1864   decimal number; zero will substitute the whole match. If the number is
1865   greater than the number of capturing substrings, or if the capture is unset,
1866   the replacement is empty.
1867 
1868   $a is replaced by bell.
1869   $b is replaced by backspace.
1870   $e is replaced by escape.
1871   $f is replaced by form feed.
1872   $n is replaced by newline.
1873   $r is replaced by carriage return.
1874   $t is replaced by tab.
1875   $v is replaced by vertical tab.
1876 
1877   $o<digits> is replaced by the character represented by the given octal
1878   number; up to three digits are processed.
1879 
1880   $x<digits> is replaced by the character represented by the given hexadecimal
1881   number; up to two digits are processed.
1882 
1883   Any other character is substituted by itself. E.g: $$ is replaced by a single
1884   dollar.
1885 
1886 Arguments:
1887   string:       the output text
1888   callout:      TRUE for the builtin callout, FALSE for --output
1889   subject       the start of the subject
1890   ovector:      capture offsets
1891   capture_top:  number of captures
1892 
1893 Returns:        TRUE if something was output, other than newline
1894                 FALSE if nothing was output, or newline was last output
1895 */
1896 
1897 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)1898 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
1899   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
1900 {
1901 BOOL printed = FALSE;
1902 
1903 for (; *string != 0; string++)
1904   {
1905   int ch = EOF;
1906   if (*string == '$')
1907     {
1908     PCRE2_SIZE capture_id = 0;
1909     BOOL brace = FALSE;
1910 
1911     string++;
1912 
1913     if (*string == '{')
1914       {
1915       /* Must be a decimal number in braces, e.g: {5} or {38} */
1916       string++;
1917 
1918       brace = TRUE;
1919       }
1920 
1921     if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1922       {
1923       do
1924         {
1925         /* Maximum capture id is 65535. */
1926         if (capture_id <= 65535)
1927           capture_id = capture_id * 10 + (*string - '0');
1928 
1929         string++;
1930         }
1931       while (*string >= '0' && *string <= '9');
1932 
1933       if (!brace)
1934         {
1935         /* To negate the effect of the for. */
1936         string--;
1937         }
1938 
1939       if (capture_id < capture_top)
1940         {
1941         PCRE2_SIZE capturesize;
1942         capture_id *= 2;
1943 
1944         capturesize = ovector[capture_id + 1] - ovector[capture_id];
1945         if (capturesize > 0)
1946           {
1947           print_match(subject + ovector[capture_id], capturesize);
1948           printed = TRUE;
1949           }
1950         }
1951       }
1952     else if (*string == 'a') ch = '\a';
1953     else if (*string == 'b') ch = '\b';
1954 #ifndef EBCDIC
1955     else if (*string == 'e') ch = '\033';
1956 #else
1957     else if (*string == 'e') ch = '\047';
1958 #endif
1959     else if (*string == 'f') ch = '\f';
1960     else if (*string == 'r') ch = '\r';
1961     else if (*string == 't') ch = '\t';
1962     else if (*string == 'v') ch = '\v';
1963     else if (*string == 'n')
1964       {
1965       fprintf(stdout, STDOUT_NL);
1966       printed = FALSE;
1967       }
1968     else if (*string == 'o')
1969       {
1970       string++;
1971 
1972       ch = *string - '0';
1973       if (string[1] >= '0' && string[1] <= '7')
1974         {
1975         string++;
1976         ch = ch * 8 + (*string - '0');
1977         }
1978       if (string[1] >= '0' && string[1] <= '7')
1979         {
1980         string++;
1981         ch = ch * 8 + (*string - '0');
1982         }
1983       }
1984     else if (*string == 'x')
1985       {
1986       string++;
1987 
1988       if (*string >= '0' && *string <= '9')
1989         ch = *string - '0';
1990       else
1991         ch = (*string | 0x20) - 'a' + 10;
1992       if (isxdigit((unsigned char)string[1]))
1993         {
1994         string++;
1995         ch *= 16;
1996         if (*string >= '0' && *string <= '9')
1997           ch += *string - '0';
1998         else
1999           ch += (*string | 0x20) - 'a' + 10;
2000         }
2001       }
2002     else
2003       {
2004       ch = *string;
2005       }
2006     }
2007   else
2008     {
2009     ch = *string;
2010     }
2011   if (ch != EOF)
2012     {
2013     fprintf(stdout, "%c", ch);
2014     printed = TRUE;
2015     }
2016   }
2017 
2018 return printed;
2019 }
2020 
2021 
2022 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2023 
2024 /*************************************************
2025 *        Parse and execute callout scripts       *
2026 *************************************************/
2027 
2028 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2029 string block and executes the program specified by the string. The string is a
2030 list of substrings separated by pipe characters. The first substring represents
2031 the executable name, and the following substrings specify the arguments:
2032 
2033   program_name|param1|param2|...
2034 
2035 Any substring (including the program name) can contain escape sequences
2036 started by the dollar character. The escape sequences are substituted as
2037 follows:
2038 
2039   $<digits> or ${<digits>} is replaced by the captured substring of the given
2040   decimal number, which must be greater than zero. If the number is greater
2041   than the number of capturing substrings, or if the capture is unset, the
2042   replacement is empty.
2043 
2044   Any other character is substituted by itself. E.g: $$ is replaced by a single
2045   dollar or $| replaced by a pipe character.
2046 
2047 Alternatively, if string starts with pipe, the remainder is taken as an output
2048 string, same as --output. This is the only form that is supported if
2049 SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2050 separate each callout, defaulting to newline.
2051 
2052 Example:
2053 
2054   echo -e "abcde\n12345" | pcre2grep \
2055     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2056 
2057   Output:
2058 
2059     Arg1: [a] [bcd] [d] Arg2: |a| ()
2060     abcde
2061     Arg1: [1] [234] [4] Arg2: |1| ()
2062     12345
2063 
2064 Arguments:
2065   blockptr     the callout block
2066 
2067 Returns:       currently it always returns with 0
2068 */
2069 
2070 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2071 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2072 {
2073 PCRE2_SIZE length = calloutptr->callout_string_length;
2074 PCRE2_SPTR string = calloutptr->callout_string;
2075 PCRE2_SPTR subject = calloutptr->subject;
2076 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2077 PCRE2_SIZE capture_top = calloutptr->capture_top;
2078 
2079 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2080 PCRE2_SIZE argsvectorlen = 2;
2081 PCRE2_SIZE argslen = 1;
2082 char *args;
2083 char *argsptr;
2084 char **argsvector;
2085 char **argsvectorptr;
2086 #ifndef WIN32
2087 pid_t pid;
2088 #endif
2089 int result = 0;
2090 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2091 
2092 (void)unused;   /* Avoid compiler warning */
2093 
2094 /* Only callout with strings are supported. */
2095 
2096 if (string == NULL || length == 0) return 0;
2097 
2098 /* If there's no command, output the remainder directly. */
2099 
2100 if (*string == '|')
2101   {
2102   string++;
2103   if (!syntax_check_output_text(string, TRUE)) return 0;
2104   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2105   return 0;
2106   }
2107 
2108 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2109 return 0;
2110 #else
2111 
2112 /* Checking syntax and compute the number of string fragments. Callout strings
2113 are ignored in case of a syntax error. */
2114 
2115 while (length > 0)
2116   {
2117   if (*string == '|')
2118     {
2119     argsvectorlen++;
2120 
2121     /* Maximum 10000 arguments allowed. */
2122     if (argsvectorlen > 10000) return 0;
2123     }
2124   else if (*string == '$')
2125     {
2126     PCRE2_SIZE capture_id = 0;
2127 
2128     string++;
2129     length--;
2130 
2131     /* Syntax error: a character must be present after $. */
2132     if (length == 0) return 0;
2133 
2134     if (*string >= '1' && *string <= '9')
2135       {
2136       do
2137         {
2138         /* Maximum capture id is 65535. */
2139         if (capture_id <= 65535)
2140           capture_id = capture_id * 10 + (*string - '0');
2141 
2142         string++;
2143         length--;
2144         }
2145       while (length > 0 && *string >= '0' && *string <= '9');
2146 
2147       /* To negate the effect of string++ below. */
2148       string--;
2149       length++;
2150       }
2151     else if (*string == '{')
2152       {
2153       /* Must be a decimal number in braces, e.g: {5} or {38} */
2154       string++;
2155       length--;
2156 
2157       /* Syntax error: a decimal number required. */
2158       if (length == 0) return 0;
2159       if (*string < '1' || *string > '9') return 0;
2160 
2161       do
2162         {
2163         /* Maximum capture id is 65535. */
2164         if (capture_id <= 65535)
2165           capture_id = capture_id * 10 + (*string - '0');
2166 
2167         string++;
2168         length--;
2169 
2170         /* Syntax error: no more characters */
2171         if (length == 0) return 0;
2172         }
2173       while (*string >= '0' && *string <= '9');
2174 
2175       /* Syntax error: closing brace is missing. */
2176       if (*string != '}') return 0;
2177       }
2178 
2179     if (capture_id > 0)
2180       {
2181       if (capture_id < capture_top)
2182         {
2183         capture_id *= 2;
2184         argslen += ovector[capture_id + 1] - ovector[capture_id];
2185         }
2186 
2187       /* To negate the effect of argslen++ below. */
2188       argslen--;
2189       }
2190     }
2191 
2192   string++;
2193   length--;
2194   argslen++;
2195   }
2196 
2197 args = (char*)malloc(argslen);
2198 if (args == NULL) return 0;
2199 
2200 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2201 if (argsvector == NULL)
2202   {
2203   free(args);
2204   return 0;
2205   }
2206 
2207 argsptr = args;
2208 argsvectorptr = argsvector;
2209 
2210 *argsvectorptr++ = argsptr;
2211 
2212 length = calloutptr->callout_string_length;
2213 string = calloutptr->callout_string;
2214 
2215 while (length > 0)
2216   {
2217   if (*string == '|')
2218     {
2219     *argsptr++ = '\0';
2220     *argsvectorptr++ = argsptr;
2221     }
2222   else if (*string == '$')
2223     {
2224     string++;
2225     length--;
2226 
2227     if ((*string >= '1' && *string <= '9') || *string == '{')
2228       {
2229       PCRE2_SIZE capture_id = 0;
2230 
2231       if (*string != '{')
2232         {
2233         do
2234           {
2235           /* Maximum capture id is 65535. */
2236           if (capture_id <= 65535)
2237             capture_id = capture_id * 10 + (*string - '0');
2238 
2239           string++;
2240           length--;
2241           }
2242         while (length > 0 && *string >= '0' && *string <= '9');
2243 
2244         /* To negate the effect of string++ below. */
2245         string--;
2246         length++;
2247         }
2248       else
2249         {
2250         string++;
2251         length--;
2252 
2253         do
2254           {
2255           /* Maximum capture id is 65535. */
2256           if (capture_id <= 65535)
2257             capture_id = capture_id * 10 + (*string - '0');
2258 
2259           string++;
2260           length--;
2261           }
2262         while (*string != '}');
2263         }
2264 
2265         if (capture_id < capture_top)
2266           {
2267           PCRE2_SIZE capturesize;
2268           capture_id *= 2;
2269 
2270           capturesize = ovector[capture_id + 1] - ovector[capture_id];
2271           memcpy(argsptr, subject + ovector[capture_id], capturesize);
2272           argsptr += capturesize;
2273           }
2274       }
2275     else
2276       {
2277       *argsptr++ = *string;
2278       }
2279     }
2280   else
2281     {
2282     *argsptr++ = *string;
2283     }
2284 
2285   string++;
2286   length--;
2287   }
2288 
2289 *argsptr++ = '\0';
2290 *argsvectorptr = NULL;
2291 
2292 /* Running an external command is system-dependent. Handle Windows and VMS as
2293 necessary, otherwise assume fork(). */
2294 
2295 #ifdef WIN32
2296 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2297 
2298 #elif defined __VMS
2299   {
2300   char cmdbuf[500];
2301   short i = 0;
2302   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2303   $DESCRIPTOR(cmd, cmdbuf);
2304 
2305   cmdbuf[0] = 0;
2306   while (argsvector[i])
2307   {
2308     strcat(cmdbuf, argsvector[i]);
2309     strcat(cmdbuf, " ");
2310     i++;
2311   }
2312   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2313   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2314   if (!(status & 1)) result = 0;
2315   else result = retstat & 1 ? 0 : 1;
2316   }
2317 
2318 #else  /* Neither Windows nor VMS */
2319 pid = fork();
2320 if (pid == 0)
2321   {
2322   (void)execv(argsvector[0], argsvector);
2323   /* Control gets here if there is an error, e.g. a non-existent program */
2324   exit(1);
2325   }
2326 else if (pid > 0)
2327   (void)waitpid(pid, &result, 0);
2328 #endif  /* End Windows/VMS/other handling */
2329 
2330 free(args);
2331 free(argsvector);
2332 
2333 /* Currently negative return values are not supported, only zero (match
2334 continues) or non-zero (match fails). */
2335 
2336 return result != 0;
2337 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2338 }
2339 #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2340 
2341 
2342 
2343 /*************************************************
2344 *     Read a portion of the file into buffer     *
2345 *************************************************/
2346 
2347 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2348 fill_buffer(void *handle, int frtype, char *buffer, int length,
2349   BOOL input_line_buffered)
2350 {
2351 (void)frtype;  /* Avoid warning when not used */
2352 
2353 #ifdef SUPPORT_LIBZ
2354 if (frtype == FR_LIBZ)
2355   return gzread((gzFile)handle, buffer, length);
2356 else
2357 #endif
2358 
2359 #ifdef SUPPORT_LIBBZ2
2360 if (frtype == FR_LIBBZ2)
2361   return BZ2_bzread((BZFILE *)handle, buffer, length);
2362 else
2363 #endif
2364 
2365 return (input_line_buffered ?
2366   read_one_line(buffer, length, (FILE *)handle) :
2367   fread(buffer, 1, length, (FILE *)handle));
2368 }
2369 
2370 
2371 
2372 /*************************************************
2373 *            Grep an individual file             *
2374 *************************************************/
2375 
2376 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2377 times the value of bufthird. The matching point is never allowed to stray into
2378 the top third of the buffer, thus keeping more of the file available for
2379 context printing or for multiline scanning. For large files, the pointer will
2380 be in the middle third most of the time, so the bottom third is available for
2381 "before" context printing.
2382 
2383 Arguments:
2384   handle       the fopened FILE stream for a normal file
2385                the gzFile pointer when reading is via libz
2386                the BZFILE pointer when reading is via libbz2
2387   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2388   filename     the file name or NULL (for errors)
2389   printname    the file name if it is to be printed for each match
2390                or NULL if the file name is not to be printed
2391                it cannot be NULL if filenames[_nomatch]_only is set
2392 
2393 Returns:       0 if there was at least one match
2394                1 otherwise (no matches)
2395                2 if an overlong line is encountered
2396                3 if there is a read error on a .bz2 file
2397 */
2398 
2399 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2400 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2401 {
2402 int rc = 1;
2403 int filepos = 0;
2404 unsigned long int linenumber = 1;
2405 unsigned long int lastmatchnumber = 0;
2406 unsigned long int count = 0;
2407 char *lastmatchrestart = main_buffer;
2408 char *ptr = main_buffer;
2409 char *endptr;
2410 PCRE2_SIZE bufflength;
2411 BOOL binary = FALSE;
2412 BOOL endhyphenpending = FALSE;
2413 BOOL input_line_buffered = line_buffered;
2414 FILE *in = NULL;                    /* Ensure initialized */
2415 
2416 /* Do the first read into the start of the buffer and set up the pointer to end
2417 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2418 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2419 fail. */
2420 
2421 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2422   {
2423   in = (FILE *)handle;
2424   if (is_file_tty(in)) input_line_buffered = TRUE;
2425   }
2426 else input_line_buffered = FALSE;
2427 
2428 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2429   input_line_buffered);
2430 
2431 #ifdef SUPPORT_LIBBZ2
2432 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE; */
2433 #endif
2434 
2435 endptr = main_buffer + bufflength;
2436 
2437 /* Unless binary-files=text, see if we have a binary file. This uses the same
2438 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2439 file. However, when the newline convention is binary zero, we can't do this. */
2440 
2441 if (binary_files != BIN_TEXT)
2442   {
2443   if (endlinetype != PCRE2_NEWLINE_NUL)
2444     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2445       != NULL;
2446   if (binary && binary_files == BIN_NOMATCH) return 1;
2447   }
2448 
2449 /* Loop while the current pointer is not at the end of the file. For large
2450 files, endptr will be at the end of the buffer when we are in the middle of the
2451 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2452 way, the buffer is shifted left and re-filled. */
2453 
2454 while (ptr < endptr)
2455   {
2456   int endlinelength;
2457   int mrc = 0;
2458   unsigned int options = 0;
2459   BOOL match;
2460   char *t = ptr;
2461   PCRE2_SIZE length, linelength;
2462   PCRE2_SIZE startoffset = 0;
2463 
2464   /* At this point, ptr is at the start of a line. We need to find the length
2465   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2466   length remainder of the data in the buffer. Otherwise, it is the length of
2467   the next line, excluding the terminating newline. After matching, we always
2468   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2469   option is used for compiling, so that any match is constrained to be in the
2470   first line. */
2471 
2472   t = end_of_line(t, endptr, &endlinelength);
2473   linelength = t - ptr - endlinelength;
2474   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2475 
2476   /* Check to see if the line we are looking at extends right to the very end
2477   of the buffer without a line terminator. This means the line is too long to
2478   handle at the current buffer size. Until the buffer reaches its maximum size,
2479   try doubling it and reading more data. */
2480 
2481   if (endlinelength == 0 && t == main_buffer + bufsize)
2482     {
2483     if (bufthird < max_bufthird)
2484       {
2485       char *new_buffer;
2486       int new_bufthird = 2*bufthird;
2487 
2488       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2489       new_buffer = (char *)malloc(3*new_bufthird);
2490 
2491       if (new_buffer == NULL)
2492         {
2493         fprintf(stderr,
2494           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2495           "pcre2grep: not enough memory to increase the buffer size to %d\n",
2496           linenumber,
2497           (filename == NULL)? "" : " of file ",
2498           (filename == NULL)? "" : filename,
2499           new_bufthird);
2500         return 2;
2501         }
2502 
2503       /* Copy the data and adjust pointers to the new buffer location. */
2504 
2505       memcpy(new_buffer, main_buffer, bufsize);
2506       bufthird = new_bufthird;
2507       bufsize = 3*bufthird;
2508       ptr = new_buffer + (ptr - main_buffer);
2509       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2510       free(main_buffer);
2511       main_buffer = new_buffer;
2512 
2513       /* Read more data into the buffer and then try to find the line ending
2514       again. */
2515 
2516       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2517         bufsize - bufflength, input_line_buffered);
2518       endptr = main_buffer + bufflength;
2519       continue;
2520       }
2521     else
2522       {
2523       fprintf(stderr,
2524         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2525         "pcre2grep: the maximum buffer size is %d\n"
2526         "pcre2grep: use the --max-buffer-size option to change it\n",
2527         linenumber,
2528         (filename == NULL)? "" : " of file ",
2529         (filename == NULL)? "" : filename,
2530         bufthird);
2531       return 2;
2532       }
2533     }
2534 
2535   /* Extra processing for Jeffrey Friedl's debugging. */
2536 
2537 #ifdef JFRIEDL_DEBUG
2538   if (jfriedl_XT || jfriedl_XR)
2539   {
2540 #     include <sys/time.h>
2541 #     include <time.h>
2542       struct timeval start_time, end_time;
2543       struct timezone dummy;
2544       int i;
2545 
2546       if (jfriedl_XT)
2547       {
2548           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2549           const char *orig = ptr;
2550           ptr = malloc(newlen + 1);
2551           if (!ptr) {
2552                   printf("out of memory");
2553                   pcre2grep_exit(2);
2554           }
2555           endptr = ptr;
2556           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2557           for (i = 0; i < jfriedl_XT; i++) {
2558                   strncpy(endptr, orig,  length);
2559                   endptr += length;
2560           }
2561           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2562           length = newlen;
2563       }
2564 
2565       if (gettimeofday(&start_time, &dummy) != 0)
2566               perror("bad gettimeofday");
2567 
2568 
2569       for (i = 0; i < jfriedl_XR; i++)
2570           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2571               PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
2572 
2573       if (gettimeofday(&end_time, &dummy) != 0)
2574               perror("bad gettimeofday");
2575 
2576       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2577                       -
2578                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2579 
2580       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2581       return 0;
2582   }
2583 #endif
2584 
2585   /* We come back here after a match when only_matching_count is non-zero, in
2586   order to find any further matches in the same line. This applies to
2587   --only-matching, --file-offsets, and --line-offsets. */
2588 
2589   ONLY_MATCHING_RESTART:
2590 
2591   /* Run through all the patterns until one matches or there is an error other
2592   than NOMATCH. This code is in a subroutine so that it can be re-used for
2593   finding subsequent matches when colouring matched lines. After finding one
2594   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2595   this line. */
2596 
2597   match = match_patterns(ptr, length, options, startoffset, &mrc);
2598   options = PCRE2_NOTEMPTY;
2599 
2600   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2601   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2602   return code - to output data lines, so that binary zeroes are treated as just
2603   another data character. */
2604 
2605   if (match != invert)
2606     {
2607     BOOL hyphenprinted = FALSE;
2608 
2609     /* We've failed if we want a file that doesn't have any matches. */
2610 
2611     if (filenames == FN_NOMATCH_ONLY) return 1;
2612 
2613     /* If all we want is a yes/no answer, we can return immediately. */
2614 
2615     if (quiet) return 0;
2616 
2617     /* Just count if just counting is wanted. */
2618 
2619     else if (count_only || show_total_count) count++;
2620 
2621     /* When handling a binary file and binary-files==binary, the "binary"
2622     variable will be set true (it's false in all other cases). In this
2623     situation we just want to output the file name. No need to scan further. */
2624 
2625     else if (binary)
2626       {
2627       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2628       return 0;
2629       }
2630 
2631     /* Likewise, if all we want is a file name, there is no need to scan any
2632     more lines in the file. */
2633 
2634     else if (filenames == FN_MATCH_ONLY)
2635       {
2636       fprintf(stdout, "%s" STDOUT_NL, printname);
2637       return 0;
2638       }
2639 
2640     /* The --only-matching option prints just the substring that matched,
2641     and/or one or more captured portions of it, as long as these strings are
2642     not empty. The --file-offsets and --line-offsets options output offsets for
2643     the matching substring (all three set only_matching_count non-zero). None
2644     of these mutually exclusive options prints any context. Afterwards, adjust
2645     the start and then jump back to look for further matches in the same line.
2646     If we are in invert mode, however, nothing is printed and we do not restart
2647     - this could still be useful because the return code is set. */
2648 
2649     else if (only_matching_count != 0)
2650       {
2651       if (!invert)
2652         {
2653         PCRE2_SIZE oldstartoffset;
2654 
2655         if (printname != NULL) fprintf(stdout, "%s:", printname);
2656         if (number) fprintf(stdout, "%lu:", linenumber);
2657 
2658         /* Handle --line-offsets */
2659 
2660         if (line_offsets)
2661           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2662             (int)(offsets[1] - offsets[0]));
2663 
2664         /* Handle --file-offsets */
2665 
2666         else if (file_offsets)
2667           fprintf(stdout, "%d,%d" STDOUT_NL,
2668             (int)(filepos + ptr + offsets[0] - ptr),
2669             (int)(offsets[1] - offsets[0]));
2670 
2671         /* Handle --output (which has already been syntax checked) */
2672 
2673         else if (output_text != NULL)
2674           {
2675           if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2676               (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2677               number)
2678             fprintf(stdout, STDOUT_NL);
2679           }
2680 
2681         /* Handle --only-matching, which may occur many times */
2682 
2683         else
2684           {
2685           BOOL printed = FALSE;
2686           omstr *om;
2687 
2688           for (om = only_matching; om != NULL; om = om->next)
2689             {
2690             int n = om->groupnum;
2691             if (n < mrc)
2692               {
2693               int plen = offsets[2*n + 1] - offsets[2*n];
2694               if (plen > 0)
2695                 {
2696                 if (printed && om_separator != NULL)
2697                   fprintf(stdout, "%s", om_separator);
2698                 print_match(ptr + offsets[n*2], plen);
2699                 printed = TRUE;
2700                 }
2701               }
2702             }
2703 
2704           if (printed || printname != NULL || number)
2705             fprintf(stdout, STDOUT_NL);
2706           }
2707 
2708         /* Prepare to repeat to find the next match in the line. */
2709 
2710         match = FALSE;
2711         if (line_buffered) fflush(stdout);
2712         rc = 0;                      /* Had some success */
2713 
2714         /* If the pattern contained a lookbehind that included \K, it is
2715         possible that the end of the match might be at or before the actual
2716         starting offset we have just used. In this case, start one character
2717         further on. */
2718 
2719         startoffset = offsets[1];    /* Restart after the match */
2720         oldstartoffset = pcre2_get_startchar(match_data);
2721         if (startoffset <= oldstartoffset)
2722           {
2723           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2724           startoffset = oldstartoffset + 1;
2725           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2726           }
2727 
2728         /* If the current match ended past the end of the line (only possible
2729         in multiline mode), we must move on to the line in which it did end
2730         before searching for more matches. */
2731 
2732         while (startoffset > linelength)
2733           {
2734           ptr += linelength + endlinelength;
2735           filepos += (int)(linelength + endlinelength);
2736           linenumber++;
2737           startoffset -= (int)(linelength + endlinelength);
2738           t = end_of_line(ptr, endptr, &endlinelength);
2739           linelength = t - ptr - endlinelength;
2740           length = (PCRE2_SIZE)(endptr - ptr);
2741           }
2742 
2743         goto ONLY_MATCHING_RESTART;
2744         }
2745       }
2746 
2747     /* This is the default case when none of the above options is set. We print
2748     the matching lines(s), possibly preceded and/or followed by other lines of
2749     context. */
2750 
2751     else
2752       {
2753       /* See if there is a requirement to print some "after" lines from a
2754       previous match. We never print any overlaps. */
2755 
2756       if (after_context > 0 && lastmatchnumber > 0)
2757         {
2758         int ellength;
2759         int linecount = 0;
2760         char *p = lastmatchrestart;
2761 
2762         while (p < ptr && linecount < after_context)
2763           {
2764           p = end_of_line(p, ptr, &ellength);
2765           linecount++;
2766           }
2767 
2768         /* It is important to advance lastmatchrestart during this printing so
2769         that it interacts correctly with any "before" printing below. Print
2770         each line's data using fwrite() in case there are binary zeroes. */
2771 
2772         while (lastmatchrestart < p)
2773           {
2774           char *pp = lastmatchrestart;
2775           if (printname != NULL) fprintf(stdout, "%s-", printname);
2776           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2777           pp = end_of_line(pp, endptr, &ellength);
2778           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2779           lastmatchrestart = pp;
2780           }
2781         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2782         }
2783 
2784       /* If there were non-contiguous lines printed above, insert hyphens. */
2785 
2786       if (hyphenpending)
2787         {
2788         fprintf(stdout, "--" STDOUT_NL);
2789         hyphenpending = FALSE;
2790         hyphenprinted = TRUE;
2791         }
2792 
2793       /* See if there is a requirement to print some "before" lines for this
2794       match. Again, don't print overlaps. */
2795 
2796       if (before_context > 0)
2797         {
2798         int linecount = 0;
2799         char *p = ptr;
2800 
2801         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2802                linecount < before_context)
2803           {
2804           linecount++;
2805           p = previous_line(p, main_buffer);
2806           }
2807 
2808         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2809           fprintf(stdout, "--" STDOUT_NL);
2810 
2811         while (p < ptr)
2812           {
2813           int ellength;
2814           char *pp = p;
2815           if (printname != NULL) fprintf(stdout, "%s-", printname);
2816           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2817           pp = end_of_line(pp, endptr, &ellength);
2818           FWRITE_IGNORE(p, 1, pp - p, stdout);
2819           p = pp;
2820           }
2821         }
2822 
2823       /* Now print the matching line(s); ensure we set hyphenpending at the end
2824       of the file if any context lines are being output. */
2825 
2826       if (after_context > 0 || before_context > 0)
2827         endhyphenpending = TRUE;
2828 
2829       if (printname != NULL) fprintf(stdout, "%s:", printname);
2830       if (number) fprintf(stdout, "%lu:", linenumber);
2831 
2832       /* This extra option, for Jeffrey Friedl's debugging requirements,
2833       replaces the matched string, or a specific captured string if it exists,
2834       with X. When this happens, colouring is ignored. */
2835 
2836 #ifdef JFRIEDL_DEBUG
2837       if (S_arg >= 0 && S_arg < mrc)
2838         {
2839         int first = S_arg * 2;
2840         int last  = first + 1;
2841         FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2842         fprintf(stdout, "X");
2843         FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2844         }
2845       else
2846 #endif
2847 
2848       /* In multiline mode, or if colouring, we have to split the line(s) up
2849       and search for further matches, but not of course if the line is a
2850       non-match. In multiline mode this is necessary in case there is another
2851       match that spans the end of the current line. When colouring we want to
2852       colour all matches. */
2853 
2854       if ((multiline || do_colour) && !invert)
2855         {
2856         int plength;
2857         PCRE2_SIZE endprevious;
2858 
2859         /* The use of \K may make the end offset earlier than the start. In
2860         this situation, swap them round. */
2861 
2862         if (offsets[0] > offsets[1])
2863           {
2864           PCRE2_SIZE temp = offsets[0];
2865           offsets[0] = offsets[1];
2866           offsets[1] = temp;
2867           }
2868 
2869         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2870         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2871 
2872         for (;;)
2873           {
2874           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2875 
2876           endprevious = offsets[1];
2877           startoffset = endprevious;  /* Advance after previous match. */
2878 
2879           /* If the pattern contained a lookbehind that included \K, it is
2880           possible that the end of the match might be at or before the actual
2881           starting offset we have just used. In this case, start one character
2882           further on. */
2883 
2884           if (startoffset <= oldstartoffset)
2885             {
2886             startoffset = oldstartoffset + 1;
2887             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2888             }
2889 
2890           /* If the current match ended past the end of the line (only possible
2891           in multiline mode), we must move on to the line in which it did end
2892           before searching for more matches. Because the PCRE2_FIRSTLINE option
2893           is set, the start of the match will always be before the first
2894           newline sequence. */
2895 
2896           while (startoffset > linelength + endlinelength)
2897             {
2898             ptr += linelength + endlinelength;
2899             filepos += (int)(linelength + endlinelength);
2900             linenumber++;
2901             startoffset -= (int)(linelength + endlinelength);
2902             endprevious -= (int)(linelength + endlinelength);
2903             t = end_of_line(ptr, endptr, &endlinelength);
2904             linelength = t - ptr - endlinelength;
2905             length = (PCRE2_SIZE)(endptr - ptr);
2906             }
2907 
2908           /* If startoffset is at the exact end of the line it means this
2909           complete line was the final part of the match, so there is nothing
2910           more to do. */
2911 
2912           if (startoffset == linelength + endlinelength) break;
2913 
2914           /* Otherwise, run a match from within the final line, and if found,
2915           loop for any that may follow. */
2916 
2917           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2918 
2919           /* The use of \K may make the end offset earlier than the start. In
2920           this situation, swap them round. */
2921 
2922           if (offsets[0] > offsets[1])
2923             {
2924             PCRE2_SIZE temp = offsets[0];
2925             offsets[0] = offsets[1];
2926             offsets[1] = temp;
2927             }
2928 
2929           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
2930           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2931           }
2932 
2933         /* In multiline mode, we may have already printed the complete line
2934         and its line-ending characters (if they matched the pattern), so there
2935         may be no more to print. */
2936 
2937         plength = (int)((linelength + endlinelength) - endprevious);
2938         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
2939         }
2940 
2941       /* Not colouring or multiline; no need to search for further matches. */
2942 
2943       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
2944       }
2945 
2946     /* End of doing what has to be done for a match. If --line-buffered was
2947     given, flush the output. */
2948 
2949     if (line_buffered) fflush(stdout);
2950     rc = 0;    /* Had some success */
2951 
2952     /* Remember where the last match happened for after_context. We remember
2953     where we are about to restart, and that line's number. */
2954 
2955     lastmatchrestart = ptr + linelength + endlinelength;
2956     lastmatchnumber = linenumber + 1;
2957     }
2958 
2959   /* For a match in multiline inverted mode (which of course did not cause
2960   anything to be printed), we have to move on to the end of the match before
2961   proceeding. */
2962 
2963   if (multiline && invert && match)
2964     {
2965     int ellength;
2966     char *endmatch = ptr + offsets[1];
2967     t = ptr;
2968     while (t < endmatch)
2969       {
2970       t = end_of_line(t, endptr, &ellength);
2971       if (t <= endmatch) linenumber++; else break;
2972       }
2973     endmatch = end_of_line(endmatch, endptr, &ellength);
2974     linelength = endmatch - ptr - ellength;
2975     }
2976 
2977   /* Advance to after the newline and increment the line number. The file
2978   offset to the current line is maintained in filepos. */
2979 
2980   END_ONE_MATCH:
2981   ptr += linelength + endlinelength;
2982   filepos += (int)(linelength + endlinelength);
2983   linenumber++;
2984 
2985   /* If input is line buffered, and the buffer is not yet full, read another
2986   line and add it into the buffer. */
2987 
2988   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
2989     {
2990     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2991     bufflength += add;
2992     endptr += add;
2993     }
2994 
2995   /* If we haven't yet reached the end of the file (the buffer is full), and
2996   the current point is in the top 1/3 of the buffer, slide the buffer down by
2997   1/3 and refill it. Before we do this, if some unprinted "after" lines are
2998   about to be lost, print them. */
2999 
3000   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3001     {
3002     if (after_context > 0 &&
3003         lastmatchnumber > 0 &&
3004         lastmatchrestart < main_buffer + bufthird)
3005       {
3006       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3007       lastmatchnumber = 0;  /* Indicates no after lines pending */
3008       }
3009 
3010     /* Now do the shuffle */
3011 
3012     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3013     ptr -= bufthird;
3014 
3015     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3016       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3017     endptr = main_buffer + bufflength;
3018 
3019     /* Adjust any last match point */
3020 
3021     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3022     }
3023   }     /* Loop through the whole file */
3024 
3025 /* End of file; print final "after" lines if wanted; do_after_lines sets
3026 hyphenpending if it prints something. */
3027 
3028 if (only_matching_count == 0 && !(count_only|show_total_count))
3029   {
3030   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3031   hyphenpending |= endhyphenpending;
3032   }
3033 
3034 /* Print the file name if we are looking for those without matches and there
3035 were none. If we found a match, we won't have got this far. */
3036 
3037 if (filenames == FN_NOMATCH_ONLY)
3038   {
3039   fprintf(stdout, "%s" STDOUT_NL, printname);
3040   return 0;
3041   }
3042 
3043 /* Print the match count if wanted */
3044 
3045 if (count_only && !quiet)
3046   {
3047   if (count > 0 || !omit_zero_count)
3048     {
3049     if (printname != NULL && filenames != FN_NONE)
3050       fprintf(stdout, "%s:", printname);
3051     fprintf(stdout, "%lu" STDOUT_NL, count);
3052     counts_printed++;
3053     }
3054   }
3055 
3056 total_count += count;   /* Can be set without count_only */
3057 return rc;
3058 }
3059 
3060 
3061 
3062 /*************************************************
3063 *     Grep a file or recurse into a directory    *
3064 *************************************************/
3065 
3066 /* Given a path name, if it's a directory, scan all the files if we are
3067 recursing; if it's a file, grep it.
3068 
3069 Arguments:
3070   pathname          the path to investigate
3071   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3072   only_one_at_top   TRUE if the path is the only one at toplevel
3073 
3074 Returns:  -1 the file/directory was skipped
3075            0 if there was at least one match
3076            1 if there were no matches
3077            2 there was some kind of error
3078 
3079 However, file opening failures are suppressed if "silent" is set.
3080 */
3081 
3082 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3083 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3084 {
3085 int rc = 1;
3086 int frtype;
3087 void *handle;
3088 char *lastcomp;
3089 FILE *in = NULL;           /* Ensure initialized */
3090 
3091 #ifdef SUPPORT_LIBZ
3092 gzFile ingz = NULL;
3093 #endif
3094 
3095 #ifdef SUPPORT_LIBBZ2
3096 BZFILE *inbz2 = NULL;
3097 #endif
3098 
3099 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3100 int pathlen;
3101 #endif
3102 
3103 #if defined NATIVE_ZOS
3104 int zos_type;
3105 FILE *zos_test_file;
3106 #endif
3107 
3108 /* If the file name is "-" we scan stdin */
3109 
3110 if (strcmp(pathname, "-") == 0)
3111   {
3112   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3113     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3114       stdin_name : NULL);
3115   }
3116 
3117 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3118 directories, whereas --include and --exclude apply to everything else. The test
3119 is against the final component of the path. */
3120 
3121 lastcomp = strrchr(pathname, FILESEP);
3122 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3123 
3124 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3125 Otherwise, scan the directory and recurse for each path within it. The scanning
3126 code is localized so it can be made system-specific. */
3127 
3128 
3129 /* For z/OS, determine the file type. */
3130 
3131 #if defined NATIVE_ZOS
3132 zos_test_file =  fopen(pathname,"rb");
3133 
3134 if (zos_test_file == NULL)
3135    {
3136    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3137      pathname, strerror(errno));
3138    return -1;
3139    }
3140 zos_type = identifyzosfiletype (zos_test_file);
3141 fclose (zos_test_file);
3142 
3143 /* Handle a PDS in separate code */
3144 
3145 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3146    {
3147    return travelonpdsdir (pathname, only_one_at_top);
3148    }
3149 
3150 /* Deal with regular files in the normal way below. These types are:
3151    zos_type == __ZOS_PDS_MEMBER
3152    zos_type == __ZOS_PS
3153    zos_type == __ZOS_VSAM_KSDS
3154    zos_type == __ZOS_VSAM_ESDS
3155    zos_type == __ZOS_VSAM_RRDS
3156 */
3157 
3158 /* Handle a z/OS directory using common code. */
3159 
3160 else if (zos_type == __ZOS_HFS)
3161  {
3162 #endif  /* NATIVE_ZOS */
3163 
3164 
3165 /* Handle directories: common code for all OS */
3166 
3167 if (isdirectory(pathname))
3168   {
3169   if (dee_action == dee_SKIP ||
3170       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3171     return -1;
3172 
3173   if (dee_action == dee_RECURSE)
3174     {
3175     char buffer[FNBUFSIZ];
3176     char *nextfile;
3177     directory_type *dir = opendirectory(pathname);
3178 
3179     if (dir == NULL)
3180       {
3181       if (!silent)
3182         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3183           strerror(errno));
3184       return 2;
3185       }
3186 
3187     while ((nextfile = readdirectory(dir)) != NULL)
3188       {
3189       int frc;
3190       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3191       if (fnlength > FNBUFSIZ)
3192         {
3193         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3194         rc = 2;
3195         break;
3196         }
3197       sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3198       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3199       if (frc > 1) rc = frc;
3200        else if (frc == 0 && rc == 1) rc = 0;
3201       }
3202 
3203     closedirectory(dir);
3204     return rc;
3205     }
3206   }
3207 
3208 #ifdef WIN32
3209 if (iswild(pathname))
3210   {
3211   char buffer[1024];
3212   char *nextfile;
3213   char *name;
3214   directory_type *dir = opendirectory(pathname);
3215 
3216   if (dir == NULL)
3217     return 0;
3218 
3219   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3220     if (*nextfile == '/' || *nextfile == '\\')
3221       name = nextfile + 1;
3222   *name = 0;
3223 
3224   while ((nextfile = readdirectory(dir)) != NULL)
3225     {
3226     int frc;
3227     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3228     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3229     if (frc > 1) rc = frc;
3230      else if (frc == 0 && rc == 1) rc = 0;
3231     }
3232 
3233   closedirectory(dir);
3234   return rc;
3235   }
3236 #endif
3237 
3238 #if defined NATIVE_ZOS
3239  }
3240 #endif
3241 
3242 /* If the file is not a directory, check for a regular file, and if it is not,
3243 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3244 exclusion. */
3245 
3246 else if (
3247 #if defined NATIVE_ZOS
3248         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3249 #else  /* all other OS */
3250         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3251 #endif
3252         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3253   return -1;  /* File skipped */
3254 
3255 /* Control reaches here if we have a regular file, or if we have a directory
3256 and recursion or skipping was not requested, or if we have anything else and
3257 skipping was not requested. The scan proceeds. If this is the first and only
3258 argument at top level, we don't show the file name, unless we are only showing
3259 the file name, or the filename was forced (-H). */
3260 
3261 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3262 pathlen = (int)(strlen(pathname));
3263 #endif
3264 
3265 /* Open using zlib if it is supported and the file name ends with .gz. */
3266 
3267 #ifdef SUPPORT_LIBZ
3268 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3269   {
3270   ingz = gzopen(pathname, "rb");
3271   if (ingz == NULL)
3272     {
3273     if (!silent)
3274       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3275         strerror(errno));
3276     return 2;
3277     }
3278   handle = (void *)ingz;
3279   frtype = FR_LIBZ;
3280   }
3281 else
3282 #endif
3283 
3284 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3285 
3286 #ifdef SUPPORT_LIBBZ2
3287 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3288   {
3289   inbz2 = BZ2_bzopen(pathname, "rb");
3290   handle = (void *)inbz2;
3291   frtype = FR_LIBBZ2;
3292   }
3293 else
3294 #endif
3295 
3296 /* Otherwise use plain fopen(). The label is so that we can come back here if
3297 an attempt to read a .bz2 file indicates that it really is a plain file. */
3298 
3299 #ifdef SUPPORT_LIBBZ2
3300 PLAIN_FILE:
3301 #endif
3302   {
3303   in = fopen(pathname, "rb");
3304   handle = (void *)in;
3305   frtype = FR_PLAIN;
3306   }
3307 
3308 /* All the opening methods return errno when they fail. */
3309 
3310 if (handle == NULL)
3311   {
3312   if (!silent)
3313     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3314       strerror(errno));
3315   return 2;
3316   }
3317 
3318 /* Now grep the file */
3319 
3320 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3321   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3322 
3323 /* Close in an appropriate manner. */
3324 
3325 #ifdef SUPPORT_LIBZ
3326 if (frtype == FR_LIBZ)
3327   gzclose(ingz);
3328 else
3329 #endif
3330 
3331 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3332 read failed. If the error indicates that the file isn't in fact bzipped, try
3333 again as a normal file. */
3334 
3335 #ifdef SUPPORT_LIBBZ2
3336 if (frtype == FR_LIBBZ2)
3337   {
3338   if (rc == 3)
3339     {
3340     int errnum;
3341     const char *err = BZ2_bzerror(inbz2, &errnum);
3342     if (errnum == BZ_DATA_ERROR_MAGIC)
3343       {
3344       BZ2_bzclose(inbz2);
3345       goto PLAIN_FILE;
3346       }
3347     else if (!silent)
3348       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3349         pathname, err);
3350     rc = 2;    /* The normal "something went wrong" code */
3351     }
3352   BZ2_bzclose(inbz2);
3353   }
3354 else
3355 #endif
3356 
3357 /* Normal file close */
3358 
3359 fclose(in);
3360 
3361 /* Pass back the yield from pcre2grep(). */
3362 
3363 return rc;
3364 }
3365 
3366 
3367 
3368 /*************************************************
3369 *    Handle a single-letter, no data option      *
3370 *************************************************/
3371 
3372 static int
handle_option(int letter,int options)3373 handle_option(int letter, int options)
3374 {
3375 switch(letter)
3376   {
3377   case N_FOFFSETS: file_offsets = TRUE; break;
3378   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3379   case N_LBUFFER: line_buffered = TRUE; break;
3380   case N_LOFFSETS: line_offsets = number = TRUE; break;
3381   case N_NOJIT: use_jit = FALSE; break;
3382   case 'a': binary_files = BIN_TEXT; break;
3383   case 'c': count_only = TRUE; break;
3384   case 'F': options |= PCRE2_LITERAL; break;
3385   case 'H': filenames = FN_FORCE; break;
3386   case 'I': binary_files = BIN_NOMATCH; break;
3387   case 'h': filenames = FN_NONE; break;
3388   case 'i': options |= PCRE2_CASELESS; break;
3389   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3390   case 'L': filenames = FN_NOMATCH_ONLY; break;
3391   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3392   case 'n': number = TRUE; break;
3393 
3394   case 'o':
3395   only_matching_last = add_number(0, only_matching_last);
3396   if (only_matching == NULL) only_matching = only_matching_last;
3397   break;
3398 
3399   case 'q': quiet = TRUE; break;
3400   case 'r': dee_action = dee_RECURSE; break;
3401   case 's': silent = TRUE; break;
3402   case 't': show_total_count = TRUE; break;
3403   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3404   case 'v': invert = TRUE; break;
3405   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3406   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3407 
3408   case 'V':
3409     {
3410     unsigned char buffer[128];
3411     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3412     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3413     }
3414   pcre2grep_exit(0);
3415   break;
3416 
3417   default:
3418   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3419   pcre2grep_exit(usage(2));
3420   }
3421 
3422 return options;
3423 }
3424 
3425 
3426 
3427 /*************************************************
3428 *          Construct printed ordinal             *
3429 *************************************************/
3430 
/* This turns a number into "1st", "3rd", etc. The suffix is chosen from the
last two digits: 11, 12, and 13 take "th" like most numbers, otherwise a final
1, 2, or 3 takes "st", "nd", or "rd". A negative number always gets "th".

Argument:   the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
              the next call
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13) switch (lasttwo % 10)
  {
  case 1: suffix = "st"; break;
  case 2: suffix = "nd"; break;
  case 3: suffix = "rd"; break;
  default: break;
  }

/* Build the result with a single bounded write, so that the buffer cannot be
overrun whatever the width of an int. */

(void)snprintf(buffer, sizeof(buffer), "%d%s", n, suffix);
return buffer;
}
3451 
3452 
3453 
3454 /*************************************************
3455 *          Compile a single pattern              *
3456 *************************************************/
3457 
3458 /* Do nothing if the pattern has already been compiled. This is the case for
3459 include/exclude patterns read from a file.
3460 
3461 When the -F option has been used, each "pattern" may be a list of strings,
3462 separated by line breaks. They will be matched literally. We split such a
3463 string and compile the first substring, inserting an additional block into the
3464 pattern chain.
3465 
3466 Arguments:
3467   p              points to the pattern block
3468   options        the PCRE options
3469   fromfile       TRUE if the pattern was read from a file
3470   fromtext       file name or identifying text (e.g. "include")
3471   count          0 if this is the only command line pattern, or
3472                  number of the command line pattern, or
3473                  linenumber for a pattern from a file
3474 
3475 Returns:         TRUE on success, FALSE after an error
3476 */
3477 
3478 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3479 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3480   int count)
3481 {
3482 char *ps;
3483 int errcode;
3484 PCRE2_SIZE patlen, erroffset;
3485 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3486 
3487 if (p->compiled != NULL) return TRUE;
3488 ps = p->string;
3489 patlen = p->length;
3490 
3491 if ((options & PCRE2_LITERAL) != 0)
3492   {
3493   int ellength;
3494   char *eop = ps + patlen;
3495   char *pe = end_of_line(ps, eop, &ellength);
3496 
3497   if (ellength != 0)
3498     {
3499     patlen = pe - ps - ellength;
3500     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3501     }
3502   }
3503 
3504 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3505   &erroffset, compile_context);
3506 
3507 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3508 ignore any JIT compiler errors, relying falling back to interpreting if
3509 anything goes wrong with JIT. */
3510 
3511 if (p->compiled != NULL)
3512   {
3513 #ifdef SUPPORT_PCRE2GREP_JIT
3514   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3515 #endif
3516   return TRUE;
3517   }
3518 
3519 /* Handle compile errors */
3520 
3521 if (erroffset > patlen) erroffset = patlen;
3522 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3523 
3524 if (fromfile)
3525   {
3526   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3527     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3528   }
3529 else
3530   {
3531   if (count == 0)
3532     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3533       fromtext, (int)erroffset, errmessbuffer);
3534   else
3535     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3536       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3537   }
3538 
3539 return FALSE;
3540 }
3541 
3542 
3543 
3544 /*************************************************
3545 *     Read and compile a file of patterns        *
3546 *************************************************/
3547 
3548 /* This is used for --filelist, --include-from, and --exclude-from.
3549 
3550 Arguments:
3551   name         the name of the file; "-" is stdin
3552   patptr       pointer to the pattern chain anchor
3553   patlastptr   pointer to the last pattern pointer
3554 
3555 Returns:       TRUE if all went well
3556 */
3557 
3558 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3559 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3560 {
3561 int linenumber = 0;
3562 PCRE2_SIZE patlen;
3563 FILE *f;
3564 const char *filename;
3565 char buffer[MAXPATLEN+20];
3566 
3567 if (strcmp(name, "-") == 0)
3568   {
3569   f = stdin;
3570   filename = stdin_name;
3571   }
3572 else
3573   {
3574   f = fopen(name, "r");
3575   if (f == NULL)
3576     {
3577     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3578     return FALSE;
3579     }
3580   filename = name;
3581   }
3582 
3583 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3584   {
3585   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3586   linenumber++;
3587   if (patlen == 0) continue;   /* Skip blank lines */
3588 
3589   /* Note: this call to add_pattern() puts a pointer to the local variable
3590   "buffer" into the pattern chain. However, that pointer is used only when
3591   compiling the pattern, which happens immediately below, so we flatten it
3592   afterwards, as a precaution against any later code trying to use it. */
3593 
3594   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3595   if (*patlastptr == NULL)
3596     {
3597     if (f != stdin) fclose(f);
3598     return FALSE;
3599     }
3600   if (*patptr == NULL) *patptr = *patlastptr;
3601 
3602   /* This loop is needed because compiling a "pattern" when -F is set may add
3603   on additional literal patterns if the original contains a newline. In the
3604   common case, it never will, because read_one_line() stops at a newline.
3605   However, the -N option can be used to give pcre2grep a different newline
3606   setting. */
3607 
3608   for(;;)
3609     {
3610     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3611         linenumber))
3612       {
3613       if (f != stdin) fclose(f);
3614       return FALSE;
3615       }
3616     (*patlastptr)->string = NULL;            /* Insurance */
3617     if ((*patlastptr)->next == NULL) break;
3618     *patlastptr = (*patlastptr)->next;
3619     }
3620   }
3621 
3622 if (f != stdin) fclose(f);
3623 return TRUE;
3624 }
3625 
3626 
3627 
3628 /*************************************************
3629 *                Main program                    *
3630 *************************************************/
3631 
3632 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3633 
3634 int
main(int argc,char ** argv)3635 main(int argc, char **argv)
3636 {
3637 int i, j;
3638 int rc = 1;
3639 BOOL only_one_at_top;
3640 patstr *cp;
3641 fnstr *fn;
3642 const char *locale_from = "--locale";
3643 
3644 #ifdef SUPPORT_PCRE2GREP_JIT
3645 pcre2_jit_stack *jit_stack = NULL;
3646 #endif
3647 
3648 /* In Windows, stdout is set up as a text stream, which means that \n is
3649 converted to \r\n. This causes output lines that are copied from the input to
3650 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3651 that stdout is a binary stream. Note that this means all other output to stdout
3652 must use STDOUT_NL to terminate lines. */
3653 
3654 #ifdef WIN32
3655 _setmode(_fileno(stdout), _O_BINARY);
3656 #endif
3657 
3658 /* Set up a default compile and match contexts and a match data block. */
3659 
3660 compile_context = pcre2_compile_context_create(NULL);
3661 match_context = pcre2_match_context_create(NULL);
3662 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
3663 offsets = pcre2_get_ovector_pointer(match_data);
3664 
3665 /* If string (script) callouts are supported, set up the callout processing
3666 function. */
3667 
3668 #ifdef SUPPORT_PCRE2GREP_CALLOUT
3669 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
3670 #endif
3671 
3672 /* Process the options */
3673 
3674 for (i = 1; i < argc; i++)
3675   {
3676   option_item *op = NULL;
3677   char *option_data = (char *)"";    /* default to keep compiler happy */
3678   BOOL longop;
3679   BOOL longopwasequals = FALSE;
3680 
3681   if (argv[i][0] != '-') break;
3682 
3683   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3684   but only if we have previously had -e or -f to define the patterns. */
3685 
3686   if (argv[i][1] == 0)
3687     {
3688     if (pattern_files != NULL || patterns != NULL) break;
3689       else pcre2grep_exit(usage(2));
3690     }
3691 
3692   /* Handle a long name option, or -- to terminate the options */
3693 
3694   if (argv[i][1] == '-')
3695     {
3696     char *arg = argv[i] + 2;
3697     char *argequals = strchr(arg, '=');
3698 
3699     if (*arg == 0)    /* -- terminates options */
3700       {
3701       i++;
3702       break;                /* out of the options-handling loop */
3703       }
3704 
3705     longop = TRUE;
3706 
3707     /* Some long options have data that follows after =, for example file=name.
3708     Some options have variations in the long name spelling: specifically, we
3709     allow "regexp" because GNU grep allows it, though I personally go along
3710     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3711     These options are entered in the table as "regex(p)". Options can be in
3712     both these categories. */
3713 
3714     for (op = optionlist; op->one_char != 0; op++)
3715       {
3716       char *opbra = strchr(op->long_name, '(');
3717       char *equals = strchr(op->long_name, '=');
3718 
3719       /* Handle options with only one spelling of the name */
3720 
3721       if (opbra == NULL)     /* Does not contain '(' */
3722         {
3723         if (equals == NULL)  /* Not thing=data case */
3724           {
3725           if (strcmp(arg, op->long_name) == 0) break;
3726           }
3727         else                 /* Special case xxx=data */
3728           {
3729           int oplen = (int)(equals - op->long_name);
3730           int arglen = (argequals == NULL)?
3731             (int)strlen(arg) : (int)(argequals - arg);
3732           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3733             {
3734             option_data = arg + arglen;
3735             if (*option_data == '=')
3736               {
3737               option_data++;
3738               longopwasequals = TRUE;
3739               }
3740             break;
3741             }
3742           }
3743         }
3744 
3745       /* Handle options with an alternate spelling of the name */
3746 
3747       else
3748         {
3749         char buff1[24];
3750         char buff2[24];
3751         int ret;
3752 
3753         int baselen = (int)(opbra - op->long_name);
3754         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3755         int arglen = (argequals == NULL || equals == NULL)?
3756           (int)strlen(arg) : (int)(argequals - arg);
3757 
3758         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3759              ret < 0 || ret > (int)sizeof(buff1)) ||
3760             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3761                      fulllen - baselen - 2, opbra + 1),
3762              ret < 0 || ret > (int)sizeof(buff2)))
3763           {
3764           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3765             op->long_name);
3766           pcre2grep_exit(2);
3767           }
3768 
3769         if (strncmp(arg, buff1, arglen) == 0 ||
3770            strncmp(arg, buff2, arglen) == 0)
3771           {
3772           if (equals != NULL && argequals != NULL)
3773             {
3774             option_data = argequals;
3775             if (*option_data == '=')
3776               {
3777               option_data++;
3778               longopwasequals = TRUE;
3779               }
3780             }
3781           break;
3782           }
3783         }
3784       }
3785 
3786     if (op->one_char == 0)
3787       {
3788       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3789       pcre2grep_exit(usage(2));
3790       }
3791     }
3792 
3793   /* Jeffrey Friedl's debugging harness uses these additional options which
3794   are not in the right form for putting in the option table because they use
3795   only one hyphen, yet are more than one character long. By putting them
3796   separately here, they will not get displayed as part of the help() output,
3797   but I don't think Jeffrey will care about that. */
3798 
3799 #ifdef JFRIEDL_DEBUG
3800   else if (strcmp(argv[i], "-pre") == 0) {
3801           jfriedl_prefix = argv[++i];
3802           continue;
3803   } else if (strcmp(argv[i], "-post") == 0) {
3804           jfriedl_postfix = argv[++i];
3805           continue;
3806   } else if (strcmp(argv[i], "-XT") == 0) {
3807           sscanf(argv[++i], "%d", &jfriedl_XT);
3808           continue;
3809   } else if (strcmp(argv[i], "-XR") == 0) {
3810           sscanf(argv[++i], "%d", &jfriedl_XR);
3811           continue;
3812   }
3813 #endif
3814 
3815 
3816   /* One-char options; many that have no data may be in a single argument; we
3817   continue till we hit the last one or one that needs data. */
3818 
3819   else
3820     {
3821     char *s = argv[i] + 1;
3822     longop = FALSE;
3823 
3824     while (*s != 0)
3825       {
3826       for (op = optionlist; op->one_char != 0; op++)
3827         {
3828         if (*s == op->one_char) break;
3829         }
3830       if (op->one_char == 0)
3831         {
3832         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3833           *s, argv[i]);
3834         pcre2grep_exit(usage(2));
3835         }
3836 
3837       option_data = s+1;
3838 
3839       /* Break out if this is the last character in the string; it's handled
3840       below like a single multi-char option. */
3841 
3842       if (*option_data == 0) break;
3843 
3844       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3845       are used for ones that either have a numerical value or a default, i.e.
3846       the data is optional. If a digit follows, there is data; if not, carry on
3847       with other single-character options in the same string. */
3848 
3849       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3850         {
3851         if (isdigit((unsigned char)s[1])) break;
3852         }
3853       else   /* Check for an option with data */
3854         {
3855         if (op->type != OP_NODATA) break;
3856         }
3857 
3858       /* Handle a single-character option with no data, then loop for the
3859       next character in the string. */
3860 
3861       pcre2_options = handle_option(*s++, pcre2_options);
3862       }
3863     }
3864 
3865   /* At this point we should have op pointing to a matched option. If the type
3866   is OP_NODATA, it means that there is no data, and the option might set
3867   something in the PCRE options. */
3868 
3869   if (op->type == OP_NODATA)
3870     {
3871     pcre2_options = handle_option(op->one_char, pcre2_options);
3872     continue;
3873     }
3874 
3875   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3876   either has a value or defaults to something. It cannot have data in a
3877   separate item. At the moment, the only such options are "colo(u)r",
3878   "only-matching", and Jeffrey Friedl's special -S debugging option. */
3879 
3880   if (*option_data == 0 &&
3881       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3882        op->type == OP_OP_NUMBERS))
3883     {
3884     switch (op->one_char)
3885       {
3886       case N_COLOUR:
3887       colour_option = "auto";
3888       break;
3889 
3890       case 'o':
3891       only_matching_last = add_number(0, only_matching_last);
3892       if (only_matching == NULL) only_matching = only_matching_last;
3893       break;
3894 
3895 #ifdef JFRIEDL_DEBUG
3896       case 'S':
3897       S_arg = 0;
3898       break;
3899 #endif
3900       }
3901     continue;
3902     }
3903 
3904   /* Otherwise, find the data string for the option. */
3905 
3906   if (*option_data == 0)
3907     {
3908     if (i >= argc - 1 || longopwasequals)
3909       {
3910       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3911       pcre2grep_exit(usage(2));
3912       }
3913     option_data = argv[++i];
3914     }
3915 
3916   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3917   added to a chain of numbers. */
3918 
3919   if (op->type == OP_OP_NUMBERS)
3920     {
3921     unsigned long int n = decode_number(option_data, op, longop);
3922     omdatastr *omd = (omdatastr *)op->dataptr;
3923     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3924     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3925     }
3926 
3927   /* If the option type is OP_PATLIST, it's the -e option, or one of the
3928   include/exclude options, which can be called multiple times to create lists
3929   of patterns. */
3930 
3931   else if (op->type == OP_PATLIST)
3932     {
3933     patdatastr *pd = (patdatastr *)op->dataptr;
3934     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
3935       *(pd->lastptr));
3936     if (*(pd->lastptr) == NULL) goto EXIT2;
3937     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3938     }
3939 
3940   /* If the option type is OP_FILELIST, it's one of the options that names a
3941   file. */
3942 
3943   else if (op->type == OP_FILELIST)
3944     {
3945     fndatastr *fd = (fndatastr *)op->dataptr;
3946     fn = (fnstr *)malloc(sizeof(fnstr));
3947     if (fn == NULL)
3948       {
3949       fprintf(stderr, "pcre2grep: malloc failed\n");
3950       goto EXIT2;
3951       }
3952     fn->next = NULL;
3953     fn->name = option_data;
3954     if (*(fd->anchor) == NULL)
3955       *(fd->anchor) = fn;
3956     else
3957       (*(fd->lastptr))->next = fn;
3958     *(fd->lastptr) = fn;
3959     }
3960 
3961   /* Handle OP_BINFILES */
3962 
3963   else if (op->type == OP_BINFILES)
3964     {
3965     if (strcmp(option_data, "binary") == 0)
3966       binary_files = BIN_BINARY;
3967     else if (strcmp(option_data, "without-match") == 0)
3968       binary_files = BIN_NOMATCH;
3969     else if (strcmp(option_data, "text") == 0)
3970       binary_files = BIN_TEXT;
3971     else
3972       {
3973       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3974         option_data);
3975       pcre2grep_exit(usage(2));
3976       }
3977     }
3978 
3979   /* Otherwise, deal with a single string or numeric data value. */
3980 
3981   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3982            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
3983     {
3984     *((char **)op->dataptr) = option_data;
3985     }
3986   else
3987     {
3988     unsigned long int n = decode_number(option_data, op, longop);
3989     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3990       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
3991       else *((int *)op->dataptr) = n;
3992     }
3993   }
3994 
3995 /* Options have been decoded. If -C was used, its value is used as a default
3996 for -A and -B. */
3997 
3998 if (both_context > 0)
3999   {
4000   if (after_context == 0) after_context = both_context;
4001   if (before_context == 0) before_context = both_context;
4002   }
4003 
4004 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4005 permitted. They display, each in their own way, only the data that has matched.
4006 */
4007 
4008 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4009   file_offsets + line_offsets;
4010 
4011 if (only_matching_count > 1)
4012   {
4013   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4014     "--file-offsets and/or --line-offsets\n");
4015   pcre2grep_exit(usage(2));
4016   }
4017 
4018 /* Check the text supplied to --output for errors. */
4019 
4020 if (output_text != NULL &&
4021     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4022   goto EXIT2;
4023 
4024 /* Put limits into the match context. */
4025 
4026 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4027 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4028 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4029 
4030 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4031 LC_ALL environment variable is set, and if so, use it. */
4032 
4033 if (locale == NULL)
4034   {
4035   locale = getenv("LC_ALL");
4036   locale_from = "LC_ALL";
4037   }
4038 
4039 if (locale == NULL)
4040   {
4041   locale = getenv("LC_CTYPE");
4042   locale_from = "LC_CTYPE";
4043   }
4044 
4045 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4046 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4047 
4048 if (locale != NULL)
4049   {
4050   if (setlocale(LC_CTYPE, locale) == NULL)
4051     {
4052     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4053       locale, locale_from);
4054     goto EXIT2;
4055     }
4056   character_tables = pcre2_maketables(NULL);
4057   pcre2_set_character_tables(compile_context, character_tables);
4058   }
4059 
4060 /* Sort out colouring */
4061 
4062 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4063   {
4064   if (strcmp(colour_option, "always") == 0)
4065 #ifdef WIN32
4066     do_ansi = !is_stdout_tty(),
4067 #endif
4068     do_colour = TRUE;
4069   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4070   else
4071     {
4072     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4073       colour_option);
4074     goto EXIT2;
4075     }
4076   if (do_colour)
4077     {
4078     char *cs = getenv("PCRE2GREP_COLOUR");
4079     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4080     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4081     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4082     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4083     if (cs == NULL) cs = getenv("GREP_COLOR");
4084     if (cs != NULL)
4085       {
4086       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4087       }
4088 #ifdef WIN32
4089     init_colour_output();
4090 #endif
4091     }
4092   }
4093 
4094 /* Sort out a newline setting. */
4095 
4096 if (newline_arg != NULL)
4097   {
4098   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4099        endlinetype++)
4100     {
4101     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4102     }
4103   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4104     pcre2_set_newline(compile_context, endlinetype);
4105   else
4106     {
4107     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4108       newline_arg);
4109     goto EXIT2;
4110     }
4111   }
4112 
4113 /* Find default newline convention */
4114 
4115 else
4116   {
4117   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4118   }
4119 
4120 /* Interpret the text values for -d and -D */
4121 
4122 if (dee_option != NULL)
4123   {
4124   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4125   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4126   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4127   else
4128     {
4129     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4130     goto EXIT2;
4131     }
4132   }
4133 
4134 if (DEE_option != NULL)
4135   {
4136   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4137   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4138   else
4139     {
4140     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4141     goto EXIT2;
4142     }
4143   }
4144 
4145 /* Set the extra options */
4146 
4147 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4148 
4149 /* Check the values for Jeffrey Friedl's debugging options. */
4150 
4151 #ifdef JFRIEDL_DEBUG
4152 if (S_arg > 9)
4153   {
4154   fprintf(stderr, "pcre2grep: bad value for -S option\n");
4155   return 2;
4156   }
4157 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4158   {
4159   if (jfriedl_XT == 0) jfriedl_XT = 1;
4160   if (jfriedl_XR == 0) jfriedl_XR = 1;
4161   }
4162 #endif
4163 
4164 /* If use_jit is set, check whether JIT is available. If not, do not try
4165 to use JIT. */
4166 
4167 if (use_jit)
4168   {
4169   uint32_t answer;
4170   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4171   if (!answer) use_jit = FALSE;
4172   }
4173 
4174 /* Get memory for the main buffer. */
4175 
4176 if (bufthird <= 0)
4177   {
4178   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4179   goto EXIT2;
4180   }
4181 
4182 bufsize = 3*bufthird;
4183 main_buffer = (char *)malloc(bufsize);
4184 
4185 if (main_buffer == NULL)
4186   {
4187   fprintf(stderr, "pcre2grep: malloc failed\n");
4188   goto EXIT2;
4189   }
4190 
4191 /* If no patterns were provided by -e, and there are no files provided by -f,
4192 the first argument is the one and only pattern, and it must exist. */
4193 
4194 if (patterns == NULL && pattern_files == NULL)
4195   {
4196   if (i >= argc) return usage(2);
4197   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4198     NULL);
4199   i++;
4200   if (patterns == NULL) goto EXIT2;
4201   }
4202 
4203 /* Compile the patterns that were provided on the command line, either by
4204 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4205 after all the command-line options are read so that we know which PCRE options
4206 to use. When -F is used, compile_pattern() may add another block into the
4207 chain, so we must not access the next pointer till after the compile. */
4208 
4209 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4210   {
4211   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4212        (j == 1 && patterns->next == NULL)? 0 : j))
4213     goto EXIT2;
4214   }
4215 
4216 /* Read and compile the regular expressions that are provided in files. */
4217 
4218 for (fn = pattern_files; fn != NULL; fn = fn->next)
4219   {
4220   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4221   }
4222 
4223 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4224 
4225 #ifdef SUPPORT_PCRE2GREP_JIT
4226 if (use_jit)
4227   {
4228   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4229   if (jit_stack != NULL                        )
4230     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4231   }
4232 #endif
4233 
4234 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4235 adjust the options. */
4236 
4237 pcre2_options &= ~PCRE2_LITERAL;
4238 (void)pcre2_set_compile_extra_options(compile_context, 0);
4239 
4240 /* If there are include or exclude patterns read from the command line, compile
4241 them. */
4242 
4243 for (j = 0; j < 4; j++)
4244   {
4245   int k;
4246   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4247     {
4248     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4249          (k == 1 && cp->next == NULL)? 0 : k))
4250       goto EXIT2;
4251     }
4252   }
4253 
4254 /* Read and compile include/exclude patterns from files. */
4255 
4256 for (fn = include_from; fn != NULL; fn = fn->next)
4257   {
4258   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4259     goto EXIT2;
4260   }
4261 
4262 for (fn = exclude_from; fn != NULL; fn = fn->next)
4263   {
4264   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4265     goto EXIT2;
4266   }
4267 
4268 /* If there are no files that contain lists of files to search, and there are
4269 no file arguments, search stdin, and then exit. */
4270 
4271 if (file_lists == NULL && i >= argc)
4272   {
4273   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4274     (filenames > FN_DEFAULT)? stdin_name : NULL);
4275   goto EXIT;
4276   }
4277 
4278 /* If any files that contain a list of files to search have been specified,
4279 read them line by line and search the given files. */
4280 
4281 for (fn = file_lists; fn != NULL; fn = fn->next)
4282   {
4283   char buffer[FNBUFSIZ];
4284   FILE *fl;
4285   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4286     {
4287     fl = fopen(fn->name, "rb");
4288     if (fl == NULL)
4289       {
4290       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4291         strerror(errno));
4292       goto EXIT2;
4293       }
4294     }
4295   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4296     {
4297     int frc;
4298     char *end = buffer + (int)strlen(buffer);
4299     while (end > buffer && isspace(end[-1])) end--;
4300     *end = 0;
4301     if (*buffer != 0)
4302       {
4303       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4304       if (frc > 1) rc = frc;
4305         else if (frc == 0 && rc == 1) rc = 0;
4306       }
4307     }
4308   if (fl != stdin) fclose(fl);
4309   }
4310 
4311 /* After handling file-list, work through remaining arguments. Pass in the fact
4312 that there is only one argument at top level - this suppresses the file name if
4313 the argument is not a directory and filenames are not otherwise forced. */
4314 
4315 only_one_at_top = i == argc - 1 && file_lists == NULL;
4316 
4317 for (; i < argc; i++)
4318   {
4319   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4320     only_one_at_top);
4321   if (frc > 1) rc = frc;
4322     else if (frc == 0 && rc == 1) rc = 0;
4323   }
4324 
4325 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4326 /* If separating builtin echo callouts by implicit newline, add one more for
4327 the final item. */
4328 
4329 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4330   fprintf(stdout, STDOUT_NL);
4331 #endif
4332 
4333 /* Show the total number of matches if requested, but not if only one file's
4334 count was printed. */
4335 
4336 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4337   {
4338   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4339     fprintf(stdout, "TOTAL:");
4340   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4341   }
4342 
4343 EXIT:
4344 #ifdef SUPPORT_PCRE2GREP_JIT
4345 pcre2_jit_free_unused_memory(NULL);
4346 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4347 #endif
4348 
4349 free(main_buffer);
4350 free((void *)character_tables);
4351 
4352 pcre2_compile_context_free(compile_context);
4353 pcre2_match_context_free(match_context);
4354 pcre2_match_data_free(match_data);
4355 
4356 free_pattern_chain(patterns);
4357 free_pattern_chain(include_patterns);
4358 free_pattern_chain(include_dir_patterns);
4359 free_pattern_chain(exclude_patterns);
4360 free_pattern_chain(exclude_dir_patterns);
4361 
4362 free_file_chain(exclude_from);
4363 free_file_chain(include_from);
4364 free_file_chain(pattern_files);
4365 free_file_chain(file_lists);
4366 
4367 while (only_matching != NULL)
4368   {
4369   omstr *this = only_matching;
4370   only_matching = this->next;
4371   free(this);
4372   }
4373 
4374 pcre2grep_exit(rc);
4375 
4376 EXIT2:
4377 rc = 2;
4378 goto EXIT;
4379 }
4380 
4381 /* End of pcre2grep */
4382