• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2022 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
66 /* Some CMake's define it still */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h>                /* For _setmode() */
79 #include <fcntl.h>             /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
113 /* old VC and older compilers don't support %td or %zu, and even some that claim to
114 be C99 don't support it (hence DISABLE_PERCENT_ZT). */
115 
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126 
127 #define FALSE 0
128 #define TRUE 1
129 
130 typedef int BOOL;
131 
132 #define DEFAULT_CAPTURE_MAX 50
133 
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139 
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142 
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146 
147 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148 
149 /* File reading styles */
150 
151 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152 
153 /* Actions for the -d and -D options */
154 
155 enum { dee_READ, dee_SKIP, dee_RECURSE };
156 enum { DEE_READ, DEE_SKIP };
157 
158 /* Actions for special processing options (flag bits) */
159 
160 #define PO_WORD_MATCH     0x0001
161 #define PO_LINE_MATCH     0x0002
162 #define PO_FIXED_STRINGS  0x0004
163 
164 /* Binary file options */
165 
166 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167 
168 /* Return values from decode_dollar_escape() */
169 
170 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171 
172 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173 environments), a warning is issued if the value of fwrite() is ignored.
174 Unfortunately, casting to (void) does not suppress the warning. To get round
175 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176 apply to fprintf(). */
177 
178 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179 
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185 
186 #ifdef WIN32
187 #define STDOUT_NL     "\r\n"
188 #define STDOUT_NL_LEN  2
189 #define QUOT          "\""
190 #else
191 #define STDOUT_NL      "\n"
192 #define STDOUT_NL_LEN  1
193 #define QUOT           "'"
194 #endif
195 
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199 
200 #define STDOUT_NL_CODE 0x7fffffffu
201 
202 
203 
204 /*************************************************
205 *               Global variables                 *
206 *************************************************/
207 
208 static const char *colour_string = "1;31";
209 static const char *colour_option = NULL;
210 static const char *dee_option = NULL;
211 static const char *DEE_option = NULL;
212 static const char *locale = NULL;
213 static const char *newline_arg = NULL;
214 static const char *om_separator = NULL;
215 static const char *stdin_name = "(standard input)";
216 static const char *output_text = NULL;
217 
218 static char *main_buffer = NULL;
219 
220 static const char *printname_nl = STDOUT_NL;  /* Changed to NULL for -Z */
221 static int printname_colon = ':';             /* Changed to 0 for -Z */
222 static int printname_hyphen = '-';            /* Changed to 0 for -Z */
223 
224 static int after_context = 0;
225 static int before_context = 0;
226 static int binary_files = BIN_BINARY;
227 static int both_context = 0;
228 static int endlinetype;
229 
230 static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
231 static unsigned long int counts_printed = 0;
232 static unsigned long int total_count = 0;
233 
234 static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;
235 static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;
236 static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;
237 
238 #ifdef WIN32
239 static int dee_action = dee_SKIP;
240 #else
241 static int dee_action = dee_READ;
242 #endif
243 
244 static int DEE_action = DEE_READ;
245 static int error_count = 0;
246 static int filenames = FN_DEFAULT;
247 
248 #ifdef SUPPORT_PCRE2GREP_JIT
249 static BOOL use_jit = TRUE;
250 #else
251 static BOOL use_jit = FALSE;
252 #endif
253 
254 static const uint8_t *character_tables = NULL;
255 
256 static uint32_t pcre2_options = 0;
257 static uint32_t extra_options = 0;
258 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
259 static uint32_t match_limit = 0;
260 static uint32_t depth_limit = 0;
261 
262 static pcre2_compile_context *compile_context;
263 static pcre2_match_context *match_context;
264 static pcre2_match_data *match_data, *match_data_pair[2];
265 static PCRE2_SIZE *offsets, *offsets_pair[2];
266 static int match_data_toggle;
267 static uint32_t offset_size;
268 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
269 
270 static BOOL all_matches = FALSE;
271 static BOOL count_only = FALSE;
272 static BOOL do_colour = FALSE;
273 #ifdef WIN32
274 static BOOL do_ansi = FALSE;
275 #endif
276 static BOOL file_offsets = FALSE;
277 static BOOL hyphenpending = FALSE;
278 static BOOL invert = FALSE;
279 static BOOL line_buffered = FALSE;
280 static BOOL line_offsets = FALSE;
281 static BOOL multiline = FALSE;
282 static BOOL number = FALSE;
283 static BOOL omit_zero_count = FALSE;
284 static BOOL resource_error = FALSE;
285 static BOOL quiet = FALSE;
286 static BOOL show_total_count = FALSE;
287 static BOOL silent = FALSE;
288 static BOOL utf = FALSE;
289 
290 static uint8_t utf8_buffer[8];
291 
292 
293 /* Structure for list of --only-matching capturing numbers. */
294 
295 typedef struct omstr {
296   struct omstr *next;
297   int groupnum;
298 } omstr;
299 
300 static omstr *only_matching = NULL;
301 static omstr *only_matching_last = NULL;
302 static int only_matching_count;
303 
304 /* Structure for holding the two variables that describe a number chain. */
305 
306 typedef struct omdatastr {
307   omstr **anchor;
308   omstr **lastptr;
309 } omdatastr;
310 
311 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
312 
313 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
314 
315 typedef struct fnstr {
316   struct fnstr *next;
317   char *name;
318 } fnstr;
319 
320 static fnstr *exclude_from = NULL;
321 static fnstr *exclude_from_last = NULL;
322 static fnstr *include_from = NULL;
323 static fnstr *include_from_last = NULL;
324 
325 static fnstr *file_lists = NULL;
326 static fnstr *file_lists_last = NULL;
327 static fnstr *pattern_files = NULL;
328 static fnstr *pattern_files_last = NULL;
329 
330 /* Structure for holding the two variables that describe a file name chain. */
331 
332 typedef struct fndatastr {
333   fnstr **anchor;
334   fnstr **lastptr;
335 } fndatastr;
336 
337 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
338 static fndatastr include_from_data = { &include_from, &include_from_last };
339 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
340 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
341 
342 /* Structure for pattern and its compiled form; used for matching patterns and
343 also for include/exclude patterns. */
344 
345 typedef struct patstr {
346   struct patstr *next;
347   char *string;
348   PCRE2_SIZE length;
349   pcre2_code *compiled;
350 } patstr;
351 
352 static patstr *patterns = NULL;
353 static patstr *patterns_last = NULL;
354 static patstr *include_patterns = NULL;
355 static patstr *include_patterns_last = NULL;
356 static patstr *exclude_patterns = NULL;
357 static patstr *exclude_patterns_last = NULL;
358 static patstr *include_dir_patterns = NULL;
359 static patstr *include_dir_patterns_last = NULL;
360 static patstr *exclude_dir_patterns = NULL;
361 static patstr *exclude_dir_patterns_last = NULL;
362 
363 /* Structure holding the two variables that describe a pattern chain. A pointer
364 to such structures is used for each appropriate option. */
365 
366 typedef struct patdatastr {
367   patstr **anchor;
368   patstr **lastptr;
369 } patdatastr;
370 
371 static patdatastr match_patdata = { &patterns, &patterns_last };
372 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
373 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
374 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
375 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
376 
377 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
378                                  &include_dir_patterns, &exclude_dir_patterns };
379 
380 static const char *incexname[4] = { "--include", "--exclude",
381                                     "--include-dir", "--exclude-dir" };
382 
383 /* Structure for options and list of them */
384 
385 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
386        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
387 
388 typedef struct option_item {
389   int type;
390   int one_char;
391   void *dataptr;
392   const char *long_name;
393   const char *help_text;
394 } option_item;
395 
396 /* Options without a single-letter equivalent get a negative value. This can be
397 used to identify them. */
398 
399 #define N_COLOUR       (-1)
400 #define N_EXCLUDE      (-2)
401 #define N_EXCLUDE_DIR  (-3)
402 #define N_HELP         (-4)
403 #define N_INCLUDE      (-5)
404 #define N_INCLUDE_DIR  (-6)
405 #define N_LABEL        (-7)
406 #define N_LOCALE       (-8)
407 #define N_NULL         (-9)
408 #define N_LOFFSETS     (-10)
409 #define N_FOFFSETS     (-11)
410 #define N_LBUFFER      (-12)
411 #define N_H_LIMIT      (-13)
412 #define N_M_LIMIT      (-14)
413 #define N_M_LIMIT_DEP  (-15)
414 #define N_BUFSIZE      (-16)
415 #define N_NOJIT        (-17)
416 #define N_FILE_LIST    (-18)
417 #define N_BINARY_FILES (-19)
418 #define N_EXCLUDE_FROM (-20)
419 #define N_INCLUDE_FROM (-21)
420 #define N_OM_SEPARATOR (-22)
421 #define N_MAX_BUFSIZE  (-23)
422 #define N_OM_CAPTURE   (-24)
423 #define N_ALLABSK      (-25)
424 
425 static option_item optionlist[] = {
426   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
427   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
428   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
429   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
430   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
431   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
432   { OP_SIZE,       N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
433   { OP_SIZE,       N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
434   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
435   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
436   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
437   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
438   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
439   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
440   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
441   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
442   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
443   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
444   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
445   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
446   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
447   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
448   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
449   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
450   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
451   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
452   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
453   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
454   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
455   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
456   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
457   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
458   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
459   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
460   { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
461   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
462   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
463 #ifdef SUPPORT_PCRE2GREP_JIT
464   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
465 #else
466   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
467 #endif
468   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
469   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
470   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
471   { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
472   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
473   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
474   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
475   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
476   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
477   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
478   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
479   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
480   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
481   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
482   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
483   { OP_NODATA,    'U',      NULL,              "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
484   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
485   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
486   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
487   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
488   { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
489   { OP_NODATA,    'Z',      NULL,              "null",          "output 0 byte after file names"  },
490   { OP_NODATA,    0,        NULL,               NULL,            NULL }
491 };
492 
493 /* Table of names for newline types. Must be kept in step with the definitions
494 of PCRE2_NEWLINE_xx in pcre2.h. */
495 
496 static const char *newlines[] = {
497   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
498 
499 /* UTF-8 tables  */
500 
501 const int utf8_table1[] =
502   { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
503 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
504 
505 const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
506 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
507 
508 const char utf8_table4[] = {
509   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
510   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
511   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
512   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
513 
514 
515 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
516 /*************************************************
517 *    Emulated memmove() for systems without it   *
518 *************************************************/
519 
/* Stand-in for memmove() on systems that lack it. If bcopy() is available it
does the work (note that its source and destination arguments are the other way
round). Otherwise the bytes are moved by hand, and the direction of the copy is
chosen from the relative position of the two regions so that an overlapping
move does not overwrite source bytes before they have been read.

Arguments:
  d          destination
  s          source
  n          number of bytes to move

Returns:     d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* Destination lies above the source: start at the top and work down. */
  size_t remaining = n;
  while (remaining > 0)
    {
    remaining--;
    to[remaining] = from[remaining];
    }
  }
else
  {
  /* Destination lies at or below the source: a plain forward copy is safe. */
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }
return d;
#endif   /* not HAVE_BCOPY */
}
548 #undef memmove
549 #define memmove(d,s,n) emulated_memmove(d,s,n)
550 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
551 
552 
553 
554 /*************************************************
555 *           Convert code point to UTF-8          *
556 *************************************************/
557 
558 /* A static buffer is used. Returns the number of bytes. */
559 
560 static int
ord2utf8(uint32_t value)561 ord2utf8(uint32_t value)
562 {
563 int i, j;
564 uint8_t *utf8bytes = utf8_buffer;
565 for (i = 0; i < utf8_table1_size; i++)
566   if (value <= (uint32_t)utf8_table1[i]) break;
567 utf8bytes += i;
568 for (j = i; j > 0; j--)
569   {
570   *utf8bytes-- = 0x80 | (value & 0x3f);
571   value >>= 6;
572   }
573 *utf8bytes = utf8_table2[i] | value;
574 return i + 1;
575 }
576 
577 
578 
579 /*************************************************
580 *         Case-independent string compare        *
581 *************************************************/
582 
/* Compare two strings, treating upper and lower case letters (as defined by
tolower()) as equal.

Each character is converted to unsigned char before being passed to tolower().
Passing a negative plain char value (any byte above 0x7f when char is signed)
to a <ctype.h> function is undefined behaviour.

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal when case is ignored; otherwise -1 or
             +1 according to whether, at the first position where they differ,
             the lower-cased character of str1 is less than or greater than
             that of str2 (a string that ends first compares as less)
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;
while (*str1 != '\0' || *str2 != '\0')
  {
  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);

  /* (c1 > c2) is 0 or 1, which maps to -1 or +1. If one string has ended its
  zero terminator is what differs here, so neither pointer runs past its end. */

  if (c1 != c2) return ((c1 > c2) << 1) - 1;
  }
return 0;
}
595 
596 
597 /*************************************************
598 *         Parse GREP_COLORS                      *
599 *************************************************/
600 
/* Pick the value of the "ms" setting, or failing that the "mt" setting, out of
a GREP_COLORS string. The value runs up to the next colon or the end of the
string, and is cut short if it does not fit in the static result buffer.

Argument:  the string, possibly NULL
Returns:   pointer to a static buffer holding the value, or NULL if the
           argument is NULL or contains neither "ms=" nor "mt="
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *value;
uint32_t n = 0;

if (gc == NULL) return NULL;

value = strstr(gc, "ms=");
if (value == NULL)
  {
  value = strstr(gc, "mt=");
  if (value == NULL) return NULL;
  }

/* Step over the three characters of the "xx=" key, then copy the value. */

for (value += 3; n < sizeof(seq)-1; n++)
  {
  if (value[n] == ':' || value[n] == 0) break;
  seq[n] = value[n];
  }
seq[n] = 0;
return seq;
}
624 
625 
626 /*************************************************
627 *         Exit from the program                  *
628 *************************************************/
629 
630 /* If there has been a resource error, give a suitable message.
631 
632 Argument:  the return code
633 Returns:   does not return
634 */
635 
636 static void
pcre2grep_exit(int rc)637 pcre2grep_exit(int rc)
638 {
639 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
640 status of 1, which is not helpful. To help with this problem, define a symbol
641 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
642 therein. */
643 
644 #ifdef __VMS
645   char val_buf[4];
646   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
647   $DESCRIPTOR(sym_val, val_buf);
648   sprintf(val_buf, "%d", rc);
649   sym_val.dsc$w_length = strlen(val_buf);
650   lib$set_symbol(&sym_nam, &sym_val);
651 #endif
652 
653 if (resource_error)
654   {
655   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
656     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
657     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
658   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
659   }
660 exit(rc);
661 }
662 
663 
664 /*************************************************
665 *          Add item to chain of patterns         *
666 *************************************************/
667 
668 /* Used to add an item onto a chain, or just return an unconnected item if the
669 "after" argument is NULL.
670 
671 Arguments:
672   s          pattern string to add
673   patlen     length of pattern
674   after      if not NULL points to item to insert after
675 
676 Returns:     new pattern block or NULL on error
677 */
678 
679 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)680 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
681 {
682 patstr *p = (patstr *)malloc(sizeof(patstr));
683 
684 /* LCOV_EXCL_START - These won't be hit in normal testing. */
685 
686 if (p == NULL)
687   {
688   fprintf(stderr, "pcre2grep: malloc failed\n");
689   pcre2grep_exit(2);
690   }
691 if (patlen > MAXPATLEN)
692   {
693   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
694     MAXPATLEN);
695   free(p);
696   return NULL;
697   }
698 
699 /* LCOV_EXCL_STOP */
700 
701 p->next = NULL;
702 p->string = s;
703 p->length = patlen;
704 p->compiled = NULL;
705 
706 if (after != NULL)
707   {
708   p->next = after->next;
709   after->next = p;
710   }
711 return p;
712 }
713 
714 
715 /*************************************************
716 *           Free chain of patterns               *
717 *************************************************/
718 
719 /* Used for several chains of patterns.
720 
721 Argument: pointer to start of chain
722 Returns:  nothing
723 */
724 
725 static void
free_pattern_chain(patstr * pc)726 free_pattern_chain(patstr *pc)
727 {
728 while (pc != NULL)
729   {
730   patstr *p = pc;
731   pc = p->next;
732   if (p->compiled != NULL) pcre2_code_free(p->compiled);
733   free(p);
734   }
735 }
736 
737 
738 /*************************************************
739 *           Free chain of file names             *
740 *************************************************/
741 
742 /*
743 Argument: pointer to start of chain
744 Returns:  nothing
745 */
746 
747 static void
free_file_chain(fnstr * fn)748 free_file_chain(fnstr *fn)
749 {
750 while (fn != NULL)
751   {
752   fnstr *f = fn;
753   fn = f->next;
754   free(f);
755   }
756 }
757 
758 
759 /*************************************************
760 *            OS-specific functions               *
761 *************************************************/
762 
763 /* These definitions are needed in all Windows environments, even those where
764 Unix-style directory scanning can be used (see below). */
765 
766 #ifdef WIN32
767 
768 #ifndef STRICT
769 # define STRICT
770 #endif
771 #ifndef WIN32_LEAN_AND_MEAN
772 # define WIN32_LEAN_AND_MEAN
773 #endif
774 
775 #include <windows.h>
776 
777 #define iswild(name) (strpbrk(name, "*?") != NULL)
778 
/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 of the 3-bit
colour number change places and bit 1 stays where it is. The argument is fully
parenthesized so that an expression such as (a | b) is converted as a whole;
note that it is evaluated three times, so it must not have side effects. */
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
781 
782 static HANDLE hstdout;
783 static CONSOLE_SCREEN_BUFFER_INFO csbi;
784 static WORD match_colour;
785 
786 static WORD
decode_ANSI_colour(const char * cs)787 decode_ANSI_colour(const char *cs)
788 {
789 WORD result = csbi.wAttributes;
790 while (*cs)
791   {
792   if (isdigit(*cs))
793     {
794     int code = atoi(cs);
795     if (code == 1) result |= 0x08;
796     else if (code == 4) result |= 0x8000;
797     else if (code == 5) result |= 0x80;
798     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
799     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
800     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
801     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
802     /* aixterm high intensity colour codes */
803     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
804     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
805 
806     while (isdigit(*cs)) cs++;
807     }
808   if (*cs) cs++;
809   }
810 return result;
811 }
812 
813 
814 static void
init_colour_output()815 init_colour_output()
816 {
817 if (do_colour)
818   {
819   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
820   /* This fails when redirected to con; try again if so. */
821   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
822     {
823     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
824       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
825     GetConsoleScreenBufferInfo(hcon, &csbi);
826     CloseHandle(hcon);
827     }
828   match_colour = decode_ANSI_colour(colour_string);
829   /* No valid colour found - turn off colouring */
830   if (!match_colour) do_colour = FALSE;
831   }
832 }
833 
834 #endif  /* WIN32 */
835 
836 
837 /* The following sets of functions are defined so that they can be made system
838 specific. At present there are versions for Unix-style environments, Windows,
839 native z/OS, and "no support". */
840 
841 
842 /************* Directory scanning Unix-style and z/OS ***********/
843 
844 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
845 #include <sys/types.h>
846 #include <sys/stat.h>
847 #include <dirent.h>
848 
849 #if defined NATIVE_ZOS
850 /************* Directory and PDS/E scanning for z/OS ***********/
851 /************* z/OS looks mostly like Unix with USS ************/
852 /* However, z/OS needs the #include statements in this header */
853 #include "pcrzosfs.h"
854 /* That header is not included in the main PCRE distribution because
855    other apparatus is needed to compile pcre2grep for z/OS. The header
856    can be found in the special z/OS distribution, which is available
857    from www.zaconsultants.net or from www.cbttape.org. */
858 #endif
859 
860 typedef DIR directory_type;
861 #define FILESEP '/'
862 
/* Test whether a path names a directory. If stat() fails, zero is returned, in
the expectation that opening the path as a file will fail. */

static int
isdirectory(char *filename)
{
struct stat sb;
int rc = stat(filename, &sb);
return (rc < 0)? 0 : S_ISDIR(sb.st_mode);
}
871 
872 static directory_type *
opendirectory(char * filename)873 opendirectory(char *filename)
874 {
875 return opendir(filename);
876 }
877 
878 static char *
readdirectory(directory_type * dir)879 readdirectory(directory_type *dir)
880 {
881 for (;;)
882   {
883   struct dirent *dent = readdir(dir);
884   if (dent == NULL) return NULL;
885   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
886     return dent->d_name;
887   }
888 /* Control never reaches here */
889 }
890 
891 static void
closedirectory(directory_type * dir)892 closedirectory(directory_type *dir)
893 {
894 closedir(dir);
895 }
896 
897 
898 /************* Test for regular file, Unix-style **********/
899 
/* Test whether a path names a regular file. If stat() fails, 1 is returned, in
the expectation that opening the path as a file will fail. */

static int
isregfile(char *filename)
{
struct stat sb;
int rc = stat(filename, &sb);
return (rc < 0)? 1 : S_ISREG(sb.st_mode);
}
908 
909 
910 #if defined NATIVE_ZOS
911 /************* Test for a terminal in z/OS **********/
912 /* isatty() does not work in a TSO environment, so always give FALSE.*/
913 
914 static BOOL
is_stdout_tty(void)915 is_stdout_tty(void)
916 {
917 return FALSE;
918 }
919 
920 static BOOL
is_file_tty(FILE * f)921 is_file_tty(FILE *f)
922 {
923 return FALSE;
924 }
925 
926 
927 /************* Test for a terminal, Unix-style **********/
928 
929 #else
930 static BOOL
is_stdout_tty(void)931 is_stdout_tty(void)
932 {
933 return isatty(fileno(stdout));
934 }
935 
936 static BOOL
is_file_tty(FILE * f)937 is_file_tty(FILE *f)
938 {
939 return isatty(fileno(f));
940 }
941 #endif
942 
943 
944 /************* Print optionally coloured match Unix-style and z/OS **********/
945 
946 static void
print_match(const void * buf,int length)947 print_match(const void *buf, int length)
948 {
949 if (length == 0) return;
950 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
951 FWRITE_IGNORE(buf, 1, length, stdout);
952 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
953 }
954 
955 /* End of Unix-style or native z/OS environment functions. */
956 
957 
958 /************* Directory scanning in Windows ***********/
959 
960 /* I (Philip Hazel) have no means of testing this code. It was contributed by
961 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
962 when it did not exist. David Byron added a patch that moved the #include of
963 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
964 */
965 
966 #elif defined WIN32
967 
968 #ifndef INVALID_FILE_ATTRIBUTES
969 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
970 #endif
971 
972 typedef struct directory_type
973 {
974 HANDLE handle;
975 BOOL first;
976 WIN32_FIND_DATA data;
977 } directory_type;
978 
979 #define FILESEP '/'
980 
981 int
isdirectory(char * filename)982 isdirectory(char *filename)
983 {
984 DWORD attr = GetFileAttributes(filename);
985 if (attr == INVALID_FILE_ATTRIBUTES)
986   return 0;
987 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
988 }
989 
990 directory_type *
opendirectory(char * filename)991 opendirectory(char *filename)
992 {
993 size_t len;
994 char *pattern;
995 directory_type *dir;
996 DWORD err;
997 len = strlen(filename);
998 pattern = (char *)malloc(len + 3);
999 dir = (directory_type *)malloc(sizeof(*dir));
1000 if ((pattern == NULL) || (dir == NULL))
1001   {
1002   fprintf(stderr, "pcre2grep: malloc failed\n");
1003   pcre2grep_exit(2);
1004   }
1005 memcpy(pattern, filename, len);
1006 if (iswild(filename))
1007   pattern[len] = 0;
1008 else
1009   memcpy(&(pattern[len]), "\\*", 3);
1010 dir->handle = FindFirstFile(pattern, &(dir->data));
1011 if (dir->handle != INVALID_HANDLE_VALUE)
1012   {
1013   free(pattern);
1014   dir->first = TRUE;
1015   return dir;
1016   }
1017 err = GetLastError();
1018 free(pattern);
1019 free(dir);
1020 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1021 return NULL;
1022 }
1023 
1024 char *
readdirectory(directory_type * dir)1025 readdirectory(directory_type *dir)
1026 {
1027 for (;;)
1028   {
1029   if (!dir->first)
1030     {
1031     if (!FindNextFile(dir->handle, &(dir->data)))
1032       return NULL;
1033     }
1034   else
1035     {
1036     dir->first = FALSE;
1037     }
1038   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1039     return dir->data.cFileName;
1040   }
1041 #ifndef _MSC_VER
1042 return NULL;   /* Keep compiler happy; never executed */
1043 #endif
1044 }
1045 
1046 void
closedirectory(directory_type * dir)1047 closedirectory(directory_type *dir)
1048 {
1049 FindClose(dir->handle);
1050 free(dir);
1051 }
1052 
1053 
1054 /************* Test for regular file in Windows **********/
1055 
1056 /* I don't know how to do this, or if it can be done; assume all paths are
1057 regular if they are not directories. */
1058 
/* Windows: anything that is not a directory is taken to be a regular file. */

int isregfile(char *filename)
{
int is_dir = isdirectory(filename);
return !is_dir;
}
1063 
1064 
1065 /************* Test for a terminal in Windows **********/
1066 
1067 static BOOL
is_stdout_tty(void)1068 is_stdout_tty(void)
1069 {
1070 return _isatty(_fileno(stdout));
1071 }
1072 
1073 static BOOL
is_file_tty(FILE * f)1074 is_file_tty(FILE *f)
1075 {
1076 return _isatty(_fileno(f));
1077 }
1078 
1079 
1080 /************* Print optionally coloured match in Windows **********/
1081 
1082 static void
print_match(const void * buf,int length)1083 print_match(const void *buf, int length)
1084 {
1085 if (length == 0) return;
1086 if (do_colour)
1087   {
1088   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1089     else SetConsoleTextAttribute(hstdout, match_colour);
1090   }
1091 FWRITE_IGNORE(buf, 1, length, stdout);
1092 if (do_colour)
1093   {
1094   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1095     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1096   }
1097 }
1098 
1099 /* End of Windows functions */
1100 
1101 
1102 /************* Directory scanning when we can't do it ***********/
1103 
1104 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1105 
1106 #else
1107 
1108 #define FILESEP 0
1109 typedef void directory_type;
1110 
/* No directory support: nothing is ever reported as a directory. */
int isdirectory(char *filename)
{
(void)filename;
return 0;
}
opendirectory(char * filename)1112 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* No directory support: there is never an entry to return. */
char *readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}
/* No directory support: closing does nothing. */
void closedirectory(directory_type *dir)
{
(void)dir;
}
1115 
1116 
1117 /************* Test for regular file when we can't do it **********/
1118 
1119 /* Assume all files are regular. */
1120 
/* No file-type support: every path is reported as a regular file. */
int isregfile(char *filename)
{
(void)filename;
return 1;
}
1122 
1123 
1124 /************* Test for a terminal when we can't do it **********/
1125 
1126 static BOOL
is_stdout_tty(void)1127 is_stdout_tty(void)
1128 {
1129 return FALSE;
1130 }
1131 
1132 static BOOL
is_file_tty(FILE * f)1133 is_file_tty(FILE *f)
1134 {
1135 return FALSE;
1136 }
1137 
1138 
1139 /************* Print optionally coloured match when we can't do it **********/
1140 
/* No colour support: write a matched string to stdout as it stands. Nothing is
written for an empty match. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1147 
1148 #endif  /* End of system-specific functions */
1149 
1150 
1151 
1152 #ifndef HAVE_STRERROR
1153 /*************************************************
1154 *     Provide strerror() for non-ANSI libraries  *
1155 *************************************************/
1156 
1157 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1158 in their libraries, but can provide the same facility by this simple
1159 alternative function. */
1160 
1161 extern int   sys_nerr;
1162 extern char *sys_errlist[];
1163 
1164 char *
strerror(int n)1165 strerror(int n)
1166 {
1167 if (n < 0 || n >= sys_nerr) return "unknown error number";
1168 return sys_errlist[n];
1169 }
1170 #endif /* HAVE_STRERROR */
1171 
1172 
1173 
1174 /*************************************************
1175 *                Usage function                  *
1176 *************************************************/
1177 
1178 static int
usage(int rc)1179 usage(int rc)
1180 {
1181 option_item *op;
1182 fprintf(stderr, "Usage: pcre2grep [-");
1183 for (op = optionlist; op->one_char != 0; op++)
1184   {
1185   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1186   }
1187 fprintf(stderr, "] [long options] [pattern] [files]\n");
1188 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1189   "options.\n");
1190 return rc;
1191 }
1192 
1193 
1194 
1195 /*************************************************
1196 *                Help function                   *
1197 *************************************************/
1198 
1199 static void
help(void)1200 help(void)
1201 {
1202 option_item *op;
1203 
1204 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1205 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1206 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1207 
1208 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1209 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1210 printf("All callout scripts in patterns are supported." STDOUT_NL);
1211 #else
1212 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1213 #endif
1214 #else
1215 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1216 #endif
1217 
1218 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1219 
1220 #ifdef SUPPORT_LIBZ
1221 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1222 #endif
1223 
1224 #ifdef SUPPORT_LIBBZ2
1225 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1226 #endif
1227 
1228 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1229 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1230 #else
1231 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1232 #endif
1233 
1234 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1235 printf("Options:" STDOUT_NL);
1236 
1237 for (op = optionlist; op->one_char != 0; op++)
1238   {
1239   int n;
1240   char s[4];
1241 
1242   if (op->one_char > 0 && (op->long_name)[0] == 0)
1243     n = 31 - printf("  -%c", op->one_char);
1244   else
1245     {
1246     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1247       else strcpy(s, "   ");
1248     n = 31 - printf("  %s --%s", s, op->long_name);
1249     }
1250 
1251   if (n < 1) n = 1;
1252   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1253   }
1254 
1255 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1256 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1257 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1258 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1259 printf("space is removed and blank lines are ignored." STDOUT_NL);
1260 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1261 
1262 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1263 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1264 }
1265 
1266 
1267 
1268 /*************************************************
1269 *            Test exclude/includes               *
1270 *************************************************/
1271 
1272 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1273 there are no includes, the path must match an include pattern.
1274 
1275 Arguments:
1276   path      the path to be matched
1277   ip        the chain of include patterns
1278   ep        the chain of exclude patterns
1279 
1280 Returns:    TRUE if the path is not excluded
1281 */
1282 
1283 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1284 test_incexc(char *path, patstr *ip, patstr *ep)
1285 {
1286 int plen = strlen((const char *)path);
1287 
1288 for (; ep != NULL; ep = ep->next)
1289   {
1290   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1291     return FALSE;
1292   }
1293 
1294 if (ip == NULL) return TRUE;
1295 
1296 for (; ip != NULL; ip = ip->next)
1297   {
1298   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1299     return TRUE;
1300   }
1301 
1302 return FALSE;
1303 }
1304 
1305 
1306 
1307 /*************************************************
1308 *         Decode integer argument value          *
1309 *************************************************/
1310 
1311 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1312 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1313 just keep it simple.
1314 
1315 Arguments:
1316   option_data   the option data string
1317   op            the option item (for error messages)
1318   longop        TRUE if option given in long form
1319 
1320 Returns:        a long integer
1321 */
1322 
1323 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1324 decode_number(char *option_data, option_item *op, BOOL longop)
1325 {
1326 unsigned long int n = 0;
1327 char *endptr = option_data;
1328 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1329 while (isdigit((unsigned char)(*endptr)))
1330   n = n * 10 + (int)(*endptr++ - '0');
1331 if (toupper(*endptr) == 'K')
1332   {
1333   n *= 1024;
1334   endptr++;
1335   }
1336 else if (toupper(*endptr) == 'M')
1337   {
1338   n *= 1024*1024;
1339   endptr++;
1340   }
1341 
1342 if (*endptr != 0)   /* Error */
1343   {
1344   if (longop)
1345     {
1346     char *equals = strchr(op->long_name, '=');
1347     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1348       (int)(equals - op->long_name);
1349     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1350       option_data, nlen, op->long_name);
1351     }
1352   else
1353     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1354       option_data, op->one_char);
1355   pcre2grep_exit(usage(2));
1356   }
1357 
1358 return n;
1359 }
1360 
1361 
1362 
1363 /*************************************************
1364 *       Add item to a chain of numbers           *
1365 *************************************************/
1366 
1367 /* Used to add an item onto a chain, or just return an unconnected item if the
1368 "after" argument is NULL.
1369 
1370 Arguments:
1371   n          the number to add
1372   after      if not NULL points to item to insert after
1373 
1374 Returns:     new number block
1375 */
1376 
1377 static omstr *
add_number(int n,omstr * after)1378 add_number(int n, omstr *after)
1379 {
1380 omstr *om = (omstr *)malloc(sizeof(omstr));
1381 
1382 /* LCOV_EXCL_START - These lines won't be hit in normal testing. */
1383 
1384 if (om == NULL)
1385   {
1386   fprintf(stderr, "pcre2grep: malloc failed\n");
1387   pcre2grep_exit(2);
1388   }
1389 
1390 /* LCOV_EXCL_STOP */
1391 
1392 om->next = NULL;
1393 om->groupnum = n;
1394 
1395 if (after != NULL)
1396   {
1397   om->next = after->next;
1398   after->next = om;
1399   }
1400 return om;
1401 }
1402 
1403 
1404 
1405 /*************************************************
1406 *            Read one line of input              *
1407 *************************************************/
1408 
1409 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1410 BZ2_read) into a large buffer, so many lines may be read at once. However,
1411 doing this for tty input means that no output appears until a lot of input has
1412 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1413 for this, because it does not stop at a binary zero, and therefore there is no
1414 way of telling how many characters it has read, because there may be binary
1415 zeros embedded in the data. This function is also used for reading patterns
1416 from files (the -f option).
1417 
1418 Arguments:
1419   buffer     the buffer to read into
1420   length     the maximum number of characters to read
1421   f          the file
1422 
1423 Returns:     the number of characters read, zero at end of file
1424 */
1425 
1426 static PCRE2_SIZE
read_one_line(char * buffer,PCRE2_SIZE length,FILE * f)1427 read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
1428 {
1429 int c;
1430 PCRE2_SIZE yield = 0;
1431 while ((c = fgetc(f)) != EOF)
1432   {
1433   buffer[yield++] = c;
1434   if (c == '\n' || yield >= length) break;
1435   }
1436 return yield;
1437 }
1438 
1439 
1440 
1441 /*************************************************
1442 *             Find end of line                   *
1443 *************************************************/
1444 
1445 /* The length of the endline sequence that is found is set via lenptr. This may
1446 be zero at the very end of the file if there is no line-ending sequence there.
1447 
1448 Arguments:
1449   p         current position in line
1450   endptr    end of available data
1451   lenptr    where to put the length of the eol sequence
1452 
1453 Returns:    pointer after the last byte of the line,
1454             including the newline byte(s)
1455 */
1456 
1457 static char *
end_of_line(char * p,char * endptr,int * lenptr)1458 end_of_line(char *p, char *endptr, int *lenptr)
1459 {
1460 switch(endlinetype)
1461   {
1462   default:      /* Just in case */
1463   case PCRE2_NEWLINE_LF:
1464   while (p < endptr && *p != '\n') p++;
1465   if (p < endptr)
1466     {
1467     *lenptr = 1;
1468     return p + 1;
1469     }
1470   *lenptr = 0;
1471   return endptr;
1472 
1473   case PCRE2_NEWLINE_CR:
1474   while (p < endptr && *p != '\r') p++;
1475   if (p < endptr)
1476     {
1477     *lenptr = 1;
1478     return p + 1;
1479     }
1480   *lenptr = 0;
1481   return endptr;
1482 
1483   case PCRE2_NEWLINE_NUL:
1484   while (p < endptr && *p != '\0') p++;
1485   if (p < endptr)
1486     {
1487     *lenptr = 1;
1488     return p + 1;
1489     }
1490   *lenptr = 0;
1491   return endptr;
1492 
1493   case PCRE2_NEWLINE_CRLF:
1494   for (;;)
1495     {
1496     while (p < endptr && *p != '\r') p++;
1497     if (++p >= endptr)
1498       {
1499       *lenptr = 0;
1500       return endptr;
1501       }
1502     if (*p == '\n')
1503       {
1504       *lenptr = 2;
1505       return p + 1;
1506       }
1507     }
1508   break;
1509 
1510   case PCRE2_NEWLINE_ANYCRLF:
1511   while (p < endptr)
1512     {
1513     int extra = 0;
1514     int c = *((unsigned char *)p);
1515 
1516     if (utf && c >= 0xc0)
1517       {
1518       int gcii, gcss;
1519       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1520       gcss = 6*extra;
1521       c = (c & utf8_table3[extra]) << gcss;
1522       for (gcii = 1; gcii <= extra; gcii++)
1523         {
1524         gcss -= 6;
1525         c |= (p[gcii] & 0x3f) << gcss;
1526         }
1527       }
1528 
1529     p += 1 + extra;
1530 
1531     switch (c)
1532       {
1533       case '\n':
1534       *lenptr = 1;
1535       return p;
1536 
1537       case '\r':
1538       if (p < endptr && *p == '\n')
1539         {
1540         *lenptr = 2;
1541         p++;
1542         }
1543       else *lenptr = 1;
1544       return p;
1545 
1546       default:
1547       break;
1548       }
1549     }   /* End of loop for ANYCRLF case */
1550 
1551   *lenptr = 0;  /* Must have hit the end */
1552   return endptr;
1553 
1554   case PCRE2_NEWLINE_ANY:
1555   while (p < endptr)
1556     {
1557     int extra = 0;
1558     int c = *((unsigned char *)p);
1559 
1560     if (utf && c >= 0xc0)
1561       {
1562       int gcii, gcss;
1563       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1564       gcss = 6*extra;
1565       c = (c & utf8_table3[extra]) << gcss;
1566       for (gcii = 1; gcii <= extra; gcii++)
1567         {
1568         gcss -= 6;
1569         c |= (p[gcii] & 0x3f) << gcss;
1570         }
1571       }
1572 
1573     p += 1 + extra;
1574 
1575     switch (c)
1576       {
1577       case '\n':    /* LF */
1578       case '\v':    /* VT */
1579       case '\f':    /* FF */
1580       *lenptr = 1;
1581       return p;
1582 
1583       case '\r':    /* CR */
1584       if (p < endptr && *p == '\n')
1585         {
1586         *lenptr = 2;
1587         p++;
1588         }
1589       else *lenptr = 1;
1590       return p;
1591 
1592 #ifndef EBCDIC
1593       case 0x85:    /* Unicode NEL */
1594       *lenptr = utf? 2 : 1;
1595       return p;
1596 
1597       case 0x2028:  /* Unicode LS */
1598       case 0x2029:  /* Unicode PS */
1599       *lenptr = 3;
1600       return p;
1601 #endif  /* Not EBCDIC */
1602 
1603       default:
1604       break;
1605       }
1606     }   /* End of loop for ANY case */
1607 
1608   *lenptr = 0;  /* Must have hit the end */
1609   return endptr;
1610   }     /* End of overall switch */
1611 }
1612 
1613 
1614 
1615 /*************************************************
1616 *         Find start of previous line            *
1617 *************************************************/
1618 
1619 /* This is called when looking back for before lines to print.
1620 
1621 Arguments:
1622   p         start of the subsequent line
1623   startptr  start of available data
1624 
1625 Returns:    pointer to the start of the previous line
1626 */
1627 
1628 static char *
previous_line(char * p,char * startptr)1629 previous_line(char *p, char *startptr)
1630 {
1631 switch(endlinetype)
1632   {
1633   default:      /* Just in case */
1634   case PCRE2_NEWLINE_LF:
1635   p--;
1636   while (p > startptr && p[-1] != '\n') p--;
1637   return p;
1638 
1639   case PCRE2_NEWLINE_CR:
1640   p--;
1641   while (p > startptr && p[-1] != '\n') p--;
1642   return p;
1643 
1644   case PCRE2_NEWLINE_NUL:
1645   p--;
1646   while (p > startptr && p[-1] != '\0') p--;
1647   return p;
1648 
1649   case PCRE2_NEWLINE_CRLF:
1650   for (;;)
1651     {
1652     p -= 2;
1653     while (p > startptr && p[-1] != '\n') p--;
1654     if (p <= startptr + 1 || p[-2] == '\r') return p;
1655     }
1656   /* Control can never get here */
1657 
1658   case PCRE2_NEWLINE_ANY:
1659   case PCRE2_NEWLINE_ANYCRLF:
1660   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1661   if (utf) while ((*p & 0xc0) == 0x80) p--;
1662 
1663   while (p > startptr)
1664     {
1665     unsigned int c;
1666     char *pp = p - 1;
1667 
1668     if (utf)
1669       {
1670       int extra = 0;
1671       while ((*pp & 0xc0) == 0x80) pp--;
1672       c = *((unsigned char *)pp);
1673       if (c >= 0xc0)
1674         {
1675         int gcii, gcss;
1676         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1677         gcss = 6*extra;
1678         c = (c & utf8_table3[extra]) << gcss;
1679         for (gcii = 1; gcii <= extra; gcii++)
1680           {
1681           gcss -= 6;
1682           c |= (pp[gcii] & 0x3f) << gcss;
1683           }
1684         }
1685       }
1686     else c = *((unsigned char *)pp);
1687 
1688     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1689       {
1690       case '\n':    /* LF */
1691       case '\r':    /* CR */
1692       return p;
1693 
1694       default:
1695       break;
1696       }
1697 
1698     else switch (c)
1699       {
1700       case '\n':    /* LF */
1701       case '\v':    /* VT */
1702       case '\f':    /* FF */
1703       case '\r':    /* CR */
1704 #ifndef EBCDIC
1705       case 0x85:    /* Unicode NEL */
1706       case 0x2028:  /* Unicode LS */
1707       case 0x2029:  /* Unicode PS */
1708 #endif  /* Not EBCDIC */
1709       return p;
1710 
1711       default:
1712       break;
1713       }
1714 
1715     p = pp;  /* Back one character */
1716     }        /* End of loop for ANY case */
1717 
1718   return startptr;  /* Hit start of data */
1719   }     /* End of overall switch */
1720 }
1721 
1722 
1723 
1724 /*************************************************
1725 *              Output newline at end             *
1726 *************************************************/
1727 
1728 /* This function is called if the final line of a file has been written to
1729 stdout, but it does not have a terminating newline.
1730 
1731 Arguments:  none
1732 Returns:    nothing
1733 */
1734 
1735 static void
write_final_newline(void)1736 write_final_newline(void)
1737 {
1738 switch(endlinetype)
1739   {
1740   default:      /* Just in case */
1741   case PCRE2_NEWLINE_LF:
1742   case PCRE2_NEWLINE_ANY:
1743   case PCRE2_NEWLINE_ANYCRLF:
1744   fprintf(stdout, "\n");
1745   break;
1746 
1747   case PCRE2_NEWLINE_CR:
1748   fprintf(stdout, "\r");
1749   break;
1750 
1751   case PCRE2_NEWLINE_CRLF:
1752   fprintf(stdout, "\r\n");
1753   break;
1754 
1755   case PCRE2_NEWLINE_NUL:
1756   fprintf(stdout, "%c", 0);
1757   break;
1758   }
1759 }
1760 
1761 
1762 /*************************************************
1763 *       Print the previous "after" lines         *
1764 *************************************************/
1765 
1766 /* This is called if we are about to lose said lines because of buffer filling,
1767 and at the end of the file. The data in the line is written using fwrite() so
1768 that a binary zero does not terminate it.
1769 
1770 Arguments:
1771   lastmatchnumber   the number of the last matching line, plus one
1772   lastmatchrestart  where we restarted after the last match
1773   endptr            end of available data
1774   printname         filename for printing
1775 
1776 Returns:            nothing
1777 */
1778 
1779 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1780 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1781   char *endptr, const char *printname)
1782 {
1783 if (after_context > 0 && lastmatchnumber > 0)
1784   {
1785   int count = 0;
1786   int ellength = 0;
1787   while (lastmatchrestart < endptr && count < after_context)
1788     {
1789     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1790     if (ellength == 0 && pp == main_buffer + bufsize) break;
1791     if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
1792     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1793     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1794     lastmatchrestart = pp;
1795     count++;
1796     }
1797 
1798   /* If we have printed any lines, arrange for a hyphen separator if anything
1799   else follows. Also, if the last line is the final line in the file and it had
1800   no newline, add one. */
1801 
1802   if (count > 0)
1803     {
1804     hyphenpending = TRUE;
1805     if (ellength == 0 && lastmatchrestart >= endptr)
1806       write_final_newline();
1807     }
1808   }
1809 }
1810 
1811 
1812 
1813 /*************************************************
1814 *   Apply patterns to subject till one matches   *
1815 *************************************************/
1816 
1817 /* This function is called to run through all the patterns, looking for a
1818 match. When all possible matches are required, for example, for colouring, it
1819 checks all patterns for matching, and returns the earliest match. Otherwise, it
1820 returns the first pattern that has matched.
1821 
1822 Arguments:
1823   matchptr     the start of the subject
1824   length       the length of the subject to match
1825   options      options for pcre2_match
1826   startoffset  where to start matching
1827   mrc          address of where to put the result of pcre2_match()
1828 
1829 Returns:       TRUE if there was a match, match_data and offsets are set
1830                FALSE if there was no match (but no errors)
1831                invert if there was a non-fatal error
1832 */
1833 
1834 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1835 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1836   PCRE2_SIZE startoffset, int *mrc)
1837 {
1838 PCRE2_SIZE slen = length;
1839 int first = -1;
1840 int firstrc = 0;
1841 patstr *p = patterns;
1842 const char *msg = "this text:\n\n";
1843 
1844 if (slen > 200)
1845   {
1846   slen = 200;
1847   msg = "text that starts:\n\n";
1848   }
1849 
1850 for (int i = 1; p != NULL; p = p->next, i++)
1851   {
1852   int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1853     startoffset, options, match_data, match_context);
1854   if (rc == PCRE2_ERROR_NOMATCH) continue;
1855 
1856   /* Handle a successful match. When all_matches is false, we are done.
1857   Otherwise we must save the earliest match. */
1858 
1859   if (rc >= 0)
1860     {
1861     if (!all_matches)
1862       {
1863       *mrc = rc;
1864       return TRUE;
1865       }
1866 
1867     if (first < 0 || offsets[0] < offsets_pair[first][0] ||
1868          (offsets[0] == offsets_pair[first][0] &&
1869           offsets[1] > offsets_pair[first][1]))
1870       {
1871       first = match_data_toggle;
1872       firstrc = rc;
1873       match_data_toggle ^= 1;
1874       match_data = match_data_pair[match_data_toggle];
1875       offsets = offsets_pair[match_data_toggle];
1876       }
1877     continue;
1878     }
1879 
1880   /* Deal with PCRE2 error. */
1881 
1882   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
1883   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1884   fprintf(stderr, "%s", msg);
1885   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1886   fprintf(stderr, "\n\n");
1887   if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
1888       rc >= PCRE2_ERROR_UTF8_ERR21)
1889     {
1890     unsigned char mbuffer[256];
1891     PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1892     (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
1893     fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1894     }
1895   if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
1896       rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT)
1897     resource_error = TRUE;
1898   if (error_count++ > 20)
1899     {
1900     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1901     pcre2grep_exit(2);
1902     }
1903   return invert;    /* No more matching; don't show the line again */
1904   }
1905 
/* We get here when all patterns have been tried. If all_matches is false,
this means that none of them matched. If all_matches is true, first will be
non-negative if there was at least one match, and it will index the
match_data block (and offsets vector) that holds the earliest match. */
1910 
1911 if (!all_matches || first < 0) return FALSE;
1912 
1913 match_data_toggle = first;
1914 match_data = match_data_pair[first];
1915 offsets = offsets_pair[first];
1916 *mrc = firstrc;
1917 return TRUE;
1918 }
1919 
1920 
1921 
1922 /*************************************************
1923 *          Decode dollar escape sequence         *
1924 *************************************************/
1925 
1926 /* Called from various places to decode $ escapes in output strings. The escape
1927 sequences are as follows:
1928 
1929 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1930 zero is never returned; '0' is substituted.
1931 
1932 $a returns bell.
1933 $b returns backspace.
1934 $e returns escape.
1935 $f returns form feed.
1936 $n returns newline.
1937 $r returns carriage return.
1938 $t returns tab.
1939 $v returns vertical tab.
1940 $o<digits> returns the character represented by the given octal
1941   number; up to three digits are processed.
1942 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1943   code points.
1944 $x<digits> returns the character represented by the given hexadecimal
1945   number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1947   code points.
1948 Any other character is substituted by itself. E.g: $$ is replaced by a single
1949 dollar.
1950 
1951 Arguments:
1952   begin      the start of the whole string
1953   string     points to the $
1954   callout    TRUE if in a callout (inhibits error messages)
1955   value      where to return a value
1956   last       where to return pointer to the last used character
1957 
1958 Returns:     DDE_ERROR    after a syntax error
1959              DDE_CAPTURE  if *value is a capture number
1960              DDE_CHAR     if *value is a character code
1961 */
1962 
1963 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1964 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1965   uint32_t *value, PCRE2_SPTR *last)
1966 {
1967 uint32_t c = 0;
1968 int base = 10;
1969 int dcount;
1970 int rc = DDE_CHAR;
1971 BOOL brace = FALSE;
1972 
1973 switch (*(++string))
1974   {
1975   case 0:   /* Syntax error: a character must be present after $. */
1976   if (!callout)
1977     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1978       (int)(string - begin), "no character after $");
1979   *last = string;
1980   return DDE_ERROR;
1981 
1982   case '{':
1983   brace = TRUE;
1984   string++;
1985   if (!isdigit(*string))  /* Syntax error: a decimal number required. */
1986     {
1987     if (!callout)
1988       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1989         (int)(string - begin), "decimal number expected");
1990     rc = DDE_ERROR;
1991     break;
1992     }
1993 
1994   /* Fall through */
1995 
1996   /* The maximum capture number is 65535, so any number greater than that will
1997   always be an unknown capture number. We just stop incrementing, in order to
1998   avoid overflow. */
1999 
2000   case '0': case '1': case '2': case '3': case '4':
2001   case '5': case '6': case '7': case '8': case '9':
2002   do
2003     {
2004     if (c <= 65535) c = c * 10 + (*string - '0');
2005     string++;
2006     }
2007   while (*string >= '0' && *string <= '9');
2008   string--;  /* Point to last digit */
2009 
2010   /* In a callout, capture number 0 is not available. No error can be given,
2011   so just return the character '0'. */
2012 
2013   if (callout && c == 0)
2014     {
2015     *value = '0';
2016     }
2017   else
2018     {
2019     *value = c;
2020     rc = DDE_CAPTURE;
2021     }
2022   break;
2023 
2024   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
2025   for valid Unicode code points. */
2026 
2027   case 'o':
2028   base = 8;
2029   string++;
2030   if (*string == '{')
2031     {
2032     brace = TRUE;
2033     string++;
2034     dcount = 7;
2035     }
2036   else dcount = 3;
2037   for (; dcount > 0; dcount--)
2038     {
2039     if (*string < '0' || *string > '7') break;
2040     c = c * 8 + (*string++ - '0');
2041     }
2042   *value = c;
2043   string--;  /* Point to last digit */
2044   break;
2045 
2046   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2047   for valid Unicode code points. */
2048 
2049   case 'x':
2050   base = 16;
2051   string++;
2052   if (*string == '{')
2053     {
2054     brace = TRUE;
2055     string++;
2056     dcount = 6;
2057     }
2058   else dcount = 2;
2059   for (; dcount > 0; dcount--)
2060     {
2061     if (!isxdigit(*string)) break;
2062     if (*string >= '0' && *string <= '9')
2063       c = c *16 + *string++ - '0';
2064     else
2065       c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2066     }
2067   *value = c;
2068   string--;  /* Point to last digit */
2069   break;
2070 
2071   case 'a': *value = '\a'; break;
2072   case 'b': *value = '\b'; break;
2073 #ifndef EBCDIC
2074   case 'e': *value = '\033'; break;
2075 #else
2076   case 'e': *value = '\047'; break;
2077 #endif
2078   case 'f': *value = '\f'; break;
2079   case 'n': *value = STDOUT_NL_CODE; break;
2080   case 'r': *value = '\r'; break;
2081   case 't': *value = '\t'; break;
2082   case 'v': *value = '\v'; break;
2083 
2084   default: *value = *string; break;
2085   }
2086 
2087 if (brace)
2088   {
2089   c = string[1];
2090   if (c != '}')
2091     {
2092     rc = DDE_ERROR;
2093     if (!callout)
2094       {
2095       if ((base == 8 && c >= '0' && c <= '7') ||
2096           (base == 16 && isxdigit(c)))
2097         {
2098         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2099           "too many %s digits\n", (int)(string - begin),
2100           (base == 8)? "octal" : "hex");
2101         }
2102       else
2103         {
2104         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2105           (int)(string - begin), "missing closing brace");
2106         }
2107       }
2108     }
2109   else string++;
2110   }
2111 
2112 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2113 
2114 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2115   {
2116   uint32_t max = utf? 0x0010ffffu : 0xffu;
2117   if (*value > max)
2118     {
2119     if (!callout)
2120       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2121         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2122     rc = DDE_ERROR;
2123     }
2124   }
2125 
2126 *last = string;
2127 return rc;
2128 }
2129 
2130 
2131 
2132 /*************************************************
2133 *          Check output text for errors          *
2134 *************************************************/
2135 
2136 /* Called early, to get errors before doing anything for -O text; also called
2137 from callouts to check before outputting.
2138 
2139 Arguments:
2140   string    an --output text string
2141   callout   TRUE if in a callout (stops printing errors)
2142 
2143 Returns:    TRUE if OK, FALSE on error
2144 */
2145 
2146 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2147 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2148 {
2149 uint32_t value;
2150 PCRE2_SPTR begin = string;
2151 
2152 for (; *string != 0; string++)
2153   {
2154   if (*string == '$' &&
2155     decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2156       return FALSE;
2157   }
2158 
2159 return TRUE;
2160 }
2161 
2162 
2163 /*************************************************
2164 *              Display output text               *
2165 *************************************************/
2166 
2167 /* Display the output text, which is assumed to have already been syntax
2168 checked. Output may contain escape sequences started by the dollar sign.
2169 
2170 Arguments:
2171   string:       the output text
2172   callout:      TRUE for the builtin callout, FALSE for --output
2173   subject       the start of the subject
2174   ovector:      capture offsets
2175   capture_top:  number of captures
2176 
2177 Returns:        TRUE if something was output, other than newline
2178                 FALSE if nothing was output, or newline was last output
2179 */
2180 
2181 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2182 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2183   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2184 {
2185 uint32_t value;
2186 BOOL printed = FALSE;
2187 PCRE2_SPTR begin = string;
2188 
2189 for (; *string != 0; string++)
2190   {
2191   if (*string == '$')
2192     {
2193     switch(decode_dollar_escape(begin, string, callout, &value, &string))
2194       {
2195       case DDE_CHAR:
2196       if (value == STDOUT_NL_CODE)
2197         {
2198         fprintf(stdout, STDOUT_NL);
2199         printed = FALSE;
2200         continue;
2201         }
2202       break;  /* Will print value */
2203 
2204       case DDE_CAPTURE:
2205       if (value < capture_top)
2206         {
2207         PCRE2_SIZE capturesize;
2208         value *= 2;
2209         capturesize = ovector[value + 1] - ovector[value];
2210         if (capturesize > 0)
2211           {
2212           print_match(subject + ovector[value], capturesize);
2213           printed = TRUE;
2214           }
2215         }
2216       continue;
2217 
2218       /* LCOV_EXCL_START */
2219       default:  /* Should not occur */
2220       break;
2221       /* LCOV_EXCL_STOP */
2222       }
2223     }
2224 
2225   else value = *string;  /* Not a $ escape */
2226 
2227   if (!utf || value <= 127) fprintf(stdout, "%c", value); else
2228     {
2229     int n = ord2utf8(value);
2230     for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2231     }
2232 
2233   printed = TRUE;
2234   }
2235 
2236 return printed;
2237 }
2238 
2239 
2240 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2241 
2242 /*************************************************
2243 *        Parse and execute callout scripts       *
2244 *************************************************/
2245 
2246 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2247 string block and executes the program specified by the string. The string is a
2248 list of substrings separated by pipe characters. The first substring represents
2249 the executable name, and the following substrings specify the arguments:
2250 
2251   program_name|param1|param2|...
2252 
2253 Any substring (including the program name) can contain escape sequences
2254 started by the dollar character. The escape sequences are substituted as
2255 follows:
2256 
2257   $<digits> or ${<digits>} is replaced by the captured substring of the given
2258   decimal number, which must be greater than zero. If the number is greater
2259   than the number of capturing substrings, or if the capture is unset, the
2260   replacement is empty.
2261 
2262   Any other character is substituted by itself. E.g: $$ is replaced by a single
2263   dollar or $| replaced by a pipe character.
2264 
Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output. This is the only form that is supported if
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is
used to separate each callout, defaulting to newline.
2269 
2270 Example:
2271 
2272   echo -e "abcde\n12345" | pcre2grep \
2273     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2274 
2275   Output:
2276 
2277     Arg1: [a] [bcd] [d] Arg2: |a| ()
2278     abcde
2279     Arg1: [1] [234] [4] Arg2: |1| ()
2280     12345
2281 
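Arguments:
  calloutptr   the callout block
  unused       not used (required by the callout function interface)

Returns:       zero if the match is to continue, non-zero if it is to fail;
               zero is always returned when no external command is run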
2286 */
2287 
2288 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2289 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2290 {
2291 PCRE2_SIZE length = calloutptr->callout_string_length;
2292 PCRE2_SPTR string = calloutptr->callout_string;
2293 PCRE2_SPTR subject = calloutptr->subject;
2294 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2295 PCRE2_SIZE capture_top = calloutptr->capture_top;
2296 
2297 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2298 PCRE2_SIZE argsvectorlen = 2;
2299 PCRE2_SIZE argslen = 1;
2300 char *args;
2301 char *argsptr;
2302 char **argsvector;
2303 char **argsvectorptr;
2304 #ifndef WIN32
2305 pid_t pid;
2306 #endif
2307 int result = 0;
2308 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2309 
2310 (void)unused;   /* Avoid compiler warning */
2311 
2312 /* Only callouts with strings are supported. */
2313 
2314 if (string == NULL || length == 0) return 0;
2315 
2316 /* If there's no command, output the remainder directly. */
2317 
2318 if (*string == '|')
2319   {
2320   string++;
2321   if (!syntax_check_output_text(string, TRUE)) return 0;
2322   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2323   return 0;
2324   }
2325 
2326 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2327 return 0;
2328 #else
2329 
2330 /* Checking syntax and compute the number of string fragments. Callout strings
2331 are silently ignored in the event of a syntax error. */
2332 
2333 while (length > 0)
2334   {
2335   if (*string == '|')
2336     {
2337     argsvectorlen++;
2338     if (argsvectorlen > 10000) return 0;  /* Too many args */
2339     }
2340 
2341   else if (*string == '$')
2342     {
2343     uint32_t value;
2344     PCRE2_SPTR begin = string;
2345 
2346     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2347       {
2348       case DDE_CAPTURE:
2349       if (value < capture_top)
2350         {
2351         value *= 2;
2352         argslen += ovector[value + 1] - ovector[value];
2353         }
2354       argslen--;   /* Negate the effect of argslen++ below. */
2355       break;
2356 
2357       case DDE_CHAR:
2358       if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2359         else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2360       break;
2361 
2362       /* LCOV_EXCL_START */
2363       default:         /* Should not occur */
2364       case DDE_ERROR:
2365       return 0;
2366       /* LCOV_EXCL_STOP */
2367       }
2368 
2369     length -= (string - begin);
2370     }
2371 
2372   string++;
2373   length--;
2374   argslen++;
2375   }
2376 
2377 /* Get memory for the argument vector and its strings. */
2378 
2379 args = (char*)malloc(argslen);
2380 if (args == NULL) return 0;
2381 
2382 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2383 if (argsvector == NULL)
2384   {
2385   /* LCOV_EXCL_START */
2386   free(args);
2387   return 0;
2388   /* LCOV_EXCL_STOP */
2389   }
2390 
2391 /* Now reprocess the string and set up the arguments. */
2392 
2393 argsptr = args;
2394 argsvectorptr = argsvector;
2395 *argsvectorptr++ = argsptr;
2396 
2397 length = calloutptr->callout_string_length;
2398 string = calloutptr->callout_string;
2399 
2400 while (length > 0)
2401   {
2402   if (*string == '|')
2403     {
2404     *argsptr++ = '\0';
2405     *argsvectorptr++ = argsptr;
2406     }
2407 
2408   else if (*string == '$')
2409     {
2410     uint32_t value;
2411     PCRE2_SPTR begin = string;
2412 
2413     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2414       {
2415       case DDE_CAPTURE:
2416       if (value < capture_top)
2417         {
2418         PCRE2_SIZE capturesize;
2419         value *= 2;
2420         capturesize = ovector[value + 1] - ovector[value];
2421         memcpy(argsptr, subject + ovector[value], capturesize);
2422         argsptr += capturesize;
2423         }
2424       break;
2425 
2426       case DDE_CHAR:
2427       if (value == STDOUT_NL_CODE)
2428         {
2429         memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2430         argsptr += STDOUT_NL_LEN;
2431         }
2432       else if (utf && value > 127)
2433         {
2434         int n = ord2utf8(value);
2435         memcpy(argsptr, utf8_buffer, n);
2436         argsptr += n;
2437         }
2438       else
2439         {
2440         *argsptr++ = value;
2441         }
2442       break;
2443 
2444       /* LCOV_EXCL_START */
2445       default:         /* Even though this should not occur, the string having */
2446       case DDE_ERROR:  /* been checked above, we need to include the free() */
2447       free(args);      /* calls so that source checkers do not complain. */
2448       free(argsvector);
2449       return 0;
2450       /* LCOV_EXCL_STOP */
2451       }
2452 
2453     length -= (string - begin);
2454     }
2455 
2456   else *argsptr++ = *string;
2457 
2458   /* Advance along the string */
2459 
2460   string++;
2461   length--;
2462   }
2463 
2464 *argsptr++ = '\0';
2465 *argsvectorptr = NULL;
2466 
2467 /* Running an external command is system-dependent. Handle Windows and VMS as
2468 necessary, otherwise assume fork(). */
2469 
2470 #ifdef WIN32
2471 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2472 
2473 #elif defined __VMS
2474   {
2475   char cmdbuf[500];
2476   short i = 0;
2477   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2478   $DESCRIPTOR(cmd, cmdbuf);
2479 
2480   cmdbuf[0] = 0;
2481   while (argsvector[i])
2482   {
2483     strcat(cmdbuf, argsvector[i]);
2484     strcat(cmdbuf, " ");
2485     i++;
2486   }
2487   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2488   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2489   if (!(status & 1)) result = 0;
2490   else result = retstat & 1 ? 0 : 1;
2491   }
2492 
2493 #else  /* Neither Windows nor VMS */
2494 pid = fork();
2495 if (pid == 0)
2496   {
2497   (void)execv(argsvector[0], argsvector);
2498   /* Control gets here if there is an error, e.g. a non-existent program */
2499   exit(1);
2500   }
2501 else if (pid > 0)
2502   {
2503   (void)fflush(stdout);
2504   (void)waitpid(pid, &result, 0);
2505   (void)fflush(stdout);
2506   }
2507 #endif  /* End Windows/VMS/other handling */
2508 
2509 free(args);
2510 free(argsvector);
2511 
2512 /* Currently negative return values are not supported, only zero (match
2513 continues) or non-zero (match fails). */
2514 
2515 return result != 0;
2516 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2517 }
2518 #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2519 
2520 
2521 
2522 /*************************************************
2523 *     Read a portion of the file into buffer     *
2524 *************************************************/
2525 
2526 static PCRE2_SIZE
fill_buffer(void * handle,int frtype,char * buffer,PCRE2_SIZE length,BOOL input_line_buffered)2527 fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
2528   BOOL input_line_buffered)
2529 {
2530 (void)frtype;  /* Avoid warning when not used */
2531 
2532 #ifdef SUPPORT_LIBZ
2533 if (frtype == FR_LIBZ)
2534   return gzread((gzFile)handle, buffer, length);
2535 else
2536 #endif
2537 
2538 #ifdef SUPPORT_LIBBZ2
2539 if (frtype == FR_LIBBZ2)
2540   return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
2541 else
2542 #endif
2543 
2544 return (input_line_buffered ?
2545   read_one_line(buffer, length, (FILE *)handle) :
2546   fread(buffer, 1, length, (FILE *)handle));
2547 }
2548 
2549 
2550 
2551 /*************************************************
2552 *            Grep an individual file             *
2553 *************************************************/
2554 
2555 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2556 times the value of bufthird. The matching point is never allowed to stray into
2557 the top third of the buffer, thus keeping more of the file available for
2558 context printing or for multiline scanning. For large files, the pointer will
2559 be in the middle third most of the time, so the bottom third is available for
2560 "before" context printing.
2561 
2562 Arguments:
2563   handle       the fopened FILE stream for a normal file
2564                the gzFile pointer when reading is via libz
2565                the BZFILE pointer when reading is via libbz2
2566   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2567   filename     the file name or NULL (for errors)
2568   printname    the file name if it is to be printed for each match
2569                or NULL if the file name is not to be printed
2570                it cannot be NULL if filenames[_nomatch]_only is set
2571 
2572 Returns:       0 if there was at least one match
2573                1 otherwise (no matches)
2574                2 if an overlong line is encountered
2575                3 if there is a read error on a .bz2 file
2576 */
2577 
2578 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2579 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2580 {
2581 int rc = 1;
2582 int filepos = 0;
2583 unsigned long int linenumber = 1;
2584 unsigned long int lastmatchnumber = 0;
2585 unsigned long int count = 0;
2586 long int count_matched_lines = 0;
2587 char *lastmatchrestart = main_buffer;
2588 char *ptr = main_buffer;
2589 char *endptr;
2590 PCRE2_SIZE bufflength;
2591 BOOL binary = FALSE;
2592 BOOL endhyphenpending = FALSE;
2593 BOOL lines_printed = FALSE;
2594 BOOL input_line_buffered = line_buffered;
2595 FILE *in = NULL;                    /* Ensure initialized */
2596 long stream_start = -1;             /* Only non-negative if relevant */
2597 
2598 /* Do the first read into the start of the buffer and set up the pointer to end
2599 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2600 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2601 fail. */
2602 
2603 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2604   {
2605   in = (FILE *)handle;
2606   if (feof(in)) return 1;
2607   if (is_file_tty(in)) input_line_buffered = TRUE;
2608   else
2609     {
2610     if (count_limit >= 0  && filename == stdin_name)
2611       stream_start = ftell(in);
2612     }
2613   }
2614 else input_line_buffered = FALSE;
2615 
2616 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2617   input_line_buffered);
2618 
2619 #ifdef SUPPORT_LIBBZ2
2620 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3;   /* Gotcha: bufflength is PCRE2_SIZE */
2621 #endif
2622 
2623 endptr = main_buffer + bufflength;
2624 
2625 /* Unless binary-files=text, see if we have a binary file. This uses the same
2626 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2627 file. However, when the newline convention is binary zero, we can't do this. */
2628 
2629 if (binary_files != BIN_TEXT)
2630   {
2631   if (endlinetype != PCRE2_NEWLINE_NUL)
2632     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2633       != NULL;
2634   if (binary && binary_files == BIN_NOMATCH) return 1;
2635   }
2636 
2637 /* Loop while the current pointer is not at the end of the file. For large
2638 files, endptr will be at the end of the buffer when we are in the middle of the
2639 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2640 way, the buffer is shifted left and re-filled. */
2641 
2642 while (ptr < endptr)
2643   {
2644   int endlinelength;
2645   int mrc = 0;
2646   unsigned int options = 0;
2647   BOOL match;
2648   BOOL line_matched = FALSE;
2649   char *t = ptr;
2650   PCRE2_SIZE length, linelength;
2651   PCRE2_SIZE startoffset = 0;
2652 
2653   /* If the -m option set a limit for the number of matched or non-matched
2654   lines, check it here. A limit of zero means that no matching is ever done.
2655   For stdin from a file, set the file position. */
2656 
2657   if (count_limit >= 0 && count_matched_lines >= count_limit)
2658     {
2659     if (stream_start >= 0)
2660       (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2661     rc = (count_limit == 0)? 1 : 0;
2662     break;
2663     }
2664 
2665   /* At this point, ptr is at the start of a line. We need to find the length
2666   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2667   length remainder of the data in the buffer. Otherwise, it is the length of
2668   the next line, excluding the terminating newline. After matching, we always
2669   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2670   option is used for compiling, so that any match is constrained to be in the
2671   first line. */
2672 
2673   t = end_of_line(t, endptr, &endlinelength);
2674   linelength = t - ptr - endlinelength;
2675   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2676 
2677   /* Check to see if the line we are looking at extends right to the very end
2678   of the buffer without a line terminator. This means the line is too long to
2679   handle at the current buffer size. Until the buffer reaches its maximum size,
2680   try doubling it and reading more data. */
2681 
2682   if (endlinelength == 0 && t == main_buffer + bufsize)
2683     {
2684     if (bufthird < max_bufthird)
2685       {
2686       char *new_buffer;
2687       PCRE2_SIZE new_bufthird = 2*bufthird;
2688 
2689       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2690       new_buffer = (char *)malloc(3*new_bufthird);
2691 
2692       if (new_buffer == NULL)
2693         {
2694         /* LCOV_EXCL_START */
2695         fprintf(stderr,
2696           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2697           "pcre2grep: not enough memory to increase the buffer size to %"
2698             SIZ_FORM "\n",
2699           linenumber,
2700           (filename == NULL)? "" : " of file ",
2701           (filename == NULL)? "" : filename,
2702           new_bufthird);
2703         return 2;
2704         /* LCOV_EXCL_STOP */
2705         }
2706 
2707       /* Copy the data and adjust pointers to the new buffer location. */
2708 
2709       memcpy(new_buffer, main_buffer, bufsize);
2710       bufthird = new_bufthird;
2711       bufsize = 3*bufthird;
2712       ptr = new_buffer + (ptr - main_buffer);
2713       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2714       free(main_buffer);
2715       main_buffer = new_buffer;
2716 
2717       /* Read more data into the buffer and then try to find the line ending
2718       again. */
2719 
2720       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2721         bufsize - bufflength, input_line_buffered);
2722       endptr = main_buffer + bufflength;
2723       continue;
2724       }
2725     else
2726       {
2727       fprintf(stderr,
2728         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2729         "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
2730         "pcre2grep: use the --max-buffer-size option to change it\n",
2731         linenumber,
2732         (filename == NULL)? "" : " of file ",
2733         (filename == NULL)? "" : filename,
2734         bufthird);
2735       return 2;
2736       }
2737     }
2738 
2739   /* We come back here after a match when only_matching_count is non-zero, in
2740   order to find any further matches in the same line. This applies to
2741   --only-matching, --file-offsets, and --line-offsets. */
2742 
2743   ONLY_MATCHING_RESTART:
2744 
2745   /* Run through all the patterns until one matches or there is an error other
2746   than NOMATCH. This code is in a subroutine so that it can be re-used for
2747   finding subsequent matches when colouring matched lines. After finding one
2748   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2749   this line. */
2750 
2751   match = match_patterns(ptr, length, options, startoffset, &mrc);
2752   options = PCRE2_NOTEMPTY;
2753 
2754   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2755   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2756   return code - to output data lines, so that binary zeroes are treated as just
2757   another data character. */
2758 
2759   if (match != invert)
2760     {
2761     BOOL hyphenprinted = FALSE;
2762 
2763     /* We've failed if we want a file that doesn't have any matches. */
2764 
2765     if (filenames == FN_NOMATCH_ONLY) return 1;
2766 
2767     /* Remember that this line matched (for counting matched lines) */
2768 
2769     line_matched = TRUE;
2770 
2771     /* If all we want is a yes/no answer, we can return immediately. */
2772 
2773     if (quiet) return 0;
2774 
2775     /* Just count if just counting is wanted. */
2776 
2777     else if (count_only || show_total_count) count++;
2778 
2779     /* When handling a binary file and binary-files==binary, the "binary"
2780     variable will be set true (it's false in all other cases). In this
2781     situation we just want to output the file name. No need to scan further. */
2782 
2783     else if (binary)
2784       {
2785       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2786       return 0;
2787       }
2788 
2789     /* Likewise, if all we want is a file name, there is no need to scan any
2790     more lines in the file. */
2791 
2792     else if (filenames == FN_MATCH_ONLY)
2793       {
2794       fprintf(stdout, "%s", printname);
2795       if (printname_nl == NULL) fprintf(stdout, "%c", 0);
2796         else fprintf(stdout, "%s", printname_nl);
2797       return 0;
2798       }
2799 
2800     /* The --only-matching option prints just the substring that matched,
2801     and/or one or more captured portions of it, as long as these strings are
2802     not empty. The --file-offsets and --line-offsets options output offsets for
2803     the matching substring (all three set only_matching_count non-zero). None
2804     of these mutually exclusive options prints any context. Afterwards, adjust
2805     the start and then jump back to look for further matches in the same line.
2806     If we are in invert mode, however, nothing is printed and we do not restart
2807     - this could still be useful because the return code is set. */
2808 
2809     else if (only_matching_count != 0)
2810       {
2811       if (!invert)
2812         {
2813         PCRE2_SIZE oldstartoffset;
2814 
2815         if (printname != NULL) fprintf(stdout, "%s%c", printname,
2816           printname_colon);
2817         if (number) fprintf(stdout, "%lu:", linenumber);
2818 
2819         /* Handle --line-offsets */
2820 
2821         if (line_offsets)
2822           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2823             (int)(offsets[1] - offsets[0]));
2824 
2825         /* Handle --file-offsets */
2826 
2827         else if (file_offsets)
2828           fprintf(stdout, "%d,%d" STDOUT_NL,
2829             (int)(filepos + ptr + offsets[0] - ptr),
2830             (int)(offsets[1] - offsets[0]));
2831 
2832         /* Handle --output (which has already been syntax checked) */
2833 
2834         else if (output_text != NULL)
2835           {
2836           (void)display_output_text((PCRE2_SPTR)output_text, FALSE,
2837               (PCRE2_SPTR)ptr, offsets, mrc);
2838           fprintf(stdout, STDOUT_NL);
2839           }
2840 
2841         /* Handle --only-matching, which may occur many times */
2842 
2843         else
2844           {
2845           BOOL printed = FALSE;
2846           omstr *om;
2847 
2848           for (om = only_matching; om != NULL; om = om->next)
2849             {
2850             int n = om->groupnum;
2851             if (n == 0 || n < mrc)
2852               {
2853               int plen = offsets[2*n + 1] - offsets[2*n];
2854               if (plen > 0)
2855                 {
2856                 if (printed && om_separator != NULL)
2857                   fprintf(stdout, "%s", om_separator);
2858                 print_match(ptr + offsets[n*2], plen);
2859                 printed = TRUE;
2860                 }
2861               }
2862             }
2863           if (printed || printname != NULL || number)
2864             fprintf(stdout, STDOUT_NL);
2865           }
2866 
2867         /* Prepare to repeat to find the next match in the line. */
2868 
2869         match = FALSE;
2870         if (line_buffered) fflush(stdout);
2871         rc = 0;                      /* Had some success */
2872 
2873         /* If the pattern contained a lookbehind that included \K, it is
2874         possible that the end of the match might be at or before the actual
2875         starting offset we have just used. In this case, start one character
2876         further on. */
2877 
2878         startoffset = offsets[1];    /* Restart after the match */
2879         oldstartoffset = pcre2_get_startchar(match_data);
2880         if (startoffset <= oldstartoffset)
2881           {
2882           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2883           startoffset = oldstartoffset + 1;
2884           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2885           }
2886 
2887         /* If the current match ended past the end of the line (only possible
2888         in multiline mode), we must move on to the line in which it did end
2889         before searching for more matches. */
2890 
2891         while (startoffset > linelength)
2892           {
2893           ptr += linelength + endlinelength;
2894           filepos += (int)(linelength + endlinelength);
2895           linenumber++;
2896           startoffset -= (int)(linelength + endlinelength);
2897           t = end_of_line(ptr, endptr, &endlinelength);
2898           linelength = t - ptr - endlinelength;
2899           length = (PCRE2_SIZE)(endptr - ptr);
2900           }
2901 
2902         goto ONLY_MATCHING_RESTART;
2903         }
2904       }
2905 
2906     /* This is the default case when none of the above options is set. We print
2907     the matching lines(s), possibly preceded and/or followed by other lines of
2908     context. */
2909 
2910     else
2911       {
2912       lines_printed = TRUE;
2913 
2914       /* See if there is a requirement to print some "after" lines from a
2915       previous match. We never print any overlaps. */
2916 
2917       if (after_context > 0 && lastmatchnumber > 0)
2918         {
2919         int ellength;
2920         int linecount = 0;
2921         char *p = lastmatchrestart;
2922 
2923         while (p < ptr && linecount < after_context)
2924           {
2925           p = end_of_line(p, ptr, &ellength);
2926           linecount++;
2927           }
2928 
2929         /* It is important to advance lastmatchrestart during this printing so
2930         that it interacts correctly with any "before" printing below. Print
2931         each line's data using fwrite() in case there are binary zeroes. */
2932 
2933         while (lastmatchrestart < p)
2934           {
2935           char *pp = lastmatchrestart;
2936           if (printname != NULL) fprintf(stdout, "%s%c", printname,
2937             printname_hyphen);
2938           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2939           pp = end_of_line(pp, endptr, &ellength);
2940           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2941           lastmatchrestart = pp;
2942           }
2943         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2944         }
2945 
2946       /* If there were non-contiguous lines printed above, insert hyphens. */
2947 
2948       if (hyphenpending)
2949         {
2950         fprintf(stdout, "--" STDOUT_NL);
2951         hyphenpending = FALSE;
2952         hyphenprinted = TRUE;
2953         }
2954 
2955       /* See if there is a requirement to print some "before" lines for this
2956       match. Again, don't print overlaps. */
2957 
2958       if (before_context > 0)
2959         {
2960         int linecount = 0;
2961         char *p = ptr;
2962 
2963         while (p > main_buffer &&
2964                (lastmatchnumber == 0 || p > lastmatchrestart) &&
2965                linecount < before_context)
2966           {
2967           linecount++;
2968           p = previous_line(p, main_buffer);
2969           }
2970 
2971         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2972           fprintf(stdout, "--" STDOUT_NL);
2973 
2974         while (p < ptr)
2975           {
2976           int ellength;
2977           char *pp = p;
2978           if (printname != NULL) fprintf(stdout, "%s%c", printname,
2979             printname_hyphen);
2980           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2981           pp = end_of_line(pp, endptr, &ellength);
2982           FWRITE_IGNORE(p, 1, pp - p, stdout);
2983           p = pp;
2984           }
2985         }
2986 
2987       /* Now print the matching line(s); ensure we set hyphenpending at the end
2988       of the file if any context lines are being output. */
2989 
2990       if (after_context > 0 || before_context > 0)
2991         endhyphenpending = TRUE;
2992 
2993       if (printname != NULL) fprintf(stdout, "%s%c", printname,
2994         printname_colon);
2995       if (number) fprintf(stdout, "%lu:", linenumber);
2996 
2997       /* In multiline mode, or if colouring, we have to split the line(s) up
2998       and search for further matches, but not of course if the line is a
2999       non-match. In multiline mode this is necessary in case there is another
3000       match that spans the end of the current line. When colouring we want to
3001       colour all matches. */
3002 
3003       if ((multiline || do_colour) && !invert)
3004         {
3005         int plength;
3006         PCRE2_SIZE endprevious;
3007 
3008         /* The use of \K may make the end offset earlier than the start. In
3009         this situation, swap them round. */
3010 
3011         if (offsets[0] > offsets[1])
3012           {
3013           PCRE2_SIZE temp = offsets[0];
3014           offsets[0] = offsets[1];
3015           offsets[1] = temp;
3016           }
3017 
3018         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3019         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3020 
3021         for (;;)
3022           {
3023           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3024 
3025           endprevious = offsets[1];
3026           startoffset = endprevious;  /* Advance after previous match. */
3027 
3028           /* If the pattern contained a lookbehind that included \K, it is
3029           possible that the end of the match might be at or before the actual
3030           starting offset we have just used. In this case, start one character
3031           further on. */
3032 
3033           if (startoffset <= oldstartoffset)
3034             {
3035             startoffset = oldstartoffset + 1;
3036             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3037             }
3038 
3039           /* If the current match ended past the end of the line (only possible
3040           in multiline mode), we must move on to the line in which it did end
3041           before searching for more matches. Because the PCRE2_FIRSTLINE option
3042           is set, the start of the match will always be before the first
3043           newline sequence. */
3044 
3045           while (startoffset > linelength + endlinelength)
3046             {
3047             ptr += linelength + endlinelength;
3048             filepos += (int)(linelength + endlinelength);
3049             linenumber++;
3050             startoffset -= (int)(linelength + endlinelength);
3051             endprevious -= (int)(linelength + endlinelength);
3052             t = end_of_line(ptr, endptr, &endlinelength);
3053             linelength = t - ptr - endlinelength;
3054             length = (PCRE2_SIZE)(endptr - ptr);
3055             }
3056 
3057           /* If startoffset is at the exact end of the line it means this
3058           complete line was the final part of the match, so there is nothing
3059           more to do. */
3060 
3061           if (startoffset == linelength + endlinelength) break;
3062 
3063           /* Otherwise, run a match from within the final line, and if found,
3064           loop for any that may follow. */
3065 
3066           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3067 
3068           /* The use of \K may make the end offset earlier than the start. In
3069           this situation, swap them round. */
3070 
3071           if (offsets[0] > offsets[1])
3072             {
3073             PCRE2_SIZE temp = offsets[0];
3074             offsets[0] = offsets[1];
3075             offsets[1] = temp;
3076             }
3077 
3078           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3079           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3080           }
3081 
3082         /* In multiline mode, we may have already printed the complete line
3083         and its line-ending characters (if they matched the pattern), so there
3084         may be no more to print. */
3085 
3086         plength = (int)((linelength + endlinelength) - endprevious);
3087         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3088         }
3089 
3090       /* Not colouring or multiline; no need to search for further matches. */
3091 
3092       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3093       }
3094 
3095     /* End of doing what has to be done for a match. If --line-buffered was
3096     given, flush the output. */
3097 
3098     if (line_buffered) fflush(stdout);
3099     rc = 0;    /* Had some success */
3100 
3101     /* Remember where the last match happened for after_context. We remember
3102     where we are about to restart, and that line's number. */
3103 
3104     lastmatchrestart = ptr + linelength + endlinelength;
3105     lastmatchnumber = linenumber + 1;
3106 
3107     /* If a line was printed and we are now at the end of the file and the last
3108     line had no newline, output one. */
3109 
3110     if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3111       write_final_newline();
3112     }
3113 
3114   /* For a match in multiline inverted mode (which of course did not cause
3115   anything to be printed), we have to move on to the end of the match before
3116   proceeding. */
3117 
3118   if (multiline && invert && match)
3119     {
3120     int ellength;
3121     char *endmatch = ptr + offsets[1];
3122     t = ptr;
3123     while (t < endmatch)
3124       {
3125       t = end_of_line(t, endptr, &ellength);
3126       if (t <= endmatch) linenumber++; else break;
3127       }
3128     endmatch = end_of_line(endmatch, endptr, &ellength);
3129     linelength = endmatch - ptr - ellength;
3130     }
3131 
3132   /* Advance to after the newline and increment the line number. The file
3133   offset to the current line is maintained in filepos. */
3134 
3135   END_ONE_MATCH:
3136   ptr += linelength + endlinelength;
3137   filepos += (int)(linelength + endlinelength);
3138   linenumber++;
3139 
3140   /* If there was at least one match (or a non-match, as required) in the line,
3141   increment the count for the -m option. */
3142 
3143   if (line_matched) count_matched_lines++;
3144 
3145   /* If input is line buffered, and the buffer is not yet full, read another
3146   line and add it into the buffer. */
3147 
3148   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3149     {
3150     PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
3151     bufflength += add;
3152     endptr += add;
3153     }
3154 
3155   /* If we haven't yet reached the end of the file (the buffer is full), and
3156   the current point is in the top 1/3 of the buffer, slide the buffer down by
3157   1/3 and refill it. Before we do this, if some unprinted "after" lines are
3158   about to be lost, print them. */
3159 
3160   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3161     {
3162     if (after_context > 0 &&
3163         lastmatchnumber > 0 &&
3164         lastmatchrestart < main_buffer + bufthird)
3165       {
3166       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3167       lastmatchnumber = 0;  /* Indicates no after lines pending */
3168       }
3169 
3170     /* Now do the shuffle */
3171 
3172     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3173     ptr -= bufthird;
3174 
3175     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3176       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3177     endptr = main_buffer + bufflength;
3178 
3179     /* Adjust any last match point */
3180 
3181     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3182     }
3183   }     /* Loop through the whole file */
3184 
3185 /* End of file; print final "after" lines if wanted; do_after_lines sets
3186 hyphenpending if it prints something. */
3187 
3188 if (only_matching_count == 0 && !(count_only|show_total_count))
3189   {
3190   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3191   hyphenpending |= endhyphenpending;
3192   }
3193 
3194 /* Print the file name if we are looking for those without matches and there
3195 were none. If we found a match, we won't have got this far. */
3196 
3197 if (filenames == FN_NOMATCH_ONLY)
3198   {
3199   fprintf(stdout, "%s", printname);
3200   if (printname_nl == NULL) fprintf(stdout, "%c", 0);
3201     else fprintf(stdout, "%s", printname_nl);
3202   return 0;
3203   }
3204 
3205 /* Print the match count if wanted */
3206 
3207 if (count_only && !quiet)
3208   {
3209   if (count > 0 || !omit_zero_count)
3210     {
3211     if (printname != NULL && filenames != FN_NONE)
3212       fprintf(stdout, "%s%c", printname, printname_colon);
3213     fprintf(stdout, "%lu" STDOUT_NL, count);
3214     counts_printed++;
3215     }
3216   }
3217 
3218 total_count += count;   /* Can be set without count_only */
3219 return rc;
3220 }
3221 
3222 
3223 
3224 /*************************************************
3225 *     Grep a file or recurse into a directory    *
3226 *************************************************/
3227 
3228 /* Given a path name, if it's a directory, scan all the files if we are
3229 recursing; if it's a file, grep it.
3230 
3231 Arguments:
3232   pathname          the path to investigate
3233   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3234   only_one_at_top   TRUE if the path is the only one at toplevel
3235 
3236 Returns:  -1 the file/directory was skipped
3237            0 if there was at least one match
3238            1 if there were no matches
3239            2 there was some kind of error
3240 
3241 However, file opening failures are suppressed if "silent" is set.
3242 */
3243 
3244 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3245 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3246 {
3247 int rc = 1;
3248 int frtype;
3249 void *handle;
3250 char *lastcomp;
3251 FILE *in = NULL;           /* Ensure initialized */
3252 
3253 #ifdef SUPPORT_LIBZ
3254 gzFile ingz = NULL;
3255 #endif
3256 
3257 #ifdef SUPPORT_LIBBZ2
3258 BZFILE *inbz2 = NULL;
3259 #endif
3260 
3261 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3262 int pathlen;
3263 #endif
3264 
3265 #if defined NATIVE_ZOS
3266 int zos_type;
3267 FILE *zos_test_file;
3268 #endif
3269 
3270 /* If the file name is "-" we scan stdin */
3271 
3272 if (strcmp(pathname, "-") == 0)
3273   {
3274   if (count_limit >= 0) setbuf(stdin, NULL);
3275   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3276     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3277       stdin_name : NULL);
3278   }
3279 
3280 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3281 directories, whereas --include and --exclude apply to everything else. The test
3282 is against the final component of the path. */
3283 
3284 lastcomp = strrchr(pathname, FILESEP);
3285 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3286 
3287 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3288 Otherwise, scan the directory and recurse for each path within it. The scanning
3289 code is localized so it can be made system-specific. */
3290 
3291 
3292 /* For z/OS, determine the file type. */
3293 
3294 #if defined NATIVE_ZOS
3295 zos_test_file =  fopen(pathname,"rb");
3296 
3297 if (zos_test_file == NULL)
3298    {
3299    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3300      pathname, strerror(errno));
3301    return -1;
3302    }
3303 zos_type = identifyzosfiletype (zos_test_file);
3304 fclose (zos_test_file);
3305 
3306 /* Handle a PDS in separate code */
3307 
3308 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3309    {
3310    return travelonpdsdir (pathname, only_one_at_top);
3311    }
3312 
3313 /* Deal with regular files in the normal way below. These types are:
3314    zos_type == __ZOS_PDS_MEMBER
3315    zos_type == __ZOS_PS
3316    zos_type == __ZOS_VSAM_KSDS
3317    zos_type == __ZOS_VSAM_ESDS
3318    zos_type == __ZOS_VSAM_RRDS
3319 */
3320 
3321 /* Handle a z/OS directory using common code. */
3322 
3323 else if (zos_type == __ZOS_HFS)
3324  {
3325 #endif  /* NATIVE_ZOS */
3326 
3327 
3328 /* Handle directories: common code for all OS */
3329 
3330 if (isdirectory(pathname))
3331   {
3332   if (dee_action == dee_SKIP ||
3333       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3334     return -1;
3335 
3336   if (dee_action == dee_RECURSE)
3337     {
3338     char childpath[FNBUFSIZ];
3339     char *nextfile;
3340     directory_type *dir = opendirectory(pathname);
3341 
3342     if (dir == NULL)
3343       {
3344       /* LCOV_EXCL_START - this is a "never" event */
3345       if (!silent)
3346         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3347           strerror(errno));
3348       return 2;
3349       /* LCOV_EXCL_STOP */
3350       }
3351 
3352     while ((nextfile = readdirectory(dir)) != NULL)
3353       {
3354       int frc;
3355       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3356       if (fnlength > FNBUFSIZ)
3357         {
3358         /* LCOV_EXCL_START - this is a "never" event */
3359         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3360         rc = 2;
3361         break;
3362         /* LCOV_EXCL_STOP */
3363         }
3364       sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3365 
3366       /* If the realpath() function is available, we can try to prevent endless
3367       recursion caused by a symlink pointing to a parent directory (GitHub
3368       issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3369       Modified to avoid using strlcat() because that isn't a standard C
3370       function, and also modified not to copy back the fully resolved path,
3371       because that affects the output from pcre2grep. */
3372 
3373 #ifdef HAVE_REALPATH
3374       {
3375       char resolvedpath[PATH_MAX];
3376       BOOL isSame;
3377       size_t rlen;
3378       if (realpath(childpath, resolvedpath) == NULL)
3379         /* LCOV_EXCL_START - this is a "never" event */
3380         continue;     /* This path is invalid - we can skip processing this */
3381         /* LCOV_EXCL_STOP */
3382       isSame = strcmp(pathname, resolvedpath) == 0;
3383       if (isSame) continue;    /* We have a recursion */
3384       rlen = strlen(resolvedpath);
3385       if (rlen++ < sizeof(resolvedpath) - 3)
3386         {
3387         BOOL contained;
3388         strcat(resolvedpath, "/");
3389         contained = strncmp(pathname, resolvedpath, rlen) == 0;
3390         if (contained) continue;    /* We have a recursion */
3391         }
3392       }
3393 #endif  /* HAVE_REALPATH */
3394 
3395       frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3396       if (frc > 1) rc = frc;
3397        else if (frc == 0 && rc == 1) rc = 0;
3398       }
3399 
3400     closedirectory(dir);
3401     return rc;
3402     }
3403   }
3404 
3405 #ifdef WIN32
3406 if (iswild(pathname))
3407   {
3408   char buffer[1024];
3409   char *nextfile;
3410   char *name;
3411   directory_type *dir = opendirectory(pathname);
3412 
3413   if (dir == NULL)
3414     return 0;
3415 
3416   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3417     if (*nextfile == '/' || *nextfile == '\\')
3418       name = nextfile + 1;
3419   *name = 0;
3420 
3421   while ((nextfile = readdirectory(dir)) != NULL)
3422     {
3423     int frc;
3424     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3425     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3426     if (frc > 1) rc = frc;
3427      else if (frc == 0 && rc == 1) rc = 0;
3428     }
3429 
3430   closedirectory(dir);
3431   return rc;
3432   }
3433 #endif
3434 
3435 #if defined NATIVE_ZOS
3436  }
3437 #endif
3438 
3439 /* If the file is not a directory, check for a regular file, and if it is not,
3440 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3441 exclusion. */
3442 
3443 else if (
3444 #if defined NATIVE_ZOS
3445         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3446 #else  /* all other OS */
3447         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3448 #endif
3449         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3450   return -1;  /* File skipped */
3451 
3452 /* Control reaches here if we have a regular file, or if we have a directory
3453 and recursion or skipping was not requested, or if we have anything else and
3454 skipping was not requested. The scan proceeds. If this is the first and only
3455 argument at top level, we don't show the file name, unless we are only showing
3456 the file name, or the filename was forced (-H). */
3457 
3458 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3459 pathlen = (int)(strlen(pathname));
3460 #endif
3461 
3462 /* Open using zlib if it is supported and the file name ends with .gz. */
3463 
3464 #ifdef SUPPORT_LIBZ
3465 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3466   {
3467   ingz = gzopen(pathname, "rb");
3468   if (ingz == NULL)
3469     {
3470     /* LCOV_EXCL_START */
3471     if (!silent)
3472       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3473         strerror(errno));
3474     return 2;
3475     /* LCOV_EXCL_STOP */
3476     }
3477   handle = (void *)ingz;
3478   frtype = FR_LIBZ;
3479   }
3480 else
3481 #endif
3482 
3483 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3484 
3485 #ifdef SUPPORT_LIBBZ2
3486 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3487   {
3488   inbz2 = BZ2_bzopen(pathname, "rb");
3489   handle = (void *)inbz2;
3490   frtype = FR_LIBBZ2;
3491   }
3492 else
3493 #endif
3494 
3495 /* Otherwise use plain fopen(). The label is so that we can come back here if
3496 an attempt to read a .bz2 file indicates that it really is a plain file. */
3497 
3498 #ifdef SUPPORT_LIBBZ2
3499 PLAIN_FILE:
3500 #endif
3501   {
3502   in = fopen(pathname, "rb");
3503   handle = (void *)in;
3504   frtype = FR_PLAIN;
3505   }
3506 
3507 /* All the opening methods return errno when they fail. */
3508 
3509 if (handle == NULL)
3510   {
3511   if (!silent)
3512     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3513       strerror(errno));
3514   return 2;
3515   }
3516 
3517 /* Now grep the file */
3518 
3519 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3520   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3521 
3522 /* Close in an appropriate manner. */
3523 
3524 #ifdef SUPPORT_LIBZ
3525 if (frtype == FR_LIBZ)
3526   gzclose(ingz);
3527 else
3528 #endif
3529 
3530 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3531 read failed. If the error indicates that the file isn't in fact bzipped, try
3532 again as a normal file. */
3533 
3534 #ifdef SUPPORT_LIBBZ2
3535 if (frtype == FR_LIBBZ2)
3536   {
3537   if (rc == 3)
3538     {
3539     int errnum;
3540     const char *err = BZ2_bzerror(inbz2, &errnum);
3541     if (errnum == BZ_DATA_ERROR_MAGIC)
3542       {
3543       BZ2_bzclose(inbz2);
3544       goto PLAIN_FILE;
3545       }
3546     /* LCOV_EXCL_START */
3547     else if (!silent)
3548       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3549         pathname, err);
3550     rc = 2;    /* The normal "something went wrong" code */
3551     /* LCOV_EXCL_STOP */
3552     }
3553   BZ2_bzclose(inbz2);
3554   }
3555 else
3556 #endif
3557 
3558 /* Normal file close */
3559 
3560 fclose(in);
3561 
3562 /* Pass back the yield from pcre2grep(). */
3563 
3564 return rc;
3565 }
3566 
3567 
3568 
3569 /*************************************************
3570 *          Handle a no-data option               *
3571 *************************************************/
3572 
3573 /* This is called when a known option has been identified. */
3574 
3575 static int
handle_option(int letter,int options)3576 handle_option(int letter, int options)
3577 {
3578 switch(letter)
3579   {
3580   case N_FOFFSETS: file_offsets = TRUE; break;
3581   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3582   case N_LBUFFER: line_buffered = TRUE; break;
3583   case N_LOFFSETS: line_offsets = number = TRUE; break;
3584   case N_NOJIT: use_jit = FALSE; break;
3585   case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3586   case 'a': binary_files = BIN_TEXT; break;
3587   case 'c': count_only = TRUE; break;
3588   case 'F': options |= PCRE2_LITERAL; break;
3589   case 'H': filenames = FN_FORCE; break;
3590   case 'I': binary_files = BIN_NOMATCH; break;
3591   case 'h': filenames = FN_NONE; break;
3592   case 'i': options |= PCRE2_CASELESS; break;
3593   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3594   case 'L': filenames = FN_NOMATCH_ONLY; break;
3595   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3596   case 'n': number = TRUE; break;
3597 
3598   case 'o':
3599   only_matching_last = add_number(0, only_matching_last);
3600   if (only_matching == NULL) only_matching = only_matching_last;
3601   break;
3602 
3603   case 'q': quiet = TRUE; break;
3604   case 'r': dee_action = dee_RECURSE; break;
3605   case 's': silent = TRUE; break;
3606   case 't': show_total_count = TRUE; break;
3607   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3608   case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3609   case 'v': invert = TRUE; break;
3610 
3611   case 'V':
3612     {
3613     unsigned char buffer[128];
3614     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3615     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3616     }
3617   pcre2grep_exit(0);
3618   break;  /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
3619 
3620   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3621   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3622   case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
3623 
3624   /* LCOV_EXCL_START - this is a "never event" */
3625   default:
3626   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3627   pcre2grep_exit(usage(2));
3628   /* LCOV_EXCL_STOP */
3629   }
3630 
3631 return options;
3632 }
3633 
3634 
3635 
3636 /*************************************************
3637 *          Construct printed ordinal             *
3638 *************************************************/
3639 
3640 /* This turns a number into "1st", "3rd", etc. */
3641 
static char *
ordin(int n)
{
/* The result lives in static storage, so it is overwritten by the next call.
Fourteen bytes hold the longest 32-bit decimal (11 characters including a
minus sign), a two-letter suffix, and the terminating zero. */

static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);
int lasttwo = n % 100;

/* 11, 12 and 13 take "th" despite ending in 1, 2 or 3. Anything else is
decided by the final digit; values not listed keep the default. */

if (lasttwo < 11 || lasttwo > 13)
  {
  int lastone = lasttwo % 10;
  if (lastone == 1) suffix = "st";
  else if (lastone == 2) suffix = "nd";
  else if (lastone == 3) suffix = "rd";
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3660 
3661 
3662 
3663 /*************************************************
3664 *          Compile a single pattern              *
3665 *************************************************/
3666 
3667 /* Do nothing if the pattern has already been compiled. This is the case for
3668 include/exclude patterns read from a file.
3669 
3670 When the -F option has been used, each "pattern" may be a list of strings,
3671 separated by line breaks. They will be matched literally. We split such a
3672 string and compile the first substring, inserting an additional block into the
3673 pattern chain.
3674 
3675 Arguments:
3676   p              points to the pattern block
3677   options        the PCRE options
3678   fromfile       TRUE if the pattern was read from a file
3679   fromtext       file name or identifying text (e.g. "include")
3680   count          0 if this is the only command line pattern, or
3681                  number of the command line pattern, or
3682                  linenumber for a pattern from a file
3683 
3684 Returns:         TRUE on success, FALSE after an error
3685 */
3686 
3687 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3688 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3689   int count)
3690 {
3691 char *ps;
3692 int errcode;
3693 PCRE2_SIZE patlen, erroffset;
3694 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3695 
3696 if (p->compiled != NULL) return TRUE;
3697 ps = p->string;
3698 patlen = p->length;
3699 
3700 if ((options & PCRE2_LITERAL) != 0)
3701   {
3702   int ellength;
3703   char *eop = ps + patlen;
3704   char *pe = end_of_line(ps, eop, &ellength);
3705 
3706   if (ellength != 0)
3707     {
3708     patlen = pe - ps - ellength;
3709     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3710     }
3711   }
3712 
3713 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3714   &erroffset, compile_context);
3715 
3716 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3717 ignore any JIT compiler errors, relying falling back to interpreting if
3718 anything goes wrong with JIT. */
3719 
3720 if (p->compiled != NULL)
3721   {
3722 #ifdef SUPPORT_PCRE2GREP_JIT
3723   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3724 #endif
3725   return TRUE;
3726   }
3727 
3728 /* Handle compile errors */
3729 
3730 if (erroffset > patlen) erroffset = patlen;
3731 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3732 
3733 if (fromfile)
3734   {
3735   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3736     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3737   }
3738 else
3739   {
3740   if (count == 0)
3741     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3742       fromtext, (int)erroffset, errmessbuffer);
3743   else
3744     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3745       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3746   }
3747 
3748 return FALSE;
3749 }
3750 
3751 
3752 
3753 /*************************************************
3754 *     Read and compile a file of patterns        *
3755 *************************************************/
3756 
3757 /* This is used for --filelist, --include-from, and --exclude-from.
3758 
3759 Arguments:
3760   name         the name of the file; "-" is stdin
3761   patptr       pointer to the pattern chain anchor
3762   patlastptr   pointer to the last pattern pointer
3763 
3764 Returns:       TRUE if all went well
3765 */
3766 
3767 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3768 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3769 {
3770 int linenumber = 0;
3771 PCRE2_SIZE patlen;
3772 FILE *f;
3773 const char *filename;
3774 char buffer[MAXPATLEN+20];
3775 
3776 if (strcmp(name, "-") == 0)
3777   {
3778   f = stdin;
3779   filename = stdin_name;
3780   }
3781 else
3782   {
3783   f = fopen(name, "r");
3784   if (f == NULL)
3785     {
3786     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3787     return FALSE;
3788     }
3789   filename = name;
3790   }
3791 
3792 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3793   {
3794   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3795   linenumber++;
3796   if (patlen == 0) continue;   /* Skip blank lines */
3797 
3798   /* Note: this call to add_pattern() puts a pointer to the local variable
3799   "buffer" into the pattern chain. However, that pointer is used only when
3800   compiling the pattern, which happens immediately below, so we flatten it
3801   afterwards, as a precaution against any later code trying to use it. */
3802 
3803   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3804   if (*patlastptr == NULL)
3805     {
3806     /* LCOV_EXCL_START - won't happen in testing */
3807     if (f != stdin) fclose(f);
3808     return FALSE;
3809     /* LCOV_EXCL_STOP */
3810     }
3811   if (*patptr == NULL) *patptr = *patlastptr;
3812 
3813   /* This loop is needed because compiling a "pattern" when -F is set may add
3814   on additional literal patterns if the original contains a newline. In the
3815   common case, it never will, because read_one_line() stops at a newline.
3816   However, the -N option can be used to give pcre2grep a different newline
3817   setting. */
3818 
3819   for(;;)
3820     {
3821     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3822         linenumber))
3823       {
3824       if (f != stdin) fclose(f);
3825       return FALSE;
3826       }
3827     (*patlastptr)->string = NULL;            /* Insurance */
3828     if ((*patlastptr)->next == NULL) break;
3829     *patlastptr = (*patlastptr)->next;
3830     }
3831   }
3832 
3833 if (f != stdin) fclose(f);
3834 return TRUE;
3835 }
3836 
3837 
3838 
3839 /*************************************************
3840 *                Main program                    *
3841 *************************************************/
3842 
3843 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3844 
3845 int
main(int argc,char ** argv)3846 main(int argc, char **argv)
3847 {
3848 int i, j;
3849 int rc = 1;
3850 BOOL only_one_at_top;
3851 patstr *cp;
3852 fnstr *fn;
3853 omstr *om;
3854 const char *locale_from = "--locale";
3855 
3856 #ifdef SUPPORT_PCRE2GREP_JIT
3857 pcre2_jit_stack *jit_stack = NULL;
3858 #endif
3859 
3860 /* In Windows, stdout is set up as a text stream, which means that \n is
3861 converted to \r\n. This causes output lines that are copied from the input to
3862 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3863 that stdout is a binary stream. Note that this means all other output to stdout
3864 must use STDOUT_NL to terminate lines. */
3865 
3866 #ifdef WIN32
3867 _setmode(_fileno(stdout), _O_BINARY);
3868 #endif
3869 
3870 /* Process the options */
3871 
3872 for (i = 1; i < argc; i++)
3873   {
3874   option_item *op = NULL;
3875   char *option_data = (char *)"";    /* default to keep compiler happy */
3876   BOOL longop;
3877   BOOL longopwasequals = FALSE;
3878 
3879   if (argv[i][0] != '-') break;
3880 
3881   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3882   but only if we have previously had -e or -f to define the patterns. */
3883 
3884   if (argv[i][1] == 0)
3885     {
3886     if (pattern_files != NULL || patterns != NULL) break;
3887       else pcre2grep_exit(usage(2));
3888     }
3889 
3890   /* Handle a long name option, or -- to terminate the options */
3891 
3892   if (argv[i][1] == '-')
3893     {
3894     char *arg = argv[i] + 2;
3895     char *argequals = strchr(arg, '=');
3896 
3897     if (*arg == 0)    /* -- terminates options */
3898       {
3899       i++;
3900       break;                /* out of the options-handling loop */
3901       }
3902 
3903     longop = TRUE;
3904 
3905     /* Some long options have data that follows after =, for example file=name.
3906     Some options have variations in the long name spelling: specifically, we
3907     allow "regexp" because GNU grep allows it, though I personally go along
3908     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3909     These options are entered in the table as "regex(p)". Options can be in
3910     both these categories. */
3911 
3912     for (op = optionlist; op->one_char != 0; op++)
3913       {
3914       char *opbra = strchr(op->long_name, '(');
3915       char *equals = strchr(op->long_name, '=');
3916 
3917       /* Handle options with only one spelling of the name */
3918 
3919       if (opbra == NULL)     /* Does not contain '(' */
3920         {
3921         if (equals == NULL)  /* Not thing=data case */
3922           {
3923           if (strcmp(arg, op->long_name) == 0) break;
3924           }
3925         else                 /* Special case xxx=data */
3926           {
3927           int oplen = (int)(equals - op->long_name);
3928           int arglen = (argequals == NULL)?
3929             (int)strlen(arg) : (int)(argequals - arg);
3930           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3931             {
3932             option_data = arg + arglen;
3933             if (*option_data == '=')
3934               {
3935               option_data++;
3936               longopwasequals = TRUE;
3937               }
3938             break;
3939             }
3940           }
3941         }
3942 
3943       /* Handle options with an alternate spelling of the name */
3944 
3945       else
3946         {
3947         char buff1[24];
3948         char buff2[24];
3949         int ret;
3950 
3951         int baselen = (int)(opbra - op->long_name);
3952         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3953         int arglen = (argequals == NULL || equals == NULL)?
3954           (int)strlen(arg) : (int)(argequals - arg);
3955 
3956         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3957              ret < 0 || ret > (int)sizeof(buff1)) ||
3958             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3959                      fulllen - baselen - 2, opbra + 1),
3960              ret < 0 || ret > (int)sizeof(buff2)))
3961           {
3962           /* LCOV_EXCL_START - this is a "never" event */
3963           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3964             op->long_name);
3965           pcre2grep_exit(2);
3966           /* LCOV_EXCL_STOP */
3967           }
3968 
3969         if (strncmp(arg, buff1, arglen) == 0 ||
3970            strncmp(arg, buff2, arglen) == 0)
3971           {
3972           if (equals != NULL && argequals != NULL)
3973             {
3974             option_data = argequals;
3975             if (*option_data == '=')
3976               {
3977               option_data++;
3978               longopwasequals = TRUE;
3979               }
3980             }
3981           break;
3982           }
3983         }
3984       }
3985 
3986     if (op->one_char == 0)
3987       {
3988       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3989       pcre2grep_exit(usage(2));
3990       }
3991     }
3992 
3993   /* One-char options; many that have no data may be in a single argument; we
3994   continue till we hit the last one or one that needs data. */
3995 
3996   else
3997     {
3998     char *s = argv[i] + 1;
3999     longop = FALSE;
4000 
4001     while (*s != 0)
4002       {
4003       for (op = optionlist; op->one_char != 0; op++)
4004         {
4005         if (*s == op->one_char) break;
4006         }
4007       if (op->one_char == 0)
4008         {
4009         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4010           *s, argv[i]);
4011         pcre2grep_exit(usage(2));
4012         }
4013 
4014       option_data = s+1;
4015 
4016       /* Break out if this is the last character in the string; it's handled
4017       below like a single multi-char option. */
4018 
4019       if (*option_data == 0) break;
4020 
4021       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4022       are used for ones that either have a numerical number or defaults, i.e.
4023       the data is optional. If a digit follows, there is data; if not, carry on
4024       with other single-character options in the same string. */
4025 
4026       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4027         {
4028         if (isdigit((unsigned char)s[1])) break;
4029         }
4030       else   /* Check for an option with data */
4031         {
4032         if (op->type != OP_NODATA) break;
4033         }
4034 
4035       /* Handle a single-character option with no data, then loop for the
4036       next character in the string. */
4037 
4038       pcre2_options = handle_option(*s++, pcre2_options);
4039       }
4040     }
4041 
4042   /* At this point we should have op pointing to a matched option. If the type
4043   is NO_DATA, it means that there is no data, and the option might set
4044   something in the PCRE options. */
4045 
4046   if (op->type == OP_NODATA)
4047     {
4048     pcre2_options = handle_option(op->one_char, pcre2_options);
4049     continue;
4050     }
4051 
4052   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4053   either has a value or defaults to something. It cannot have data in a
4054   separate item. At the moment, the only such options are "colo(u)r",
4055   and "only-matching". */
4056 
4057   if (*option_data == 0 &&
4058       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4059        op->type == OP_OP_NUMBERS))
4060     {
4061     switch (op->one_char)
4062       {
4063       case N_COLOUR:
4064       colour_option = "auto";
4065       break;
4066 
4067       case 'o':
4068       only_matching_last = add_number(0, only_matching_last);
4069       if (only_matching == NULL) only_matching = only_matching_last;
4070       break;
4071       }
4072     continue;
4073     }
4074 
4075   /* Otherwise, find the data string for the option. */
4076 
4077   if (*option_data == 0)
4078     {
4079     if (i >= argc - 1 || longopwasequals)
4080       {
4081       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4082       pcre2grep_exit(usage(2));
4083       }
4084     option_data = argv[++i];
4085     }
4086 
4087   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4088   added to a chain of numbers. */
4089 
4090   if (op->type == OP_OP_NUMBERS)
4091     {
4092     unsigned long int n = decode_number(option_data, op, longop);
4093     omdatastr *omd = (omdatastr *)op->dataptr;
4094     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4095     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4096     }
4097 
4098   /* If the option type is OP_PATLIST, it's the -e option, or one of the
4099   include/exclude options, which can be called multiple times to create lists
4100   of patterns. */
4101 
4102   else if (op->type == OP_PATLIST)
4103     {
4104     patdatastr *pd = (patdatastr *)op->dataptr;
4105     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4106       *(pd->lastptr));
4107     if (*(pd->lastptr) == NULL) goto EXIT2;
4108     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4109     }
4110 
4111   /* If the option type is OP_FILELIST, it's one of the options that names a
4112   file. */
4113 
4114   else if (op->type == OP_FILELIST)
4115     {
4116     fndatastr *fd = (fndatastr *)op->dataptr;
4117     fn = (fnstr *)malloc(sizeof(fnstr));
4118     if (fn == NULL)
4119       {
4120       /* LCOV_EXCL_START */
4121       fprintf(stderr, "pcre2grep: malloc failed\n");
4122       goto EXIT2;
4123       /* LCOV_EXCL_STOP */
4124       }
4125     fn->next = NULL;
4126     fn->name = option_data;
4127     if (*(fd->anchor) == NULL)
4128       *(fd->anchor) = fn;
4129     else
4130       (*(fd->lastptr))->next = fn;
4131     *(fd->lastptr) = fn;
4132     }
4133 
4134   /* Handle OP_BINARY_FILES */
4135 
4136   else if (op->type == OP_BINFILES)
4137     {
4138     if (strcmp(option_data, "binary") == 0)
4139       binary_files = BIN_BINARY;
4140     else if (strcmp(option_data, "without-match") == 0)
4141       binary_files = BIN_NOMATCH;
4142     else if (strcmp(option_data, "text") == 0)
4143       binary_files = BIN_TEXT;
4144     else
4145       {
4146       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4147         option_data);
4148       pcre2grep_exit(usage(2));
4149       }
4150     }
4151 
4152   /* Otherwise, deal with a single string or numeric data value. */
4153 
4154   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4155            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4156     {
4157     *((char **)op->dataptr) = option_data;
4158     }
4159   else
4160     {
4161     unsigned long int n = decode_number(option_data, op, longop);
4162     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4163       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4164       else *((int *)op->dataptr) = n;
4165     }
4166   }
4167 
4168 /* Options have been decoded. If -C was used, its value is used as a default
4169 for -A and -B. */
4170 
4171 if (both_context > 0)
4172   {
4173   if (after_context == 0) after_context = both_context;
4174   if (before_context == 0) before_context = both_context;
4175   }
4176 
4177 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4178 permitted. They display, each in their own way, only the data that has matched.
4179 */
4180 
4181 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4182   file_offsets + line_offsets;
4183 
4184 if (only_matching_count > 1)
4185   {
4186   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4187     "--file-offsets and/or --line-offsets\n");
4188   pcre2grep_exit(usage(2));
4189   }
4190 
4191 /* Check that there is a big enough ovector for all -o settings. */
4192 
4193 for (om = only_matching; om != NULL; om = om->next)
4194   {
4195   int n = om->groupnum;
4196   if (n > (int)capture_max)
4197     {
4198     fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4199     fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4200     goto EXIT2;
4201     }
4202   }
4203 
4204 /* Check the text supplied to --output for errors. */
4205 
4206 if (output_text != NULL &&
4207     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4208   goto EXIT2;
4209 
4210 /* Set up default compile and match contexts and match data blocks. */
4211 
4212 offset_size = capture_max + 1;
4213 compile_context = pcre2_compile_context_create(NULL);
4214 match_context = pcre2_match_context_create(NULL);
4215 match_data_pair[0] = pcre2_match_data_create(offset_size, NULL);
4216 match_data_pair[1] = pcre2_match_data_create(offset_size, NULL);
4217 offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]);
4218 offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]);
4219 match_data = match_data_pair[0];
4220 offsets = offsets_pair[0];
4221 match_data_toggle = 0;
4222 
4223 /* If string (script) callouts are supported, set up the callout processing
4224 function. */
4225 
4226 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4227 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4228 #endif
4229 
4230 /* Put limits into the match data block. */
4231 
4232 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4233 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4234 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4235 
4236 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4237 LC_ALL environment variable is set, and if so, use it. */
4238 
4239 if (locale == NULL)
4240   {
4241   locale = getenv("LC_ALL");
4242   locale_from = "LC_ALL";
4243   }
4244 
4245 if (locale == NULL)
4246   {
4247   locale = getenv("LC_CTYPE");
4248   locale_from = "LC_CTYPE";
4249   }
4250 
4251 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4252 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4253 
4254 if (locale != NULL)
4255   {
4256   if (setlocale(LC_CTYPE, locale) == NULL)
4257     {
4258     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4259       locale, locale_from);
4260     goto EXIT2;
4261     }
4262   character_tables = pcre2_maketables(NULL);
4263   pcre2_set_character_tables(compile_context, character_tables);
4264   }
4265 
4266 /* Sort out colouring */
4267 
4268 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4269   {
4270   if (strcmp(colour_option, "always") == 0)
4271 #ifdef WIN32
4272     do_ansi = !is_stdout_tty(),
4273 #endif
4274     do_colour = TRUE;
4275   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4276   else
4277     {
4278     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4279       colour_option);
4280     goto EXIT2;
4281     }
4282   if (do_colour)
4283     {
4284     char *cs = getenv("PCRE2GREP_COLOUR");
4285     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4286     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4287     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4288     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4289     if (cs == NULL) cs = getenv("GREP_COLOR");
4290     if (cs != NULL)
4291       {
4292       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4293       }
4294 #ifdef WIN32
4295     init_colour_output();
4296 #endif
4297     }
4298   }
4299 
4300 /* When colouring or otherwise identifying matching substrings, we need to find
4301 all possible matches when there are multiple patterns. */
4302 
4303 all_matches = do_colour || only_matching_count != 0;
4304 
4305 /* Sort out a newline setting. */
4306 
4307 if (newline_arg != NULL)
4308   {
4309   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4310        endlinetype++)
4311     {
4312     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4313     }
4314   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4315     pcre2_set_newline(compile_context, endlinetype);
4316   else
4317     {
4318     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4319       newline_arg);
4320     goto EXIT2;
4321     }
4322   }
4323 
4324 /* Find default newline convention */
4325 
4326 else
4327   {
4328   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4329   }
4330 
4331 /* Interpret the text values for -d and -D */
4332 
4333 if (dee_option != NULL)
4334   {
4335   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4336   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4337   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4338   else
4339     {
4340     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4341     goto EXIT2;
4342     }
4343   }
4344 
4345 if (DEE_option != NULL)
4346   {
4347   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4348   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4349   else
4350     {
4351     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4352     goto EXIT2;
4353     }
4354   }
4355 
4356 /* Set the extra options */
4357 
4358 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4359 
4360 /* If use_jit is set, check whether JIT is available. If not, do not try
4361 to use JIT. */
4362 
4363 if (use_jit)
4364   {
4365   uint32_t answer;
4366   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4367   if (!answer) use_jit = FALSE;
4368   }
4369 
4370 /* Get memory for the main buffer. */
4371 
4372 if (bufthird <= 0)
4373   {
4374   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4375   goto EXIT2;
4376   }
4377 
4378 bufsize = 3*bufthird;
4379 main_buffer = (char *)malloc(bufsize);
4380 
4381 if (main_buffer == NULL)
4382   {
4383   /* LCOV_EXCL_START */
4384   fprintf(stderr, "pcre2grep: malloc failed\n");
4385   goto EXIT2;
4386   /* LCOV_EXCL_STOP */
4387   }
4388 
4389 /* If no patterns were provided by -e, and there are no files provided by -f,
4390 the first argument is the one and only pattern, and it must exist. */
4391 
4392 if (patterns == NULL && pattern_files == NULL)
4393   {
4394   if (i >= argc) return usage(2);
4395   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4396     NULL);
4397   i++;
4398   if (patterns == NULL) goto EXIT2;
4399   }
4400 
4401 /* Compile the patterns that were provided on the command line, either by
4402 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4403 after all the command-line options are read so that we know which PCRE options
4404 to use. When -F is used, compile_pattern() may add another block into the
4405 chain, so we must not access the next pointer till after the compile. */
4406 
4407 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4408   {
4409   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4410        (j == 1 && patterns->next == NULL)? 0 : j))
4411     goto EXIT2;
4412   }
4413 
4414 /* Read and compile the regular expressions that are provided in files. */
4415 
4416 for (fn = pattern_files; fn != NULL; fn = fn->next)
4417   {
4418   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4419   }
4420 
4421 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4422 
4423 #ifdef SUPPORT_PCRE2GREP_JIT
4424 if (use_jit)
4425   {
4426   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4427   if (jit_stack != NULL                        )
4428     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4429   }
4430 #endif
4431 
4432 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4433 adjust the options. */
4434 
4435 pcre2_options &= ~PCRE2_LITERAL;
4436 (void)pcre2_set_compile_extra_options(compile_context, 0);
4437 
4438 /* If there are include or exclude patterns read from the command line, compile
4439 them. */
4440 
4441 for (j = 0; j < 4; j++)
4442   {
4443   int k;
4444   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4445     {
4446     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4447          (k == 1 && cp->next == NULL)? 0 : k))
4448       goto EXIT2;
4449     }
4450   }
4451 
4452 /* Read and compile include/exclude patterns from files. */
4453 
4454 for (fn = include_from; fn != NULL; fn = fn->next)
4455   {
4456   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4457     goto EXIT2;
4458   }
4459 
4460 for (fn = exclude_from; fn != NULL; fn = fn->next)
4461   {
4462   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4463     goto EXIT2;
4464   }
4465 
4466 /* If there are no files that contain lists of files to search, and there are
4467 no file arguments, search stdin, and then exit. */
4468 
4469 if (file_lists == NULL && i >= argc)
4470   {
4471   /* Using a buffered stdin, that then is seek is not portable,
4472      so attempt to remove the buffer, to workaround reported issues
4473      affecting several BSD and AIX */
4474   if (count_limit >= 0)
4475     setbuf(stdin, NULL);
4476   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4477     (filenames > FN_DEFAULT)? stdin_name : NULL);
4478   goto EXIT;
4479   }
4480 
4481 /* If any files that contains a list of files to search have been specified,
4482 read them line by line and search the given files. */
4483 
4484 for (fn = file_lists; fn != NULL; fn = fn->next)
4485   {
4486   char buffer[FNBUFSIZ];
4487   FILE *fl;
4488   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4489     {
4490     fl = fopen(fn->name, "rb");
4491     if (fl == NULL)
4492       {
4493       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4494         strerror(errno));
4495       goto EXIT2;
4496       }
4497     }
4498   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4499     {
4500     int frc;
4501     char *end = buffer + (int)strlen(buffer);
4502     while (end > buffer && isspace(end[-1])) end--;
4503     *end = 0;
4504     if (*buffer != 0)
4505       {
4506       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4507       if (frc > 1) rc = frc;
4508         else if (frc == 0 && rc == 1) rc = 0;
4509       }
4510     }
4511   if (fl != stdin) fclose(fl);
4512   }
4513 
4514 /* After handling file-list, work through remaining arguments. Pass in the fact
4515 that there is only one argument at top level - this suppresses the file name if
4516 the argument is not a directory and filenames are not otherwise forced. */
4517 
4518 only_one_at_top = i == argc - 1 && file_lists == NULL;
4519 
4520 for (; i < argc; i++)
4521   {
4522   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4523     only_one_at_top);
4524   if (frc > 1) rc = frc;
4525     else if (frc == 0 && rc == 1) rc = 0;
4526   }
4527 
4528 /* Show the total number of matches if requested, but not if only one file's
4529 count was printed. */
4530 
4531 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4532   {
4533   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4534     fprintf(stdout, "TOTAL:");
4535   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4536   }
4537 
4538 EXIT:
4539 #ifdef SUPPORT_PCRE2GREP_JIT
4540 pcre2_jit_free_unused_memory(NULL);
4541 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4542 #endif
4543 
4544 free(main_buffer);
4545 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4546 
4547 pcre2_compile_context_free(compile_context);
4548 pcre2_match_context_free(match_context);
4549 pcre2_match_data_free(match_data_pair[0]);
4550 pcre2_match_data_free(match_data_pair[1]);
4551 
4552 free_pattern_chain(patterns);
4553 free_pattern_chain(include_patterns);
4554 free_pattern_chain(include_dir_patterns);
4555 free_pattern_chain(exclude_patterns);
4556 free_pattern_chain(exclude_dir_patterns);
4557 
4558 free_file_chain(exclude_from);
4559 free_file_chain(include_from);
4560 free_file_chain(pattern_files);
4561 free_file_chain(file_lists);
4562 
4563 while (only_matching != NULL)
4564   {
4565   omstr *this = only_matching;
4566   only_matching = this->next;
4567   free(this);
4568   }
4569 
4570 pcre2grep_exit(rc);
4571 
4572 EXIT2:
4573 rc = 2;
4574 goto EXIT;
4575 }
4576 
4577 /* End of pcre2grep */
4578