• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2020 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
66 /* Some CMake's define it still */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h>                /* For _setmode() */
79 #include <fcntl.h>             /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
113 /* old VC and older compilers don't support %td or %zu, and even some that claim to
114 be C99 don't support it (hence DISABLE_PERCENT_ZT). */
115 
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126 
127 #define FALSE 0
128 #define TRUE 1
129 
130 typedef int BOOL;
131 
132 #define DEFAULT_CAPTURE_MAX 50
133 
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139 
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142 
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146 
147 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148 
149 /* File reading styles */
150 
151 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152 
153 /* Actions for the -d and -D options */
154 
155 enum { dee_READ, dee_SKIP, dee_RECURSE };
156 enum { DEE_READ, DEE_SKIP };
157 
158 /* Actions for special processing options (flag bits) */
159 
160 #define PO_WORD_MATCH     0x0001
161 #define PO_LINE_MATCH     0x0002
162 #define PO_FIXED_STRINGS  0x0004
163 
164 /* Binary file options */
165 
166 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167 
168 /* Return values from decode_dollar_escape() */
169 
170 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171 
172 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173 environments), a warning is issued if the value of fwrite() is ignored.
174 Unfortunately, casting to (void) does not suppress the warning. To get round
175 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176 apply to fprintf(). */
177 
178 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179 
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185 
186 #ifdef WIN32
187 #define STDOUT_NL     "\r\n"
188 #define STDOUT_NL_LEN  2
189 #define QUOT          "\""
190 #else
191 #define STDOUT_NL      "\n"
192 #define STDOUT_NL_LEN  1
193 #define QUOT           "'"
194 #endif
195 
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199 
200 #define STDOUT_NL_CODE 0x7fffffffu
201 
202 
203 
204 /*************************************************
205 *               Global variables                 *
206 *************************************************/
207 
208 /* Jeffrey Friedl has some debugging requirements that are not part of the
209 regular code. */
210 
211 static const char *colour_string = "1;31";
212 static const char *colour_option = NULL;
213 static const char *dee_option = NULL;
214 static const char *DEE_option = NULL;
215 static const char *locale = NULL;
216 static const char *newline_arg = NULL;
217 static const char *om_separator = NULL;
218 static const char *stdin_name = "(standard input)";
219 static const char *output_text = NULL;
220 
221 static char *main_buffer = NULL;
222 
223 static int after_context = 0;
224 static int before_context = 0;
225 static int binary_files = BIN_BINARY;
226 static int both_context = 0;
227 static int bufthird = PCRE2GREP_BUFSIZE;
228 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
229 static int bufsize = 3*PCRE2GREP_BUFSIZE;
230 static int endlinetype;
231 
232 static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
233 static unsigned long int counts_printed = 0;
234 static unsigned long int total_count = 0;
235 
236 #ifdef WIN32
237 static int dee_action = dee_SKIP;
238 #else
239 static int dee_action = dee_READ;
240 #endif
241 
242 static int DEE_action = DEE_READ;
243 static int error_count = 0;
244 static int filenames = FN_DEFAULT;
245 
246 #ifdef SUPPORT_PCRE2GREP_JIT
247 static BOOL use_jit = TRUE;
248 #else
249 static BOOL use_jit = FALSE;
250 #endif
251 
252 static const uint8_t *character_tables = NULL;
253 
254 static uint32_t pcre2_options = 0;
255 static uint32_t extra_options = 0;
256 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
257 static uint32_t match_limit = 0;
258 static uint32_t depth_limit = 0;
259 
260 static pcre2_compile_context *compile_context;
261 static pcre2_match_context *match_context;
262 static pcre2_match_data *match_data;
263 static PCRE2_SIZE *offsets;
264 static uint32_t offset_size;
265 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
266 
267 static BOOL count_only = FALSE;
268 static BOOL do_colour = FALSE;
269 #ifdef WIN32
270 static BOOL do_ansi = FALSE;
271 #endif
272 static BOOL file_offsets = FALSE;
273 static BOOL hyphenpending = FALSE;
274 static BOOL invert = FALSE;
275 static BOOL line_buffered = FALSE;
276 static BOOL line_offsets = FALSE;
277 static BOOL multiline = FALSE;
278 static BOOL number = FALSE;
279 static BOOL omit_zero_count = FALSE;
280 static BOOL resource_error = FALSE;
281 static BOOL quiet = FALSE;
282 static BOOL show_total_count = FALSE;
283 static BOOL silent = FALSE;
284 static BOOL utf = FALSE;
285 
286 static uint8_t utf8_buffer[8];
287 
288 
289 /* Structure for list of --only-matching capturing numbers. */
290 
291 typedef struct omstr {
292   struct omstr *next;
293   int groupnum;
294 } omstr;
295 
296 static omstr *only_matching = NULL;
297 static omstr *only_matching_last = NULL;
298 static int only_matching_count;
299 
300 /* Structure for holding the two variables that describe a number chain. */
301 
302 typedef struct omdatastr {
303   omstr **anchor;
304   omstr **lastptr;
305 } omdatastr;
306 
307 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
308 
309 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
310 
311 typedef struct fnstr {
312   struct fnstr *next;
313   char *name;
314 } fnstr;
315 
316 static fnstr *exclude_from = NULL;
317 static fnstr *exclude_from_last = NULL;
318 static fnstr *include_from = NULL;
319 static fnstr *include_from_last = NULL;
320 
321 static fnstr *file_lists = NULL;
322 static fnstr *file_lists_last = NULL;
323 static fnstr *pattern_files = NULL;
324 static fnstr *pattern_files_last = NULL;
325 
326 /* Structure for holding the two variables that describe a file name chain. */
327 
328 typedef struct fndatastr {
329   fnstr **anchor;
330   fnstr **lastptr;
331 } fndatastr;
332 
333 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
334 static fndatastr include_from_data = { &include_from, &include_from_last };
335 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
336 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
337 
338 /* Structure for pattern and its compiled form; used for matching patterns and
339 also for include/exclude patterns. */
340 
341 typedef struct patstr {
342   struct patstr *next;
343   char *string;
344   PCRE2_SIZE length;
345   pcre2_code *compiled;
346 } patstr;
347 
348 static patstr *patterns = NULL;
349 static patstr *patterns_last = NULL;
350 static patstr *include_patterns = NULL;
351 static patstr *include_patterns_last = NULL;
352 static patstr *exclude_patterns = NULL;
353 static patstr *exclude_patterns_last = NULL;
354 static patstr *include_dir_patterns = NULL;
355 static patstr *include_dir_patterns_last = NULL;
356 static patstr *exclude_dir_patterns = NULL;
357 static patstr *exclude_dir_patterns_last = NULL;
358 
359 /* Structure holding the two variables that describe a pattern chain. A pointer
360 to such structures is used for each appropriate option. */
361 
362 typedef struct patdatastr {
363   patstr **anchor;
364   patstr **lastptr;
365 } patdatastr;
366 
367 static patdatastr match_patdata = { &patterns, &patterns_last };
368 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
369 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
370 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
371 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
372 
373 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
374                                  &include_dir_patterns, &exclude_dir_patterns };
375 
376 static const char *incexname[4] = { "--include", "--exclude",
377                                     "--include-dir", "--exclude-dir" };
378 
379 /* Structure for options and list of them */
380 
381 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
382        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
383 
384 typedef struct option_item {
385   int type;
386   int one_char;
387   void *dataptr;
388   const char *long_name;
389   const char *help_text;
390 } option_item;
391 
392 /* Options without a single-letter equivalent get a negative value. This can be
393 used to identify them. */
394 
395 #define N_COLOUR       (-1)
396 #define N_EXCLUDE      (-2)
397 #define N_EXCLUDE_DIR  (-3)
398 #define N_HELP         (-4)
399 #define N_INCLUDE      (-5)
400 #define N_INCLUDE_DIR  (-6)
401 #define N_LABEL        (-7)
402 #define N_LOCALE       (-8)
403 #define N_NULL         (-9)
404 #define N_LOFFSETS     (-10)
405 #define N_FOFFSETS     (-11)
406 #define N_LBUFFER      (-12)
407 #define N_H_LIMIT      (-13)
408 #define N_M_LIMIT      (-14)
409 #define N_M_LIMIT_DEP  (-15)
410 #define N_BUFSIZE      (-16)
411 #define N_NOJIT        (-17)
412 #define N_FILE_LIST    (-18)
413 #define N_BINARY_FILES (-19)
414 #define N_EXCLUDE_FROM (-20)
415 #define N_INCLUDE_FROM (-21)
416 #define N_OM_SEPARATOR (-22)
417 #define N_MAX_BUFSIZE  (-23)
418 #define N_OM_CAPTURE   (-24)
419 #define N_ALLABSK      (-25)
420 
421 static option_item optionlist[] = {
422   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
423   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
424   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
425   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
426   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
427   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
428   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
429   { OP_NUMBER,     N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
430   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
431   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
432   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
433   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
434   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
435   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
436   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
437   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
438   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
439   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
440   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
441   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
442   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
443   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
444   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
445   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
446   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
447   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
448   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
449   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
450   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
451   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
452   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
453   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
454   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
455   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
456   { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
457   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
458   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
459 #ifdef SUPPORT_PCRE2GREP_JIT
460   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
461 #else
462   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
463 #endif
464   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
465   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
466   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
467   { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
468   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
469   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
470   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
471   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
472   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
473   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
474   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
475   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
476   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
477   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
478   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
479   { OP_NODATA,    'U',      NULL,              "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
480   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
481   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
482   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
483   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
484   { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
485   { OP_NODATA,    0,        NULL,               NULL,            NULL }
486 };
487 
488 /* Table of names for newline types. Must be kept in step with the definitions
489 of PCRE2_NEWLINE_xx in pcre2.h. */
490 
491 static const char *newlines[] = {
492   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
493 
494 /* UTF-8 tables  */
495 
496 const int utf8_table1[] =
497   { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
498 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
499 
500 const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
501 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
502 
503 const char utf8_table4[] = {
504   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
505   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
506   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
507   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
508 
509 
510 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
511 /*************************************************
512 *    Emulated memmove() for systems without it   *
513 *************************************************/
514 
515 /* This function can make use of bcopy() if it is available. Otherwise do it by
516 steam, as there are some non-Unix environments that lack both memmove() and
517 bcopy(). */
518 
/* Copy n bytes from s to d, allowing the two regions to overlap.

Arguments:
  d          destination
  s          source
  n          number of bytes to copy

Returns:     d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
/* Note that bcopy() takes its arguments in (source, destination) order. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* The destination lies above the source, so work downwards from the end;
  that way no source byte is overwritten before it has been read. */
  unsigned char *to_end = to + n;
  const unsigned char *from_end = from + n;
  while (remaining-- > 0) *(--to_end) = *(--from_end);
  return (void *)to_end;   /* Now back at the start of the destination */
  }

/* Otherwise a plain ascending copy is safe. */
while (remaining-- > 0) *to++ = *from++;
return d;
#endif   /* not HAVE_BCOPY */
}
543 #undef memmove
544 #define memmove(d,s,n) emulated_memmove(d,s,n)
545 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
546 
547 
548 
549 /*************************************************
550 *           Convert code point to UTF-8          *
551 *************************************************/
552 
553 /* A static buffer is used. Returns the number of bytes. */
554 
555 static int
ord2utf8(uint32_t value)556 ord2utf8(uint32_t value)
557 {
558 int i, j;
559 uint8_t *utf8bytes = utf8_buffer;
560 for (i = 0; i < utf8_table1_size; i++)
561   if (value <= (uint32_t)utf8_table1[i]) break;
562 utf8bytes += i;
563 for (j = i; j > 0; j--)
564   {
565   *utf8bytes-- = 0x80 | (value & 0x3f);
566   value >>= 6;
567   }
568 *utf8bytes = utf8_table2[i] | value;
569 return i + 1;
570 }
571 
572 
573 
574 /*************************************************
575 *         Case-independent string compare        *
576 *************************************************/
577 
/* Compare two zero-terminated strings, ignoring case as defined by tolower().

Arguments:
  str1       first string
  str2       second string

Returns:     0 if the strings are equal, ignoring case
             1 if str1 sorts after str2
            -1 if str1 sorts before str2
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;

/* The loop continues while either string has characters left. If only one of
them has ended, its terminating zero differs from the other's character, so
the function returns before reading past the end of the shorter string. */

while (*str1 != '\0' || *str2 != '\0')
  {
  /* The argument of tolower() must be representable as an unsigned char (or
  be EOF); passing a negative plain char is undefined behaviour. */
  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return ((c1 > c2) << 1) - 1;
  }
return 0;
}
590 
591 
592 /*************************************************
593 *         Parse GREP_COLORS                      *
594 *************************************************/
595 
596 /* Extract ms or mt from GREP_COLORS.
597 
598 Argument:  the string, possibly NULL
599 Returns:   the value of ms or mt, or NULL if neither present
600 */
601 
/* The value is copied into a static buffer, so each call overwrites the
result of the previous one. A value that is too long for the buffer is
truncated. "ms=" is looked for first; "mt=" is used only if "ms=" is absent. */

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *src;
size_t n = 0;

if (gc == NULL) return NULL;

src = strstr(gc, "ms=");
if (src == NULL)
  {
  src = strstr(gc, "mt=");
  if (src == NULL) return NULL;
  }

/* Skip over the three-character key, then copy up to the next colon, the end
of the string, or until the buffer is full. */

for (src += 3; *src != ':' && *src != '\0' && n < sizeof(seq) - 1; src++)
  seq[n++] = *src;
seq[n] = '\0';
return seq;
}
619 
620 
621 /*************************************************
622 *         Exit from the program                  *
623 *************************************************/
624 
625 /* If there has been a resource error, give a suitable message.
626 
627 Argument:  the return code
628 Returns:   does not return
629 */
630 
631 static void
pcre2grep_exit(int rc)632 pcre2grep_exit(int rc)
633 {
634 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
635 status of 1, which is not helpful. To help with this problem, define a symbol
636 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
637 therein. */
638 
639 #ifdef __VMS
640   char val_buf[4];
641   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
642   $DESCRIPTOR(sym_val, val_buf);
643   sprintf(val_buf, "%d", rc);
644   sym_val.dsc$w_length = strlen(val_buf);
645   lib$set_symbol(&sym_nam, &sym_val);
646 #endif
647 
648 if (resource_error)
649   {
650   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
651     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
652     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
653   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
654   }
655 exit(rc);
656 }
657 
658 
659 /*************************************************
660 *          Add item to chain of patterns         *
661 *************************************************/
662 
663 /* Used to add an item onto a chain, or just return an unconnected item if the
664 "after" argument is NULL.
665 
666 Arguments:
667   s          pattern string to add
668   patlen     length of pattern
669   after      if not NULL points to item to insert after
670 
671 Returns:     new pattern block or NULL on error
672 */
673 
674 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)675 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
676 {
677 patstr *p = (patstr *)malloc(sizeof(patstr));
678 if (p == NULL)
679   {
680   fprintf(stderr, "pcre2grep: malloc failed\n");
681   pcre2grep_exit(2);
682   }
683 if (patlen > MAXPATLEN)
684   {
685   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
686     MAXPATLEN);
687   free(p);
688   return NULL;
689   }
690 p->next = NULL;
691 p->string = s;
692 p->length = patlen;
693 p->compiled = NULL;
694 
695 if (after != NULL)
696   {
697   p->next = after->next;
698   after->next = p;
699   }
700 return p;
701 }
702 
703 
704 /*************************************************
705 *           Free chain of patterns               *
706 *************************************************/
707 
708 /* Used for several chains of patterns.
709 
710 Argument: pointer to start of chain
711 Returns:  nothing
712 */
713 
714 static void
free_pattern_chain(patstr * pc)715 free_pattern_chain(patstr *pc)
716 {
717 while (pc != NULL)
718   {
719   patstr *p = pc;
720   pc = p->next;
721   if (p->compiled != NULL) pcre2_code_free(p->compiled);
722   free(p);
723   }
724 }
725 
726 
727 /*************************************************
728 *           Free chain of file names             *
729 *************************************************/
730 
731 /*
732 Argument: pointer to start of chain
733 Returns:  nothing
734 */
735 
736 static void
free_file_chain(fnstr * fn)737 free_file_chain(fnstr *fn)
738 {
739 while (fn != NULL)
740   {
741   fnstr *f = fn;
742   fn = f->next;
743   free(f);
744   }
745 }
746 
747 
748 /*************************************************
749 *            OS-specific functions               *
750 *************************************************/
751 
752 /* These definitions are needed in all Windows environments, even those where
753 Unix-style directory scanning can be used (see below). */
754 
755 #ifdef WIN32
756 
757 #ifndef STRICT
758 # define STRICT
759 #endif
760 #ifndef WIN32_LEAN_AND_MEAN
761 # define WIN32_LEAN_AND_MEAN
762 #endif
763 
764 #include <windows.h>
765 
766 #define iswild(name) (strpbrk(name, "*?") != NULL)
767 
768 /* Convert ANSI BGR format to RGB used by Windows */
769 #define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0))
770 
771 static HANDLE hstdout;
772 static CONSOLE_SCREEN_BUFFER_INFO csbi;
773 static WORD match_colour;
774 
775 static WORD
decode_ANSI_colour(const char * cs)776 decode_ANSI_colour(const char *cs)
777 {
778 WORD result = csbi.wAttributes;
779 while (*cs)
780   {
781   if (isdigit(*cs))
782     {
783     int code = atoi(cs);
784     if (code == 1) result |= 0x08;
785     else if (code == 4) result |= 0x8000;
786     else if (code == 5) result |= 0x80;
787     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
788     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
789     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
790     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
791     /* aixterm high intensity colour codes */
792     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
793     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
794 
795     while (isdigit(*cs)) cs++;
796     }
797   if (*cs) cs++;
798   }
799 return result;
800 }
801 
802 
803 static void
init_colour_output()804 init_colour_output()
805 {
806 if (do_colour)
807   {
808   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
809   /* This fails when redirected to con; try again if so. */
810   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
811     {
812     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
813       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
814     GetConsoleScreenBufferInfo(hcon, &csbi);
815     CloseHandle(hcon);
816     }
817   match_colour = decode_ANSI_colour(colour_string);
818   /* No valid colour found - turn off colouring */
819   if (!match_colour) do_colour = FALSE;
820   }
821 }
822 
823 #endif  /* WIN32 */
824 
825 
826 /* The following sets of functions are defined so that they can be made system
827 specific. At present there are versions for Unix-style environments, Windows,
828 native z/OS, and "no support". */
829 
830 
831 /************* Directory scanning Unix-style and z/OS ***********/
832 
833 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
834 #include <sys/types.h>
835 #include <sys/stat.h>
836 #include <dirent.h>
837 
838 #if defined NATIVE_ZOS
839 /************* Directory and PDS/E scanning for z/OS ***********/
840 /************* z/OS looks mostly like Unix with USS ************/
841 /* However, z/OS needs the #include statements in this header */
842 #include "pcrzosfs.h"
843 /* That header is not included in the main PCRE distribution because
844    other apparatus is needed to compile pcre2grep for z/OS. The header
845    can be found in the special z/OS distribution, which is available
846    from www.zaconsultants.net or from www.cbttape.org. */
847 #endif
848 
849 typedef DIR directory_type;
850 #define FILESEP '/'
851 
/* Test whether a path names a directory. If stat() fails the answer is zero,
in the expectation that opening the path as a file will then fail. */

static int
isdirectory(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 0 : S_ISDIR(info.st_mode);
}
860 
861 static directory_type *
opendirectory(char * filename)862 opendirectory(char *filename)
863 {
864 return opendir(filename);
865 }
866 
867 static char *
readdirectory(directory_type * dir)868 readdirectory(directory_type *dir)
869 {
870 for (;;)
871   {
872   struct dirent *dent = readdir(dir);
873   if (dent == NULL) return NULL;
874   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
875     return dent->d_name;
876   }
877 /* Control never reaches here */
878 }
879 
880 static void
closedirectory(directory_type * dir)881 closedirectory(directory_type *dir)
882 {
883 closedir(dir);
884 }
885 
886 
887 /************* Test for regular file, Unix-style **********/
888 
/* Test whether a path names a regular file. If stat() fails the answer is 1,
in the expectation that opening the path as a file will then fail. */

static int
isregfile(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 1 : S_ISREG(info.st_mode);
}
897 
898 
899 #if defined NATIVE_ZOS
900 /************* Test for a terminal in z/OS **********/
901 /* isatty() does not work in a TSO environment, so always give FALSE.*/
902 
903 static BOOL
is_stdout_tty(void)904 is_stdout_tty(void)
905 {
906 return FALSE;
907 }
908 
909 static BOOL
is_file_tty(FILE * f)910 is_file_tty(FILE *f)
911 {
912 return FALSE;
913 }
914 
915 
916 /************* Test for a terminal, Unix-style **********/
917 
918 #else
919 static BOOL
is_stdout_tty(void)920 is_stdout_tty(void)
921 {
922 return isatty(fileno(stdout));
923 }
924 
925 static BOOL
is_file_tty(FILE * f)926 is_file_tty(FILE *f)
927 {
928 return isatty(fileno(f));
929 }
930 #endif
931 
932 
933 /************* Print optionally coloured match Unix-style and z/OS **********/
934 
935 static void
print_match(const void * buf,int length)936 print_match(const void *buf, int length)
937 {
938 if (length == 0) return;
939 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
940 FWRITE_IGNORE(buf, 1, length, stdout);
941 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
942 }
943 
944 /* End of Unix-style or native z/OS environment functions. */
945 
946 
947 /************* Directory scanning in Windows ***********/
948 
949 /* I (Philip Hazel) have no means of testing this code. It was contributed by
950 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
951 when it did not exist. David Byron added a patch that moved the #include of
952 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
953 */
954 
955 #elif defined WIN32
956 
957 #ifndef INVALID_FILE_ATTRIBUTES
958 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
959 #endif
960 
961 typedef struct directory_type
962 {
963 HANDLE handle;
964 BOOL first;
965 WIN32_FIND_DATA data;
966 } directory_type;
967 
968 #define FILESEP '/'
969 
970 int
isdirectory(char * filename)971 isdirectory(char *filename)
972 {
973 DWORD attr = GetFileAttributes(filename);
974 if (attr == INVALID_FILE_ATTRIBUTES)
975   return 0;
976 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
977 }
978 
979 directory_type *
opendirectory(char * filename)980 opendirectory(char *filename)
981 {
982 size_t len;
983 char *pattern;
984 directory_type *dir;
985 DWORD err;
986 len = strlen(filename);
987 pattern = (char *)malloc(len + 3);
988 dir = (directory_type *)malloc(sizeof(*dir));
989 if ((pattern == NULL) || (dir == NULL))
990   {
991   fprintf(stderr, "pcre2grep: malloc failed\n");
992   pcre2grep_exit(2);
993   }
994 memcpy(pattern, filename, len);
995 if (iswild(filename))
996   pattern[len] = 0;
997 else
998   memcpy(&(pattern[len]), "\\*", 3);
999 dir->handle = FindFirstFile(pattern, &(dir->data));
1000 if (dir->handle != INVALID_HANDLE_VALUE)
1001   {
1002   free(pattern);
1003   dir->first = TRUE;
1004   return dir;
1005   }
1006 err = GetLastError();
1007 free(pattern);
1008 free(dir);
1009 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1010 return NULL;
1011 }
1012 
1013 char *
readdirectory(directory_type * dir)1014 readdirectory(directory_type *dir)
1015 {
1016 for (;;)
1017   {
1018   if (!dir->first)
1019     {
1020     if (!FindNextFile(dir->handle, &(dir->data)))
1021       return NULL;
1022     }
1023   else
1024     {
1025     dir->first = FALSE;
1026     }
1027   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1028     return dir->data.cFileName;
1029   }
1030 #ifndef _MSC_VER
1031 return NULL;   /* Keep compiler happy; never executed */
1032 #endif
1033 }
1034 
1035 void
closedirectory(directory_type * dir)1036 closedirectory(directory_type *dir)
1037 {
1038 FindClose(dir->handle);
1039 free(dir);
1040 }
1041 
1042 
1043 /************* Test for regular file in Windows **********/
1044 
1045 /* I don't know how to do this, or if it can be done; assume all paths are
1046 regular if they are not directories. */
1047 
/* Windows version: any path that is not a directory is taken to be a regular
file. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
1052 
1053 
1054 /************* Test for a terminal in Windows **********/
1055 
1056 static BOOL
is_stdout_tty(void)1057 is_stdout_tty(void)
1058 {
1059 return _isatty(_fileno(stdout));
1060 }
1061 
1062 static BOOL
is_file_tty(FILE * f)1063 is_file_tty(FILE *f)
1064 {
1065 return _isatty(_fileno(f));
1066 }
1067 
1068 
1069 /************* Print optionally coloured match in Windows **********/
1070 
1071 static void
print_match(const void * buf,int length)1072 print_match(const void *buf, int length)
1073 {
1074 if (length == 0) return;
1075 if (do_colour)
1076   {
1077   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1078     else SetConsoleTextAttribute(hstdout, match_colour);
1079   }
1080 FWRITE_IGNORE(buf, 1, length, stdout);
1081 if (do_colour)
1082   {
1083   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1084     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1085   }
1086 }
1087 
1088 /* End of Windows functions */
1089 
1090 
1091 /************* Directory scanning when we can't do it ***********/
1092 
1093 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1094 
1095 #else
1096 
1097 #define FILESEP 0
1098 typedef void directory_type;
1099 
/* No directory support: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
opendirectory(char * filename)1101 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* No directory support: there is never an entry to return. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}
/* No directory support: there is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
1104 
1105 
1106 /************* Test for regular file when we can't do it **********/
1107 
1108 /* Assume all files are regular. */
1109 
/* No file-type support: every path is assumed to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
1111 
1112 
1113 /************* Test for a terminal when we can't do it **********/
1114 
1115 static BOOL
is_stdout_tty(void)1116 is_stdout_tty(void)
1117 {
1118 return FALSE;
1119 }
1120 
1121 static BOOL
is_file_tty(FILE * f)1122 is_file_tty(FILE *f)
1123 {
1124 return FALSE;
1125 }
1126 
1127 
1128 /************* Print optionally coloured match when we can't do it **********/
1129 
/* No colour support: write the matched text to stdout as it stands. An empty
match produces no output at all. */

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1136 
1137 #endif  /* End of system-specific functions */
1138 
1139 
1140 
1141 #ifndef HAVE_STRERROR
1142 /*************************************************
1143 *     Provide strerror() for non-ANSI libraries  *
1144 *************************************************/
1145 
1146 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1147 in their libraries, but can provide the same facility by this simple
1148 alternative function. */
1149 
1150 extern int   sys_nerr;
1151 extern char *sys_errlist[];
1152 
1153 char *
strerror(int n)1154 strerror(int n)
1155 {
1156 if (n < 0 || n >= sys_nerr) return "unknown error number";
1157 return sys_errlist[n];
1158 }
1159 #endif /* HAVE_STRERROR */
1160 
1161 
1162 
1163 /*************************************************
1164 *                Usage function                  *
1165 *************************************************/
1166 
1167 static int
usage(int rc)1168 usage(int rc)
1169 {
1170 option_item *op;
1171 fprintf(stderr, "Usage: pcre2grep [-");
1172 for (op = optionlist; op->one_char != 0; op++)
1173   {
1174   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1175   }
1176 fprintf(stderr, "] [long options] [pattern] [files]\n");
1177 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1178   "options.\n");
1179 return rc;
1180 }
1181 
1182 
1183 
1184 /*************************************************
1185 *                Help function                   *
1186 *************************************************/
1187 
1188 static void
help(void)1189 help(void)
1190 {
1191 option_item *op;
1192 
1193 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1194 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1195 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1196 
1197 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1198 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1199 printf("All callout scripts in patterns are supported." STDOUT_NL);
1200 #else
1201 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1202 #endif
1203 #else
1204 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1205 #endif
1206 
1207 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1208 
1209 #ifdef SUPPORT_LIBZ
1210 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1211 #endif
1212 
1213 #ifdef SUPPORT_LIBBZ2
1214 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1215 #endif
1216 
1217 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1218 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1219 #else
1220 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1221 #endif
1222 
1223 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1224 printf("Options:" STDOUT_NL);
1225 
1226 for (op = optionlist; op->one_char != 0; op++)
1227   {
1228   int n;
1229   char s[4];
1230 
1231   if (op->one_char > 0 && (op->long_name)[0] == 0)
1232     n = 31 - printf("  -%c", op->one_char);
1233   else
1234     {
1235     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1236       else strcpy(s, "   ");
1237     n = 31 - printf("  %s --%s", s, op->long_name);
1238     }
1239 
1240   if (n < 1) n = 1;
1241   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1242   }
1243 
1244 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1245 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1246 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1247 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1248 printf("space is removed and blank lines are ignored." STDOUT_NL);
1249 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1250 
1251 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1252 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1253 }
1254 
1255 
1256 
1257 /*************************************************
1258 *            Test exclude/includes               *
1259 *************************************************/
1260 
1261 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1262 there are no includes, the path must match an include pattern.
1263 
1264 Arguments:
1265   path      the path to be matched
1266   ip        the chain of include patterns
1267   ep        the chain of exclude patterns
1268 
1269 Returns:    TRUE if the path is not excluded
1270 */
1271 
1272 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1273 test_incexc(char *path, patstr *ip, patstr *ep)
1274 {
1275 int plen = strlen((const char *)path);
1276 
1277 for (; ep != NULL; ep = ep->next)
1278   {
1279   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1280     return FALSE;
1281   }
1282 
1283 if (ip == NULL) return TRUE;
1284 
1285 for (; ip != NULL; ip = ip->next)
1286   {
1287   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1288     return TRUE;
1289   }
1290 
1291 return FALSE;
1292 }
1293 
1294 
1295 
1296 /*************************************************
1297 *         Decode integer argument value          *
1298 *************************************************/
1299 
1300 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1301 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1302 just keep it simple.
1303 
1304 Arguments:
1305   option_data   the option data string
1306   op            the option item (for error messages)
1307   longop        TRUE if option given in long form
1308 
1309 Returns:        a long integer
1310 */
1311 
1312 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1313 decode_number(char *option_data, option_item *op, BOOL longop)
1314 {
1315 unsigned long int n = 0;
1316 char *endptr = option_data;
1317 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1318 while (isdigit((unsigned char)(*endptr)))
1319   n = n * 10 + (int)(*endptr++ - '0');
1320 if (toupper(*endptr) == 'K')
1321   {
1322   n *= 1024;
1323   endptr++;
1324   }
1325 else if (toupper(*endptr) == 'M')
1326   {
1327   n *= 1024*1024;
1328   endptr++;
1329   }
1330 
1331 if (*endptr != 0)   /* Error */
1332   {
1333   if (longop)
1334     {
1335     char *equals = strchr(op->long_name, '=');
1336     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1337       (int)(equals - op->long_name);
1338     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1339       option_data, nlen, op->long_name);
1340     }
1341   else
1342     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1343       option_data, op->one_char);
1344   pcre2grep_exit(usage(2));
1345   }
1346 
1347 return n;
1348 }
1349 
1350 
1351 
1352 /*************************************************
1353 *       Add item to a chain of numbers           *
1354 *************************************************/
1355 
1356 /* Used to add an item onto a chain, or just return an unconnected item if the
1357 "after" argument is NULL.
1358 
1359 Arguments:
1360   n          the number to add
1361   after      if not NULL points to item to insert after
1362 
1363 Returns:     new number block
1364 */
1365 
1366 static omstr *
add_number(int n,omstr * after)1367 add_number(int n, omstr *after)
1368 {
1369 omstr *om = (omstr *)malloc(sizeof(omstr));
1370 
1371 if (om == NULL)
1372   {
1373   fprintf(stderr, "pcre2grep: malloc failed\n");
1374   pcre2grep_exit(2);
1375   }
1376 om->next = NULL;
1377 om->groupnum = n;
1378 
1379 if (after != NULL)
1380   {
1381   om->next = after->next;
1382   after->next = om;
1383   }
1384 return om;
1385 }
1386 
1387 
1388 
1389 /*************************************************
1390 *            Read one line of input              *
1391 *************************************************/
1392 
1393 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1394 BZ2_read) into a large buffer, so many lines may be read at once. However,
1395 doing this for tty input means that no output appears until a lot of input has
1396 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1397 for this, because it does not stop at a binary zero, and therefore there is no
1398 way of telling how many characters it has read, because there may be binary
1399 zeros embedded in the data. This function is also used for reading patterns
1400 from files (the -f option).
1401 
1402 Arguments:
1403   buffer     the buffer to read into
1404   length     the maximum number of characters to read
1405   f          the file
1406 
1407 Returns:     the number of characters read, zero at end of file
1408 */
1409 
1410 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1411 read_one_line(char *buffer, int length, FILE *f)
1412 {
1413 int c;
1414 int yield = 0;
1415 while ((c = fgetc(f)) != EOF)
1416   {
1417   buffer[yield++] = c;
1418   if (c == '\n' || yield >= length) break;
1419   }
1420 return yield;
1421 }
1422 
1423 
1424 
1425 /*************************************************
1426 *             Find end of line                   *
1427 *************************************************/
1428 
1429 /* The length of the endline sequence that is found is set via lenptr. This may
1430 be zero at the very end of the file if there is no line-ending sequence there.
1431 
1432 Arguments:
1433   p         current position in line
1434   endptr    end of available data
1435   lenptr    where to put the length of the eol sequence
1436 
1437 Returns:    pointer after the last byte of the line,
1438             including the newline byte(s)
1439 */
1440 
1441 static char *
end_of_line(char * p,char * endptr,int * lenptr)1442 end_of_line(char *p, char *endptr, int *lenptr)
1443 {
1444 switch(endlinetype)
1445   {
1446   default:      /* Just in case */
1447   case PCRE2_NEWLINE_LF:
1448   while (p < endptr && *p != '\n') p++;
1449   if (p < endptr)
1450     {
1451     *lenptr = 1;
1452     return p + 1;
1453     }
1454   *lenptr = 0;
1455   return endptr;
1456 
1457   case PCRE2_NEWLINE_CR:
1458   while (p < endptr && *p != '\r') p++;
1459   if (p < endptr)
1460     {
1461     *lenptr = 1;
1462     return p + 1;
1463     }
1464   *lenptr = 0;
1465   return endptr;
1466 
1467   case PCRE2_NEWLINE_NUL:
1468   while (p < endptr && *p != '\0') p++;
1469   if (p < endptr)
1470     {
1471     *lenptr = 1;
1472     return p + 1;
1473     }
1474   *lenptr = 0;
1475   return endptr;
1476 
1477   case PCRE2_NEWLINE_CRLF:
1478   for (;;)
1479     {
1480     while (p < endptr && *p != '\r') p++;
1481     if (++p >= endptr)
1482       {
1483       *lenptr = 0;
1484       return endptr;
1485       }
1486     if (*p == '\n')
1487       {
1488       *lenptr = 2;
1489       return p + 1;
1490       }
1491     }
1492   break;
1493 
1494   case PCRE2_NEWLINE_ANYCRLF:
1495   while (p < endptr)
1496     {
1497     int extra = 0;
1498     int c = *((unsigned char *)p);
1499 
1500     if (utf && c >= 0xc0)
1501       {
1502       int gcii, gcss;
1503       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1504       gcss = 6*extra;
1505       c = (c & utf8_table3[extra]) << gcss;
1506       for (gcii = 1; gcii <= extra; gcii++)
1507         {
1508         gcss -= 6;
1509         c |= (p[gcii] & 0x3f) << gcss;
1510         }
1511       }
1512 
1513     p += 1 + extra;
1514 
1515     switch (c)
1516       {
1517       case '\n':
1518       *lenptr = 1;
1519       return p;
1520 
1521       case '\r':
1522       if (p < endptr && *p == '\n')
1523         {
1524         *lenptr = 2;
1525         p++;
1526         }
1527       else *lenptr = 1;
1528       return p;
1529 
1530       default:
1531       break;
1532       }
1533     }   /* End of loop for ANYCRLF case */
1534 
1535   *lenptr = 0;  /* Must have hit the end */
1536   return endptr;
1537 
1538   case PCRE2_NEWLINE_ANY:
1539   while (p < endptr)
1540     {
1541     int extra = 0;
1542     int c = *((unsigned char *)p);
1543 
1544     if (utf && c >= 0xc0)
1545       {
1546       int gcii, gcss;
1547       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1548       gcss = 6*extra;
1549       c = (c & utf8_table3[extra]) << gcss;
1550       for (gcii = 1; gcii <= extra; gcii++)
1551         {
1552         gcss -= 6;
1553         c |= (p[gcii] & 0x3f) << gcss;
1554         }
1555       }
1556 
1557     p += 1 + extra;
1558 
1559     switch (c)
1560       {
1561       case '\n':    /* LF */
1562       case '\v':    /* VT */
1563       case '\f':    /* FF */
1564       *lenptr = 1;
1565       return p;
1566 
1567       case '\r':    /* CR */
1568       if (p < endptr && *p == '\n')
1569         {
1570         *lenptr = 2;
1571         p++;
1572         }
1573       else *lenptr = 1;
1574       return p;
1575 
1576 #ifndef EBCDIC
1577       case 0x85:    /* Unicode NEL */
1578       *lenptr = utf? 2 : 1;
1579       return p;
1580 
1581       case 0x2028:  /* Unicode LS */
1582       case 0x2029:  /* Unicode PS */
1583       *lenptr = 3;
1584       return p;
1585 #endif  /* Not EBCDIC */
1586 
1587       default:
1588       break;
1589       }
1590     }   /* End of loop for ANY case */
1591 
1592   *lenptr = 0;  /* Must have hit the end */
1593   return endptr;
1594   }     /* End of overall switch */
1595 }
1596 
1597 
1598 
1599 /*************************************************
1600 *         Find start of previous line            *
1601 *************************************************/
1602 
1603 /* This is called when looking back for before lines to print.
1604 
1605 Arguments:
1606   p         start of the subsequent line
1607   startptr  start of available data
1608 
1609 Returns:    pointer to the start of the previous line
1610 */
1611 
1612 static char *
previous_line(char * p,char * startptr)1613 previous_line(char *p, char *startptr)
1614 {
1615 switch(endlinetype)
1616   {
1617   default:      /* Just in case */
1618   case PCRE2_NEWLINE_LF:
1619   p--;
1620   while (p > startptr && p[-1] != '\n') p--;
1621   return p;
1622 
1623   case PCRE2_NEWLINE_CR:
1624   p--;
1625   while (p > startptr && p[-1] != '\n') p--;
1626   return p;
1627 
1628   case PCRE2_NEWLINE_NUL:
1629   p--;
1630   while (p > startptr && p[-1] != '\0') p--;
1631   return p;
1632 
1633   case PCRE2_NEWLINE_CRLF:
1634   for (;;)
1635     {
1636     p -= 2;
1637     while (p > startptr && p[-1] != '\n') p--;
1638     if (p <= startptr + 1 || p[-2] == '\r') return p;
1639     }
1640   /* Control can never get here */
1641 
1642   case PCRE2_NEWLINE_ANY:
1643   case PCRE2_NEWLINE_ANYCRLF:
1644   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1645   if (utf) while ((*p & 0xc0) == 0x80) p--;
1646 
1647   while (p > startptr)
1648     {
1649     unsigned int c;
1650     char *pp = p - 1;
1651 
1652     if (utf)
1653       {
1654       int extra = 0;
1655       while ((*pp & 0xc0) == 0x80) pp--;
1656       c = *((unsigned char *)pp);
1657       if (c >= 0xc0)
1658         {
1659         int gcii, gcss;
1660         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1661         gcss = 6*extra;
1662         c = (c & utf8_table3[extra]) << gcss;
1663         for (gcii = 1; gcii <= extra; gcii++)
1664           {
1665           gcss -= 6;
1666           c |= (pp[gcii] & 0x3f) << gcss;
1667           }
1668         }
1669       }
1670     else c = *((unsigned char *)pp);
1671 
1672     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1673       {
1674       case '\n':    /* LF */
1675       case '\r':    /* CR */
1676       return p;
1677 
1678       default:
1679       break;
1680       }
1681 
1682     else switch (c)
1683       {
1684       case '\n':    /* LF */
1685       case '\v':    /* VT */
1686       case '\f':    /* FF */
1687       case '\r':    /* CR */
1688 #ifndef EBCDIC
1689       case 0x85:    /* Unicode NEL */
1690       case 0x2028:  /* Unicode LS */
1691       case 0x2029:  /* Unicode PS */
1692 #endif  /* Not EBCDIC */
1693       return p;
1694 
1695       default:
1696       break;
1697       }
1698 
1699     p = pp;  /* Back one character */
1700     }        /* End of loop for ANY case */
1701 
1702   return startptr;  /* Hit start of data */
1703   }     /* End of overall switch */
1704 }
1705 
1706 
1707 
1708 /*************************************************
1709 *              Output newline at end             *
1710 *************************************************/
1711 
1712 /* This function is called if the final line of a file has been written to
1713 stdout, but it does not have a terminating newline.
1714 
1715 Arguments:  none
1716 Returns:    nothing
1717 */
1718 
1719 static void
write_final_newline(void)1720 write_final_newline(void)
1721 {
1722 switch(endlinetype)
1723   {
1724   default:      /* Just in case */
1725   case PCRE2_NEWLINE_LF:
1726   case PCRE2_NEWLINE_ANY:
1727   case PCRE2_NEWLINE_ANYCRLF:
1728   fprintf(stdout, "\n");
1729   break;
1730 
1731   case PCRE2_NEWLINE_CR:
1732   fprintf(stdout, "\r");
1733   break;
1734 
1735   case PCRE2_NEWLINE_CRLF:
1736   fprintf(stdout, "\r\n");
1737   break;
1738 
1739   case PCRE2_NEWLINE_NUL:
1740   fprintf(stdout, "%c", 0);
1741   break;
1742   }
1743 }
1744 
1745 
1746 /*************************************************
1747 *       Print the previous "after" lines         *
1748 *************************************************/
1749 
1750 /* This is called if we are about to lose said lines because of buffer filling,
1751 and at the end of the file. The data in the line is written using fwrite() so
1752 that a binary zero does not terminate it.
1753 
1754 Arguments:
1755   lastmatchnumber   the number of the last matching line, plus one
1756   lastmatchrestart  where we restarted after the last match
1757   endptr            end of available data
1758   printname         filename for printing
1759 
1760 Returns:            nothing
1761 */
1762 
1763 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1764 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1765   char *endptr, const char *printname)
1766 {
1767 if (after_context > 0 && lastmatchnumber > 0)
1768   {
1769   int count = 0;
1770   int ellength = 0;
1771   while (lastmatchrestart < endptr && count < after_context)
1772     {
1773     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1774     if (ellength == 0 && pp == main_buffer + bufsize) break;
1775     if (printname != NULL) fprintf(stdout, "%s-", printname);
1776     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1777     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1778     lastmatchrestart = pp;
1779     count++;
1780     }
1781 
1782   /* If we have printed any lines, arrange for a hyphen separator if anything
1783   else follows. Also, if the last line is the final line in the file and it had
1784   no newline, add one. */
1785 
1786   if (count > 0)
1787     {
1788     hyphenpending = TRUE;
1789     if (ellength == 0 && lastmatchrestart >= endptr)
1790       write_final_newline();
1791     }
1792   }
1793 }
1794 
1795 
1796 
1797 /*************************************************
1798 *   Apply patterns to subject till one matches   *
1799 *************************************************/
1800 
1801 /* This function is called to run through all patterns, looking for a match. It
1802 is used multiple times for the same subject when colouring is enabled, in order
1803 to find all possible matches.
1804 
1805 Arguments:
1806   matchptr     the start of the subject
1807   length       the length of the subject to match
1808   options      options for pcre2_match
1809   startoffset  where to start matching
1810   mrc          address of where to put the result of pcre2_match()
1811 
1812 Returns:      TRUE if there was a match
1813               FALSE if there was no match
1814               invert if there was a non-fatal error
1815 */
1816 
1817 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1818 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1819   PCRE2_SIZE startoffset, int *mrc)
1820 {
1821 int i;
1822 PCRE2_SIZE slen = length;
1823 patstr *p = patterns;
1824 const char *msg = "this text:\n\n";
1825 
1826 if (slen > 200)
1827   {
1828   slen = 200;
1829   msg = "text that starts:\n\n";
1830   }
1831 
1832 for (i = 1; p != NULL; p = p->next, i++)
1833   {
1834   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1835     startoffset, options, match_data, match_context);
1836   if (*mrc >= 0) return TRUE;
1837   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1838   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1839   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1840   fprintf(stderr, "%s", msg);
1841   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1842   fprintf(stderr, "\n\n");
1843   if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1844       *mrc >= PCRE2_ERROR_UTF8_ERR21)
1845     {
1846     unsigned char mbuffer[256];
1847     PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1848     (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
1849     fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1850     }
1851   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1852       *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1853     resource_error = TRUE;
1854   if (error_count++ > 20)
1855     {
1856     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1857     pcre2grep_exit(2);
1858     }
1859   return invert;    /* No more matching; don't show the line again */
1860   }
1861 
1862 return FALSE;  /* No match, no errors */
1863 }
1864 
1865 
1866 
1867 /*************************************************
1868 *          Decode dollar escape sequence         *
1869 *************************************************/
1870 
1871 /* Called from various places to decode $ escapes in output strings. The escape
1872 sequences are as follows:
1873 
1874 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1875 zero is never returned; '0' is substituted.
1876 
1877 $a returns bell.
1878 $b returns backspace.
1879 $e returns escape.
1880 $f returns form feed.
1881 $n returns newline.
1882 $r returns carriage return.
1883 $t returns tab.
1884 $v returns vertical tab.
1885 $o<digits> returns the character represented by the given octal
1886   number; up to three digits are processed.
1887 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1888   code points.
1889 $x<digits> returns the character represented by the given hexadecimal
1890   number; up to two digits are processed.
1891 $x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1892   code points.
1893 Any other character is substituted by itself. E.g: $$ is replaced by a single
1894 dollar.
1895 
1896 Arguments:
1897   begin      the start of the whole string
1898   string     points to the $
1899   callout    TRUE if in a callout (inhibits error messages)
1900   value      where to return a value
1901   last       where to return pointer to the last used character
1902 
1903 Returns:     DDE_ERROR    after a syntax error
1904              DDE_CAPTURE  if *value is a capture number
1905              DDE_CHAR     if *value is a character code
1906 */
1907 
1908 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1909 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1910   uint32_t *value, PCRE2_SPTR *last)
1911 {
1912 uint32_t c = 0;
1913 int base = 10;
1914 int dcount;
1915 int rc = DDE_CHAR;
1916 BOOL brace = FALSE;
1917 
1918 switch (*(++string))
1919   {
1920   case 0:   /* Syntax error: a character must be present after $. */
1921   if (!callout)
1922     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1923       (int)(string - begin), "no character after $");
1924   *last = string;
1925   return DDE_ERROR;
1926 
1927   case '{':
1928   brace = TRUE;
1929   string++;
1930   if (!isdigit(*string))  /* Syntax error: a decimal number required. */
1931     {
1932     if (!callout)
1933       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1934         (int)(string - begin), "decimal number expected");
1935     rc = DDE_ERROR;
1936     break;
1937     }
1938 
1939   /* Fall through */
1940 
1941   /* The maximum capture number is 65535, so any number greater than that will
1942   always be an unknown capture number. We just stop incrementing, in order to
1943   avoid overflow. */
1944 
1945   case '0': case '1': case '2': case '3': case '4':
1946   case '5': case '6': case '7': case '8': case '9':
1947   do
1948     {
1949     if (c <= 65535) c = c * 10 + (*string - '0');
1950     string++;
1951     }
1952   while (*string >= '0' && *string <= '9');
1953   string--;  /* Point to last digit */
1954 
1955   /* In a callout, capture number 0 is not available. No error can be given,
1956   so just return the character '0'. */
1957 
1958   if (callout && c == 0)
1959     {
1960     *value = '0';
1961     }
1962   else
1963     {
1964     *value = c;
1965     rc = DDE_CAPTURE;
1966     }
1967   break;
1968 
1969   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1970   for valid Unicode code points. */
1971 
1972   case 'o':
1973   base = 8;
1974   string++;
1975   if (*string == '{')
1976     {
1977     brace = TRUE;
1978     string++;
1979     dcount = 7;
1980     }
1981   else dcount = 3;
1982   for (; dcount > 0; dcount--)
1983     {
1984     if (*string < '0' || *string > '7') break;
1985     c = c * 8 + (*string++ - '0');
1986     }
1987   *value = c;
1988   string--;  /* Point to last digit */
1989   break;
1990 
1991   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
1992   for valid Unicode code points. */
1993 
1994   case 'x':
1995   base = 16;
1996   string++;
1997   if (*string == '{')
1998     {
1999     brace = TRUE;
2000     string++;
2001     dcount = 6;
2002     }
2003   else dcount = 2;
2004   for (; dcount > 0; dcount--)
2005     {
2006     if (!isxdigit(*string)) break;
2007     if (*string >= '0' && *string <= '9')
2008       c = c *16 + *string++ - '0';
2009     else
2010       c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2011     }
2012   *value = c;
2013   string--;  /* Point to last digit */
2014   break;
2015 
2016   case 'a': *value = '\a'; break;
2017   case 'b': *value = '\b'; break;
2018 #ifndef EBCDIC
2019   case 'e': *value = '\033'; break;
2020 #else
2021   case 'e': *value = '\047'; break;
2022 #endif
2023   case 'f': *value = '\f'; break;
2024   case 'n': *value = STDOUT_NL_CODE; break;
2025   case 'r': *value = '\r'; break;
2026   case 't': *value = '\t'; break;
2027   case 'v': *value = '\v'; break;
2028 
2029   default: *value = *string; break;
2030   }
2031 
2032 if (brace)
2033   {
2034   c = string[1];
2035   if (c != '}')
2036     {
2037     rc = DDE_ERROR;
2038     if (!callout)
2039       {
2040       if ((base == 8 && c >= '0' && c <= '7') ||
2041           (base == 16 && isxdigit(c)))
2042         {
2043         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2044           "too many %s digits\n", (int)(string - begin),
2045           (base == 8)? "octal" : "hex");
2046         }
2047       else
2048         {
2049         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2050           (int)(string - begin), "missing closing brace");
2051         }
2052       }
2053     }
2054   else string++;
2055   }
2056 
2057 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2058 
2059 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2060   {
2061   uint32_t max = utf? 0x0010ffffu : 0xffu;
2062   if (*value > max)
2063     {
2064     if (!callout)
2065       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2066         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2067     rc = DDE_ERROR;
2068     }
2069   }
2070 
2071 *last = string;
2072 return rc;
2073 }
2074 
2075 
2076 
2077 /*************************************************
2078 *          Check output text for errors          *
2079 *************************************************/
2080 
2081 /* Called early, to get errors before doing anything for -O text; also called
2082 from callouts to check before outputting.
2083 
2084 Arguments:
2085   string    an --output text string
2086   callout   TRUE if in a callout (stops printing errors)
2087 
2088 Returns:    TRUE if OK, FALSE on error
2089 */
2090 
2091 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2092 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2093 {
2094 uint32_t value;
2095 PCRE2_SPTR begin = string;
2096 
2097 for (; *string != 0; string++)
2098   {
2099   if (*string == '$' &&
2100     decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2101       return FALSE;
2102   }
2103 
2104 return TRUE;
2105 }
2106 
2107 
2108 /*************************************************
2109 *              Display output text               *
2110 *************************************************/
2111 
2112 /* Display the output text, which is assumed to have already been syntax
2113 checked. Output may contain escape sequences started by the dollar sign.
2114 
2115 Arguments:
2116   string:       the output text
2117   callout:      TRUE for the builtin callout, FALSE for --output
2118   subject       the start of the subject
2119   ovector:      capture offsets
2120   capture_top:  number of captures
2121 
2122 Returns:        TRUE if something was output, other than newline
2123                 FALSE if nothing was output, or newline was last output
2124 */
2125 
2126 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2127 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2128   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2129 {
2130 uint32_t value;
2131 BOOL printed = FALSE;
2132 PCRE2_SPTR begin = string;
2133 
2134 for (; *string != 0; string++)
2135   {
2136   if (*string == '$')
2137     {
2138     switch(decode_dollar_escape(begin, string, callout, &value, &string))
2139       {
2140       case DDE_CHAR:
2141       if (value == STDOUT_NL_CODE)
2142         {
2143         fprintf(stdout, STDOUT_NL);
2144         printed = FALSE;
2145         continue;
2146         }
2147       break;  /* Will print value */
2148 
2149       case DDE_CAPTURE:
2150       if (value < capture_top)
2151         {
2152         PCRE2_SIZE capturesize;
2153         value *= 2;
2154         capturesize = ovector[value + 1] - ovector[value];
2155         if (capturesize > 0)
2156           {
2157           print_match(subject + ovector[value], capturesize);
2158           printed = TRUE;
2159           }
2160         }
2161       continue;
2162 
2163       default:  /* Should not occur */
2164       break;
2165       }
2166     }
2167 
2168   else value = *string;  /* Not a $ escape */
2169 
2170   if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2171     {
2172     int i;
2173     int n = ord2utf8(value);
2174     for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2175     }
2176 
2177   printed = TRUE;
2178   }
2179 
2180 return printed;
2181 }
2182 
2183 
2184 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2185 
2186 /*************************************************
2187 *        Parse and execute callout scripts       *
2188 *************************************************/
2189 
2190 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2191 string block and executes the program specified by the string. The string is a
2192 list of substrings separated by pipe characters. The first substring represents
2193 the executable name, and the following substrings specify the arguments:
2194 
2195   program_name|param1|param2|...
2196 
2197 Any substring (including the program name) can contain escape sequences
2198 started by the dollar character. The escape sequences are substituted as
2199 follows:
2200 
2201   $<digits> or ${<digits>} is replaced by the captured substring of the given
2202   decimal number, which must be greater than zero. If the number is greater
2203   than the number of capturing substrings, or if the capture is unset, the
2204   replacement is empty.
2205 
2206   Any other character is substituted by itself. E.g: $$ is replaced by a single
2207   dollar or $| replaced by a pipe character.
2208 
2209 Alternatively, if string starts with pipe, the remainder is taken as an output
2210 string, same as --output. This is the only form that is supported if
2211 SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2212 separate each callout, defaulting to newline.
2213 
2214 Example:
2215 
2216   echo -e "abcde\n12345" | pcre2grep \
2217     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2218 
2219   Output:
2220 
2221     Arg1: [a] [bcd] [d] Arg2: |a| ()
2222     abcde
2223     Arg1: [1] [234] [4] Arg2: |1| ()
2224     12345
2225 
2226 Arguments:
2227   blockptr     the callout block
2228 
2229 Returns:       currently it always returns with 0
2230 */
2231 
2232 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2233 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2234 {
2235 PCRE2_SIZE length = calloutptr->callout_string_length;
2236 PCRE2_SPTR string = calloutptr->callout_string;
2237 PCRE2_SPTR subject = calloutptr->subject;
2238 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2239 PCRE2_SIZE capture_top = calloutptr->capture_top;
2240 
2241 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2242 PCRE2_SIZE argsvectorlen = 2;
2243 PCRE2_SIZE argslen = 1;
2244 char *args;
2245 char *argsptr;
2246 char **argsvector;
2247 char **argsvectorptr;
2248 #ifndef WIN32
2249 pid_t pid;
2250 #endif
2251 int result = 0;
2252 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2253 
2254 (void)unused;   /* Avoid compiler warning */
2255 
2256 /* Only callouts with strings are supported. */
2257 
2258 if (string == NULL || length == 0) return 0;
2259 
2260 /* If there's no command, output the remainder directly. */
2261 
2262 if (*string == '|')
2263   {
2264   string++;
2265   if (!syntax_check_output_text(string, TRUE)) return 0;
2266   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2267   return 0;
2268   }
2269 
2270 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2271 return 0;
2272 #else
2273 
2274 /* Checking syntax and compute the number of string fragments. Callout strings
2275 are silently ignored in the event of a syntax error. */
2276 
2277 while (length > 0)
2278   {
2279   if (*string == '|')
2280     {
2281     argsvectorlen++;
2282     if (argsvectorlen > 10000) return 0;  /* Too many args */
2283     }
2284 
2285   else if (*string == '$')
2286     {
2287     uint32_t value;
2288     PCRE2_SPTR begin = string;
2289 
2290     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2291       {
2292       case DDE_CAPTURE:
2293       if (value < capture_top)
2294         {
2295         value *= 2;
2296         argslen += ovector[value + 1] - ovector[value];
2297         }
2298       argslen--;   /* Negate the effect of argslen++ below. */
2299       break;
2300 
2301       case DDE_CHAR:
2302       if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2303         else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2304       break;
2305 
2306       default:         /* Should not occur */
2307       case DDE_ERROR:
2308       return 0;
2309       }
2310 
2311     length -= (string - begin);
2312     }
2313 
2314   string++;
2315   length--;
2316   argslen++;
2317   }
2318 
2319 /* Get memory for the argument vector and its strings. */
2320 
2321 args = (char*)malloc(argslen);
2322 if (args == NULL) return 0;
2323 
2324 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2325 if (argsvector == NULL)
2326   {
2327   free(args);
2328   return 0;
2329   }
2330 
2331 /* Now reprocess the string and set up the arguments. */
2332 
2333 argsptr = args;
2334 argsvectorptr = argsvector;
2335 *argsvectorptr++ = argsptr;
2336 
2337 length = calloutptr->callout_string_length;
2338 string = calloutptr->callout_string;
2339 
2340 while (length > 0)
2341   {
2342   if (*string == '|')
2343     {
2344     *argsptr++ = '\0';
2345     *argsvectorptr++ = argsptr;
2346     }
2347 
2348   else if (*string == '$')
2349     {
2350     uint32_t value;
2351     PCRE2_SPTR begin = string;
2352 
2353     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2354       {
2355       case DDE_CAPTURE:
2356       if (value < capture_top)
2357         {
2358         PCRE2_SIZE capturesize;
2359         value *= 2;
2360         capturesize = ovector[value + 1] - ovector[value];
2361         memcpy(argsptr, subject + ovector[value], capturesize);
2362         argsptr += capturesize;
2363         }
2364       break;
2365 
2366       case DDE_CHAR:
2367       if (value == STDOUT_NL_CODE)
2368         {
2369         memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2370         argsptr += STDOUT_NL_LEN;
2371         }
2372       else if (utf && value > 127)
2373         {
2374         int n = ord2utf8(value);
2375         memcpy(argsptr, utf8_buffer, n);
2376         argsptr += n;
2377         }
2378       else
2379         {
2380         *argsptr++ = value;
2381         }
2382       break;
2383 
2384       default:         /* Even though this should not occur, the string having */
2385       case DDE_ERROR:  /* been checked above, we need to include the free() */
2386       free(args);      /* calls so that source checkers do not complain. */
2387       free(argsvector);
2388       return 0;
2389       }
2390 
2391     length -= (string - begin);
2392     }
2393 
2394   else *argsptr++ = *string;
2395 
2396   /* Advance along the string */
2397 
2398   string++;
2399   length--;
2400   }
2401 
2402 *argsptr++ = '\0';
2403 *argsvectorptr = NULL;
2404 
2405 /* Running an external command is system-dependent. Handle Windows and VMS as
2406 necessary, otherwise assume fork(). */
2407 
2408 #ifdef WIN32
2409 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2410 
2411 #elif defined __VMS
2412   {
2413   char cmdbuf[500];
2414   short i = 0;
2415   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2416   $DESCRIPTOR(cmd, cmdbuf);
2417 
2418   cmdbuf[0] = 0;
2419   while (argsvector[i])
2420   {
2421     strcat(cmdbuf, argsvector[i]);
2422     strcat(cmdbuf, " ");
2423     i++;
2424   }
2425   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2426   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2427   if (!(status & 1)) result = 0;
2428   else result = retstat & 1 ? 0 : 1;
2429   }
2430 
2431 #else  /* Neither Windows nor VMS */
2432 pid = fork();
2433 if (pid == 0)
2434   {
2435   (void)execv(argsvector[0], argsvector);
2436   /* Control gets here if there is an error, e.g. a non-existent program */
2437   exit(1);
2438   }
2439 else if (pid > 0)
2440   (void)waitpid(pid, &result, 0);
2441 #endif  /* End Windows/VMS/other handling */
2442 
2443 free(args);
2444 free(argsvector);
2445 
2446 /* Currently negative return values are not supported, only zero (match
2447 continues) or non-zero (match fails). */
2448 
2449 return result != 0;
2450 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2451 }
2452 #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2453 
2454 
2455 
2456 /*************************************************
2457 *     Read a portion of the file into buffer     *
2458 *************************************************/
2459 
2460 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2461 fill_buffer(void *handle, int frtype, char *buffer, int length,
2462   BOOL input_line_buffered)
2463 {
2464 (void)frtype;  /* Avoid warning when not used */
2465 
2466 #ifdef SUPPORT_LIBZ
2467 if (frtype == FR_LIBZ)
2468   return gzread((gzFile)handle, buffer, length);
2469 else
2470 #endif
2471 
2472 #ifdef SUPPORT_LIBBZ2
2473 if (frtype == FR_LIBBZ2)
2474   return BZ2_bzread((BZFILE *)handle, buffer, length);
2475 else
2476 #endif
2477 
2478 return (input_line_buffered ?
2479   read_one_line(buffer, length, (FILE *)handle) :
2480   fread(buffer, 1, length, (FILE *)handle));
2481 }
2482 
2483 
2484 
2485 /*************************************************
2486 *            Grep an individual file             *
2487 *************************************************/
2488 
2489 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2490 times the value of bufthird. The matching point is never allowed to stray into
2491 the top third of the buffer, thus keeping more of the file available for
2492 context printing or for multiline scanning. For large files, the pointer will
2493 be in the middle third most of the time, so the bottom third is available for
2494 "before" context printing.
2495 
2496 Arguments:
2497   handle       the fopened FILE stream for a normal file
2498                the gzFile pointer when reading is via libz
2499                the BZFILE pointer when reading is via libbz2
2500   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2501   filename     the file name or NULL (for errors)
2502   printname    the file name if it is to be printed for each match
2503                or NULL if the file name is not to be printed
2504                it cannot be NULL if filenames[_nomatch]_only is set
2505 
2506 Returns:       0 if there was at least one match
2507                1 otherwise (no matches)
2508                2 if an overlong line is encountered
2509                3 if there is a read error on a .bz2 file
2510 */
2511 
2512 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2513 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2514 {
2515 int rc = 1;
2516 int filepos = 0;
2517 unsigned long int linenumber = 1;
2518 unsigned long int lastmatchnumber = 0;
2519 unsigned long int count = 0;
2520 long int count_matched_lines = 0;
2521 char *lastmatchrestart = main_buffer;
2522 char *ptr = main_buffer;
2523 char *endptr;
2524 PCRE2_SIZE bufflength;
2525 BOOL binary = FALSE;
2526 BOOL endhyphenpending = FALSE;
2527 BOOL lines_printed = FALSE;
2528 BOOL input_line_buffered = line_buffered;
2529 FILE *in = NULL;                    /* Ensure initialized */
2530 long stream_start = -1;             /* Only non-negative if relevant */
2531 
2532 /* Do the first read into the start of the buffer and set up the pointer to end
2533 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2534 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2535 fail. */
2536 
2537 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2538   {
2539   in = (FILE *)handle;
2540   if (feof(in))
2541     return 1;
2542   if (is_file_tty(in))
2543     input_line_buffered = TRUE;
2544   else
2545     {
2546     if (count_limit >= 0  && filename == stdin_name)
2547       stream_start = ftell(in);
2548     }
2549   }
2550 else input_line_buffered = FALSE;
2551 
2552 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2553   input_line_buffered);
2554 
2555 #ifdef SUPPORT_LIBBZ2
2556 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE */
2557 #endif
2558 
2559 endptr = main_buffer + bufflength;
2560 
2561 /* Unless binary-files=text, see if we have a binary file. This uses the same
2562 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2563 file. However, when the newline convention is binary zero, we can't do this. */
2564 
2565 if (binary_files != BIN_TEXT)
2566   {
2567   if (endlinetype != PCRE2_NEWLINE_NUL)
2568     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2569       != NULL;
2570   if (binary && binary_files == BIN_NOMATCH) return 1;
2571   }
2572 
2573 /* Loop while the current pointer is not at the end of the file. For large
2574 files, endptr will be at the end of the buffer when we are in the middle of the
2575 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2576 way, the buffer is shifted left and re-filled. */
2577 
2578 while (ptr < endptr)
2579   {
2580   int endlinelength;
2581   int mrc = 0;
2582   unsigned int options = 0;
2583   BOOL match;
2584   BOOL line_matched = FALSE;
2585   char *t = ptr;
2586   PCRE2_SIZE length, linelength;
2587   PCRE2_SIZE startoffset = 0;
2588 
2589   /* If the -m option set a limit for the number of matched or non-matched
2590   lines, check it here. A limit of zero means that no matching is ever done.
2591   For stdin from a file, set the file position. */
2592 
2593   if (count_limit >= 0 && count_matched_lines >= count_limit)
2594     {
2595     if (stream_start >= 0)
2596       (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2597     rc = (count_limit == 0)? 1 : 0;
2598     break;
2599     }
2600 
2601   /* At this point, ptr is at the start of a line. We need to find the length
2602   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2603   length remainder of the data in the buffer. Otherwise, it is the length of
2604   the next line, excluding the terminating newline. After matching, we always
2605   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2606   option is used for compiling, so that any match is constrained to be in the
2607   first line. */
2608 
2609   t = end_of_line(t, endptr, &endlinelength);
2610   linelength = t - ptr - endlinelength;
2611   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2612 
2613   /* Check to see if the line we are looking at extends right to the very end
2614   of the buffer without a line terminator. This means the line is too long to
2615   handle at the current buffer size. Until the buffer reaches its maximum size,
2616   try doubling it and reading more data. */
2617 
2618   if (endlinelength == 0 && t == main_buffer + bufsize)
2619     {
2620     if (bufthird < max_bufthird)
2621       {
2622       char *new_buffer;
2623       int new_bufthird = 2*bufthird;
2624 
2625       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2626       new_buffer = (char *)malloc(3*new_bufthird);
2627 
2628       if (new_buffer == NULL)
2629         {
2630         fprintf(stderr,
2631           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2632           "pcre2grep: not enough memory to increase the buffer size to %d\n",
2633           linenumber,
2634           (filename == NULL)? "" : " of file ",
2635           (filename == NULL)? "" : filename,
2636           new_bufthird);
2637         return 2;
2638         }
2639 
2640       /* Copy the data and adjust pointers to the new buffer location. */
2641 
2642       memcpy(new_buffer, main_buffer, bufsize);
2643       bufthird = new_bufthird;
2644       bufsize = 3*bufthird;
2645       ptr = new_buffer + (ptr - main_buffer);
2646       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2647       free(main_buffer);
2648       main_buffer = new_buffer;
2649 
2650       /* Read more data into the buffer and then try to find the line ending
2651       again. */
2652 
2653       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2654         bufsize - bufflength, input_line_buffered);
2655       endptr = main_buffer + bufflength;
2656       continue;
2657       }
2658     else
2659       {
2660       fprintf(stderr,
2661         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2662         "pcre2grep: the maximum buffer size is %d\n"
2663         "pcre2grep: use the --max-buffer-size option to change it\n",
2664         linenumber,
2665         (filename == NULL)? "" : " of file ",
2666         (filename == NULL)? "" : filename,
2667         bufthird);
2668       return 2;
2669       }
2670     }
2671 
2672   /* We come back here after a match when only_matching_count is non-zero, in
2673   order to find any further matches in the same line. This applies to
2674   --only-matching, --file-offsets, and --line-offsets. */
2675 
2676   ONLY_MATCHING_RESTART:
2677 
2678   /* Run through all the patterns until one matches or there is an error other
2679   than NOMATCH. This code is in a subroutine so that it can be re-used for
2680   finding subsequent matches when colouring matched lines. After finding one
2681   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2682   this line. */
2683 
2684   match = match_patterns(ptr, length, options, startoffset, &mrc);
2685   options = PCRE2_NOTEMPTY;
2686 
2687   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2688   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2689   return code - to output data lines, so that binary zeroes are treated as just
2690   another data character. */
2691 
2692   if (match != invert)
2693     {
2694     BOOL hyphenprinted = FALSE;
2695 
2696     /* We've failed if we want a file that doesn't have any matches. */
2697 
2698     if (filenames == FN_NOMATCH_ONLY) return 1;
2699 
2700     /* Remember that this line matched (for counting matched lines) */
2701 
2702     line_matched = TRUE;
2703 
2704     /* If all we want is a yes/no answer, we can return immediately. */
2705 
2706     if (quiet) return 0;
2707 
2708     /* Just count if just counting is wanted. */
2709 
2710     else if (count_only || show_total_count) count++;
2711 
2712     /* When handling a binary file and binary-files==binary, the "binary"
2713     variable will be set true (it's false in all other cases). In this
2714     situation we just want to output the file name. No need to scan further. */
2715 
2716     else if (binary)
2717       {
2718       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2719       return 0;
2720       }
2721 
2722     /* Likewise, if all we want is a file name, there is no need to scan any
2723     more lines in the file. */
2724 
2725     else if (filenames == FN_MATCH_ONLY)
2726       {
2727       fprintf(stdout, "%s" STDOUT_NL, printname);
2728       return 0;
2729       }
2730 
2731     /* The --only-matching option prints just the substring that matched,
2732     and/or one or more captured portions of it, as long as these strings are
2733     not empty. The --file-offsets and --line-offsets options output offsets for
2734     the matching substring (all three set only_matching_count non-zero). None
2735     of these mutually exclusive options prints any context. Afterwards, adjust
2736     the start and then jump back to look for further matches in the same line.
2737     If we are in invert mode, however, nothing is printed and we do not restart
2738     - this could still be useful because the return code is set. */
2739 
2740     else if (only_matching_count != 0)
2741       {
2742       if (!invert)
2743         {
2744         PCRE2_SIZE oldstartoffset;
2745 
2746         if (printname != NULL) fprintf(stdout, "%s:", printname);
2747         if (number) fprintf(stdout, "%lu:", linenumber);
2748 
2749         /* Handle --line-offsets */
2750 
2751         if (line_offsets)
2752           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2753             (int)(offsets[1] - offsets[0]));
2754 
2755         /* Handle --file-offsets */
2756 
2757         else if (file_offsets)
2758           fprintf(stdout, "%d,%d" STDOUT_NL,
2759             (int)(filepos + ptr + offsets[0] - ptr),
2760             (int)(offsets[1] - offsets[0]));
2761 
2762         /* Handle --output (which has already been syntax checked) */
2763 
2764         else if (output_text != NULL)
2765           {
2766           if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2767               (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2768               number)
2769             fprintf(stdout, STDOUT_NL);
2770           }
2771 
2772         /* Handle --only-matching, which may occur many times */
2773 
2774         else
2775           {
2776           BOOL printed = FALSE;
2777           omstr *om;
2778 
2779           for (om = only_matching; om != NULL; om = om->next)
2780             {
2781             int n = om->groupnum;
2782             if (n == 0 || n < mrc)
2783               {
2784               int plen = offsets[2*n + 1] - offsets[2*n];
2785               if (plen > 0)
2786                 {
2787                 if (printed && om_separator != NULL)
2788                   fprintf(stdout, "%s", om_separator);
2789                 print_match(ptr + offsets[n*2], plen);
2790                 printed = TRUE;
2791                 }
2792               }
2793             }
2794 
2795           if (printed || printname != NULL || number)
2796             fprintf(stdout, STDOUT_NL);
2797           }
2798 
2799         /* Prepare to repeat to find the next match in the line. */
2800 
2801         match = FALSE;
2802         if (line_buffered) fflush(stdout);
2803         rc = 0;                      /* Had some success */
2804 
2805         /* If the pattern contained a lookbehind that included \K, it is
2806         possible that the end of the match might be at or before the actual
2807         starting offset we have just used. In this case, start one character
2808         further on. */
2809 
2810         startoffset = offsets[1];    /* Restart after the match */
2811         oldstartoffset = pcre2_get_startchar(match_data);
2812         if (startoffset <= oldstartoffset)
2813           {
2814           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2815           startoffset = oldstartoffset + 1;
2816           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2817           }
2818 
2819         /* If the current match ended past the end of the line (only possible
2820         in multiline mode), we must move on to the line in which it did end
2821         before searching for more matches. */
2822 
2823         while (startoffset > linelength)
2824           {
2825           ptr += linelength + endlinelength;
2826           filepos += (int)(linelength + endlinelength);
2827           linenumber++;
2828           startoffset -= (int)(linelength + endlinelength);
2829           t = end_of_line(ptr, endptr, &endlinelength);
2830           linelength = t - ptr - endlinelength;
2831           length = (PCRE2_SIZE)(endptr - ptr);
2832           }
2833 
2834         goto ONLY_MATCHING_RESTART;
2835         }
2836       }
2837 
2838     /* This is the default case when none of the above options is set. We print
2839     the matching lines(s), possibly preceded and/or followed by other lines of
2840     context. */
2841 
2842     else
2843       {
2844       lines_printed = TRUE;
2845 
2846       /* See if there is a requirement to print some "after" lines from a
2847       previous match. We never print any overlaps. */
2848 
2849       if (after_context > 0 && lastmatchnumber > 0)
2850         {
2851         int ellength;
2852         int linecount = 0;
2853         char *p = lastmatchrestart;
2854 
2855         while (p < ptr && linecount < after_context)
2856           {
2857           p = end_of_line(p, ptr, &ellength);
2858           linecount++;
2859           }
2860 
2861         /* It is important to advance lastmatchrestart during this printing so
2862         that it interacts correctly with any "before" printing below. Print
2863         each line's data using fwrite() in case there are binary zeroes. */
2864 
2865         while (lastmatchrestart < p)
2866           {
2867           char *pp = lastmatchrestart;
2868           if (printname != NULL) fprintf(stdout, "%s-", printname);
2869           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2870           pp = end_of_line(pp, endptr, &ellength);
2871           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2872           lastmatchrestart = pp;
2873           }
2874         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2875         }
2876 
2877       /* If there were non-contiguous lines printed above, insert hyphens. */
2878 
2879       if (hyphenpending)
2880         {
2881         fprintf(stdout, "--" STDOUT_NL);
2882         hyphenpending = FALSE;
2883         hyphenprinted = TRUE;
2884         }
2885 
2886       /* See if there is a requirement to print some "before" lines for this
2887       match. Again, don't print overlaps. */
2888 
2889       if (before_context > 0)
2890         {
2891         int linecount = 0;
2892         char *p = ptr;
2893 
2894         while (p > main_buffer &&
2895                (lastmatchnumber == 0 || p > lastmatchrestart) &&
2896                linecount < before_context)
2897           {
2898           linecount++;
2899           p = previous_line(p, main_buffer);
2900           }
2901 
2902         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2903           fprintf(stdout, "--" STDOUT_NL);
2904 
2905         while (p < ptr)
2906           {
2907           int ellength;
2908           char *pp = p;
2909           if (printname != NULL) fprintf(stdout, "%s-", printname);
2910           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2911           pp = end_of_line(pp, endptr, &ellength);
2912           FWRITE_IGNORE(p, 1, pp - p, stdout);
2913           p = pp;
2914           }
2915         }
2916 
2917       /* Now print the matching line(s); ensure we set hyphenpending at the end
2918       of the file if any context lines are being output. */
2919 
2920       if (after_context > 0 || before_context > 0)
2921         endhyphenpending = TRUE;
2922 
2923       if (printname != NULL) fprintf(stdout, "%s:", printname);
2924       if (number) fprintf(stdout, "%lu:", linenumber);
2925 
2926       /* In multiline mode, or if colouring, we have to split the line(s) up
2927       and search for further matches, but not of course if the line is a
2928       non-match. In multiline mode this is necessary in case there is another
2929       match that spans the end of the current line. When colouring we want to
2930       colour all matches. */
2931 
2932       if ((multiline || do_colour) && !invert)
2933         {
2934         int plength;
2935         PCRE2_SIZE endprevious;
2936 
2937         /* The use of \K may make the end offset earlier than the start. In
2938         this situation, swap them round. */
2939 
2940         if (offsets[0] > offsets[1])
2941           {
2942           PCRE2_SIZE temp = offsets[0];
2943           offsets[0] = offsets[1];
2944           offsets[1] = temp;
2945           }
2946 
2947         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2948         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2949 
2950         for (;;)
2951           {
2952           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2953 
2954           endprevious = offsets[1];
2955           startoffset = endprevious;  /* Advance after previous match. */
2956 
2957           /* If the pattern contained a lookbehind that included \K, it is
2958           possible that the end of the match might be at or before the actual
2959           starting offset we have just used. In this case, start one character
2960           further on. */
2961 
2962           if (startoffset <= oldstartoffset)
2963             {
2964             startoffset = oldstartoffset + 1;
2965             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2966             }
2967 
2968           /* If the current match ended past the end of the line (only possible
2969           in multiline mode), we must move on to the line in which it did end
2970           before searching for more matches. Because the PCRE2_FIRSTLINE option
2971           is set, the start of the match will always be before the first
2972           newline sequence. */
2973 
2974           while (startoffset > linelength + endlinelength)
2975             {
2976             ptr += linelength + endlinelength;
2977             filepos += (int)(linelength + endlinelength);
2978             linenumber++;
2979             startoffset -= (int)(linelength + endlinelength);
2980             endprevious -= (int)(linelength + endlinelength);
2981             t = end_of_line(ptr, endptr, &endlinelength);
2982             linelength = t - ptr - endlinelength;
2983             length = (PCRE2_SIZE)(endptr - ptr);
2984             }
2985 
2986           /* If startoffset is at the exact end of the line it means this
2987           complete line was the final part of the match, so there is nothing
2988           more to do. */
2989 
2990           if (startoffset == linelength + endlinelength) break;
2991 
2992           /* Otherwise, run a match from within the final line, and if found,
2993           loop for any that may follow. */
2994 
2995           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2996 
2997           /* The use of \K may make the end offset earlier than the start. In
2998           this situation, swap them round. */
2999 
3000           if (offsets[0] > offsets[1])
3001             {
3002             PCRE2_SIZE temp = offsets[0];
3003             offsets[0] = offsets[1];
3004             offsets[1] = temp;
3005             }
3006 
3007           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3008           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3009           }
3010 
3011         /* In multiline mode, we may have already printed the complete line
3012         and its line-ending characters (if they matched the pattern), so there
3013         may be no more to print. */
3014 
3015         plength = (int)((linelength + endlinelength) - endprevious);
3016         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3017         }
3018 
3019       /* Not colouring or multiline; no need to search for further matches. */
3020 
3021       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3022       }
3023 
3024     /* End of doing what has to be done for a match. If --line-buffered was
3025     given, flush the output. */
3026 
3027     if (line_buffered) fflush(stdout);
3028     rc = 0;    /* Had some success */
3029 
3030     /* Remember where the last match happened for after_context. We remember
3031     where we are about to restart, and that line's number. */
3032 
3033     lastmatchrestart = ptr + linelength + endlinelength;
3034     lastmatchnumber = linenumber + 1;
3035 
3036     /* If a line was printed and we are now at the end of the file and the last
3037     line had no newline, output one. */
3038 
3039     if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3040       write_final_newline();
3041     }
3042 
3043   /* For a match in multiline inverted mode (which of course did not cause
3044   anything to be printed), we have to move on to the end of the match before
3045   proceeding. */
3046 
3047   if (multiline && invert && match)
3048     {
3049     int ellength;
3050     char *endmatch = ptr + offsets[1];
3051     t = ptr;
3052     while (t < endmatch)
3053       {
3054       t = end_of_line(t, endptr, &ellength);
3055       if (t <= endmatch) linenumber++; else break;
3056       }
3057     endmatch = end_of_line(endmatch, endptr, &ellength);
3058     linelength = endmatch - ptr - ellength;
3059     }
3060 
3061   /* Advance to after the newline and increment the line number. The file
3062   offset to the current line is maintained in filepos. */
3063 
3064   END_ONE_MATCH:
3065   ptr += linelength + endlinelength;
3066   filepos += (int)(linelength + endlinelength);
3067   linenumber++;
3068 
3069   /* If there was at least one match (or a non-match, as required) in the line,
3070   increment the count for the -m option. */
3071 
3072   if (line_matched) count_matched_lines++;
3073 
3074   /* If input is line buffered, and the buffer is not yet full, read another
3075   line and add it into the buffer. */
3076 
3077   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3078     {
3079     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3080     bufflength += add;
3081     endptr += add;
3082     }
3083 
3084   /* If we haven't yet reached the end of the file (the buffer is full), and
3085   the current point is in the top 1/3 of the buffer, slide the buffer down by
3086   1/3 and refill it. Before we do this, if some unprinted "after" lines are
3087   about to be lost, print them. */
3088 
3089   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3090     {
3091     if (after_context > 0 &&
3092         lastmatchnumber > 0 &&
3093         lastmatchrestart < main_buffer + bufthird)
3094       {
3095       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3096       lastmatchnumber = 0;  /* Indicates no after lines pending */
3097       }
3098 
3099     /* Now do the shuffle */
3100 
3101     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3102     ptr -= bufthird;
3103 
3104     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3105       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3106     endptr = main_buffer + bufflength;
3107 
3108     /* Adjust any last match point */
3109 
3110     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3111     }
3112   }     /* Loop through the whole file */
3113 
3114 /* End of file; print final "after" lines if wanted; do_after_lines sets
3115 hyphenpending if it prints something. */
3116 
3117 if (only_matching_count == 0 && !(count_only|show_total_count))
3118   {
3119   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3120   hyphenpending |= endhyphenpending;
3121   }
3122 
3123 /* Print the file name if we are looking for those without matches and there
3124 were none. If we found a match, we won't have got this far. */
3125 
3126 if (filenames == FN_NOMATCH_ONLY)
3127   {
3128   fprintf(stdout, "%s" STDOUT_NL, printname);
3129   return 0;
3130   }
3131 
3132 /* Print the match count if wanted */
3133 
3134 if (count_only && !quiet)
3135   {
3136   if (count > 0 || !omit_zero_count)
3137     {
3138     if (printname != NULL && filenames != FN_NONE)
3139       fprintf(stdout, "%s:", printname);
3140     fprintf(stdout, "%lu" STDOUT_NL, count);
3141     counts_printed++;
3142     }
3143   }
3144 
3145 total_count += count;   /* Can be set without count_only */
3146 return rc;
3147 }
3148 
3149 
3150 
3151 /*************************************************
3152 *     Grep a file or recurse into a directory    *
3153 *************************************************/
3154 
3155 /* Given a path name, if it's a directory, scan all the files if we are
3156 recursing; if it's a file, grep it.
3157 
3158 Arguments:
3159   pathname          the path to investigate
3160   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3161   only_one_at_top   TRUE if the path is the only one at toplevel
3162 
3163 Returns:  -1 the file/directory was skipped
3164            0 if there was at least one match
3165            1 if there were no matches
3166            2 there was some kind of error
3167 
3168 However, file opening failures are suppressed if "silent" is set.
3169 */
3170 
3171 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3172 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3173 {
3174 int rc = 1;
3175 int frtype;
3176 void *handle;
3177 char *lastcomp;
3178 FILE *in = NULL;           /* Ensure initialized */
3179 
3180 #ifdef SUPPORT_LIBZ
3181 gzFile ingz = NULL;
3182 #endif
3183 
3184 #ifdef SUPPORT_LIBBZ2
3185 BZFILE *inbz2 = NULL;
3186 #endif
3187 
3188 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3189 int pathlen;
3190 #endif
3191 
3192 #if defined NATIVE_ZOS
3193 int zos_type;
3194 FILE *zos_test_file;
3195 #endif
3196 
3197 /* If the file name is "-" we scan stdin */
3198 
3199 if (strcmp(pathname, "-") == 0)
3200   {
3201   if (count_limit >= 0) setbuf(stdin, NULL);
3202   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3203     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3204       stdin_name : NULL);
3205   }
3206 
3207 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3208 directories, whereas --include and --exclude apply to everything else. The test
3209 is against the final component of the path. */
3210 
3211 lastcomp = strrchr(pathname, FILESEP);
3212 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3213 
3214 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3215 Otherwise, scan the directory and recurse for each path within it. The scanning
3216 code is localized so it can be made system-specific. */
3217 
3218 
3219 /* For z/OS, determine the file type. */
3220 
3221 #if defined NATIVE_ZOS
3222 zos_test_file =  fopen(pathname,"rb");
3223 
3224 if (zos_test_file == NULL)
3225    {
3226    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3227      pathname, strerror(errno));
3228    return -1;
3229    }
3230 zos_type = identifyzosfiletype (zos_test_file);
3231 fclose (zos_test_file);
3232 
3233 /* Handle a PDS in separate code */
3234 
3235 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3236    {
3237    return travelonpdsdir (pathname, only_one_at_top);
3238    }
3239 
3240 /* Deal with regular files in the normal way below. These types are:
3241    zos_type == __ZOS_PDS_MEMBER
3242    zos_type == __ZOS_PS
3243    zos_type == __ZOS_VSAM_KSDS
3244    zos_type == __ZOS_VSAM_ESDS
3245    zos_type == __ZOS_VSAM_RRDS
3246 */
3247 
3248 /* Handle a z/OS directory using common code. */
3249 
3250 else if (zos_type == __ZOS_HFS)
3251  {
3252 #endif  /* NATIVE_ZOS */
3253 
3254 
3255 /* Handle directories: common code for all OS */
3256 
3257 if (isdirectory(pathname))
3258   {
3259   if (dee_action == dee_SKIP ||
3260       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3261     return -1;
3262 
3263   if (dee_action == dee_RECURSE)
3264     {
3265     char childpath[FNBUFSIZ];
3266     char *nextfile;
3267     directory_type *dir = opendirectory(pathname);
3268 
3269     if (dir == NULL)
3270       {
3271       if (!silent)
3272         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3273           strerror(errno));
3274       return 2;
3275       }
3276 
3277     while ((nextfile = readdirectory(dir)) != NULL)
3278       {
3279       int frc;
3280       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3281       if (fnlength > FNBUFSIZ)
3282         {
3283         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3284         rc = 2;
3285         break;
3286         }
3287       sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3288 
3289       /* If the realpath() function is available, we can try to prevent endless
3290       recursion caused by a symlink pointing to a parent directory (GitHub
3291       issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3292       Modified to avoid using strlcat() because that isn't a standard C
3293       function, and also modified not to copy back the fully resolved path,
3294       because that affects the output from pcre2grep. */
3295 
3296 #ifdef HAVE_REALPATH
3297       {
3298       char resolvedpath[PATH_MAX];
3299       BOOL isSame;
3300       size_t rlen;
3301       if (realpath(childpath, resolvedpath) == NULL)
3302         continue;     /* This path is invalid - we can skip processing this */
3303       isSame = strcmp(pathname, resolvedpath) == 0;
3304       if (isSame) continue;    /* We have a recursion */
3305       rlen = strlen(resolvedpath);
3306       if (rlen++ < sizeof(resolvedpath) - 3)
3307         {
3308         BOOL contained;
3309         strcat(resolvedpath, "/");
3310         contained = strncmp(pathname, resolvedpath, rlen) == 0;
3311         if (contained) continue;    /* We have a recursion */
3312         }
3313       }
3314 #endif  /* HAVE_REALPATH */
3315 
3316       frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3317       if (frc > 1) rc = frc;
3318        else if (frc == 0 && rc == 1) rc = 0;
3319       }
3320 
3321     closedirectory(dir);
3322     return rc;
3323     }
3324   }
3325 
3326 #ifdef WIN32
3327 if (iswild(pathname))
3328   {
3329   char buffer[1024];
3330   char *nextfile;
3331   char *name;
3332   directory_type *dir = opendirectory(pathname);
3333 
3334   if (dir == NULL)
3335     return 0;
3336 
3337   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3338     if (*nextfile == '/' || *nextfile == '\\')
3339       name = nextfile + 1;
3340   *name = 0;
3341 
3342   while ((nextfile = readdirectory(dir)) != NULL)
3343     {
3344     int frc;
3345     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3346     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3347     if (frc > 1) rc = frc;
3348      else if (frc == 0 && rc == 1) rc = 0;
3349     }
3350 
3351   closedirectory(dir);
3352   return rc;
3353   }
3354 #endif
3355 
3356 #if defined NATIVE_ZOS
3357  }
3358 #endif
3359 
3360 /* If the file is not a directory, check for a regular file, and if it is not,
3361 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3362 exclusion. */
3363 
3364 else if (
3365 #if defined NATIVE_ZOS
3366         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3367 #else  /* all other OS */
3368         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3369 #endif
3370         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3371   return -1;  /* File skipped */
3372 
3373 /* Control reaches here if we have a regular file, or if we have a directory
3374 and recursion or skipping was not requested, or if we have anything else and
3375 skipping was not requested. The scan proceeds. If this is the first and only
3376 argument at top level, we don't show the file name, unless we are only showing
3377 the file name, or the filename was forced (-H). */
3378 
3379 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3380 pathlen = (int)(strlen(pathname));
3381 #endif
3382 
3383 /* Open using zlib if it is supported and the file name ends with .gz. */
3384 
3385 #ifdef SUPPORT_LIBZ
3386 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3387   {
3388   ingz = gzopen(pathname, "rb");
3389   if (ingz == NULL)
3390     {
3391     if (!silent)
3392       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3393         strerror(errno));
3394     return 2;
3395     }
3396   handle = (void *)ingz;
3397   frtype = FR_LIBZ;
3398   }
3399 else
3400 #endif
3401 
3402 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3403 
3404 #ifdef SUPPORT_LIBBZ2
3405 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3406   {
3407   inbz2 = BZ2_bzopen(pathname, "rb");
3408   handle = (void *)inbz2;
3409   frtype = FR_LIBBZ2;
3410   }
3411 else
3412 #endif
3413 
3414 /* Otherwise use plain fopen(). The label is so that we can come back here if
3415 an attempt to read a .bz2 file indicates that it really is a plain file. */
3416 
3417 #ifdef SUPPORT_LIBBZ2
3418 PLAIN_FILE:
3419 #endif
3420   {
3421   in = fopen(pathname, "rb");
3422   handle = (void *)in;
3423   frtype = FR_PLAIN;
3424   }
3425 
3426 /* All the opening methods return errno when they fail. */
3427 
3428 if (handle == NULL)
3429   {
3430   if (!silent)
3431     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3432       strerror(errno));
3433   return 2;
3434   }
3435 
3436 /* Now grep the file */
3437 
3438 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3439   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3440 
3441 /* Close in an appropriate manner. */
3442 
3443 #ifdef SUPPORT_LIBZ
3444 if (frtype == FR_LIBZ)
3445   gzclose(ingz);
3446 else
3447 #endif
3448 
3449 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3450 read failed. If the error indicates that the file isn't in fact bzipped, try
3451 again as a normal file. */
3452 
3453 #ifdef SUPPORT_LIBBZ2
3454 if (frtype == FR_LIBBZ2)
3455   {
3456   if (rc == 3)
3457     {
3458     int errnum;
3459     const char *err = BZ2_bzerror(inbz2, &errnum);
3460     if (errnum == BZ_DATA_ERROR_MAGIC)
3461       {
3462       BZ2_bzclose(inbz2);
3463       goto PLAIN_FILE;
3464       }
3465     else if (!silent)
3466       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3467         pathname, err);
3468     rc = 2;    /* The normal "something went wrong" code */
3469     }
3470   BZ2_bzclose(inbz2);
3471   }
3472 else
3473 #endif
3474 
3475 /* Normal file close */
3476 
3477 fclose(in);
3478 
3479 /* Pass back the yield from pcre2grep(). */
3480 
3481 return rc;
3482 }
3483 
3484 
3485 
3486 /*************************************************
3487 *          Handle a no-data option               *
3488 *************************************************/
3489 
3490 static int
handle_option(int letter,int options)3491 handle_option(int letter, int options)
3492 {
3493 switch(letter)
3494   {
3495   case N_FOFFSETS: file_offsets = TRUE; break;
3496   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3497   case N_LBUFFER: line_buffered = TRUE; break;
3498   case N_LOFFSETS: line_offsets = number = TRUE; break;
3499   case N_NOJIT: use_jit = FALSE; break;
3500   case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3501   case 'a': binary_files = BIN_TEXT; break;
3502   case 'c': count_only = TRUE; break;
3503   case 'F': options |= PCRE2_LITERAL; break;
3504   case 'H': filenames = FN_FORCE; break;
3505   case 'I': binary_files = BIN_NOMATCH; break;
3506   case 'h': filenames = FN_NONE; break;
3507   case 'i': options |= PCRE2_CASELESS; break;
3508   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3509   case 'L': filenames = FN_NOMATCH_ONLY; break;
3510   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3511   case 'n': number = TRUE; break;
3512 
3513   case 'o':
3514   only_matching_last = add_number(0, only_matching_last);
3515   if (only_matching == NULL) only_matching = only_matching_last;
3516   break;
3517 
3518   case 'q': quiet = TRUE; break;
3519   case 'r': dee_action = dee_RECURSE; break;
3520   case 's': silent = TRUE; break;
3521   case 't': show_total_count = TRUE; break;
3522   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3523   case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3524   case 'v': invert = TRUE; break;
3525   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3526   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3527 
3528   case 'V':
3529     {
3530     unsigned char buffer[128];
3531     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3532     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3533     }
3534   pcre2grep_exit(0);
3535   break;
3536 
3537   default:
3538   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3539   pcre2grep_exit(usage(2));
3540   }
3541 
3542 return options;
3543 }
3544 
3545 
3546 
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* Format a number followed by its English ordinal suffix, for example "1st",
"3rd", or "12th". The text is built in a static buffer, so each call overwrites
the result of the previous one.

Argument:   n    the number to format
Returns:         pointer to the static, zero-terminated text
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Length of the numeric part */
int tail = n % 100;

/* Numbers ending in 11, 12, or 13 keep "th"; otherwise the final digit picks
the suffix. */

if (tail < 11 || tail > 13)
  {
  int unit = tail % 10;
  if (unit == 1) suffix = "st";
  else if (unit == 2) suffix = "nd";
  else if (unit == 3) suffix = "rd";
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3571 
3572 
3573 
3574 /*************************************************
3575 *          Compile a single pattern              *
3576 *************************************************/
3577 
3578 /* Do nothing if the pattern has already been compiled. This is the case for
3579 include/exclude patterns read from a file.
3580 
3581 When the -F option has been used, each "pattern" may be a list of strings,
3582 separated by line breaks. They will be matched literally. We split such a
3583 string and compile the first substring, inserting an additional block into the
3584 pattern chain.
3585 
3586 Arguments:
3587   p              points to the pattern block
3588   options        the PCRE options
3589   fromfile       TRUE if the pattern was read from a file
3590   fromtext       file name or identifying text (e.g. "include")
3591   count          0 if this is the only command line pattern, or
3592                  number of the command line pattern, or
3593                  linenumber for a pattern from a file
3594 
3595 Returns:         TRUE on success, FALSE after an error
3596 */
3597 
3598 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3599 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3600   int count)
3601 {
3602 char *ps;
3603 int errcode;
3604 PCRE2_SIZE patlen, erroffset;
3605 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3606 
3607 if (p->compiled != NULL) return TRUE;
3608 ps = p->string;
3609 patlen = p->length;
3610 
3611 if ((options & PCRE2_LITERAL) != 0)
3612   {
3613   int ellength;
3614   char *eop = ps + patlen;
3615   char *pe = end_of_line(ps, eop, &ellength);
3616 
3617   if (ellength != 0)
3618     {
3619     patlen = pe - ps - ellength;
3620     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3621     }
3622   }
3623 
3624 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3625   &erroffset, compile_context);
3626 
3627 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3628 ignore any JIT compiler errors, relying falling back to interpreting if
3629 anything goes wrong with JIT. */
3630 
3631 if (p->compiled != NULL)
3632   {
3633 #ifdef SUPPORT_PCRE2GREP_JIT
3634   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3635 #endif
3636   return TRUE;
3637   }
3638 
3639 /* Handle compile errors */
3640 
3641 if (erroffset > patlen) erroffset = patlen;
3642 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3643 
3644 if (fromfile)
3645   {
3646   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3647     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3648   }
3649 else
3650   {
3651   if (count == 0)
3652     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3653       fromtext, (int)erroffset, errmessbuffer);
3654   else
3655     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3656       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3657   }
3658 
3659 return FALSE;
3660 }
3661 
3662 
3663 
3664 /*************************************************
3665 *     Read and compile a file of patterns        *
3666 *************************************************/
3667 
3668 /* This is used for --filelist, --include-from, and --exclude-from.
3669 
3670 Arguments:
3671   name         the name of the file; "-" is stdin
3672   patptr       pointer to the pattern chain anchor
3673   patlastptr   pointer to the last pattern pointer
3674 
3675 Returns:       TRUE if all went well
3676 */
3677 
3678 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3679 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3680 {
3681 int linenumber = 0;
3682 PCRE2_SIZE patlen;
3683 FILE *f;
3684 const char *filename;
3685 char buffer[MAXPATLEN+20];
3686 
3687 if (strcmp(name, "-") == 0)
3688   {
3689   f = stdin;
3690   filename = stdin_name;
3691   }
3692 else
3693   {
3694   f = fopen(name, "r");
3695   if (f == NULL)
3696     {
3697     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3698     return FALSE;
3699     }
3700   filename = name;
3701   }
3702 
3703 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3704   {
3705   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3706   linenumber++;
3707   if (patlen == 0) continue;   /* Skip blank lines */
3708 
3709   /* Note: this call to add_pattern() puts a pointer to the local variable
3710   "buffer" into the pattern chain. However, that pointer is used only when
3711   compiling the pattern, which happens immediately below, so we flatten it
3712   afterwards, as a precaution against any later code trying to use it. */
3713 
3714   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3715   if (*patlastptr == NULL)
3716     {
3717     if (f != stdin) fclose(f);
3718     return FALSE;
3719     }
3720   if (*patptr == NULL) *patptr = *patlastptr;
3721 
3722   /* This loop is needed because compiling a "pattern" when -F is set may add
3723   on additional literal patterns if the original contains a newline. In the
3724   common case, it never will, because read_one_line() stops at a newline.
3725   However, the -N option can be used to give pcre2grep a different newline
3726   setting. */
3727 
3728   for(;;)
3729     {
3730     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3731         linenumber))
3732       {
3733       if (f != stdin) fclose(f);
3734       return FALSE;
3735       }
3736     (*patlastptr)->string = NULL;            /* Insurance */
3737     if ((*patlastptr)->next == NULL) break;
3738     *patlastptr = (*patlastptr)->next;
3739     }
3740   }
3741 
3742 if (f != stdin) fclose(f);
3743 return TRUE;
3744 }
3745 
3746 
3747 
3748 /*************************************************
3749 *                Main program                    *
3750 *************************************************/
3751 
3752 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3753 
3754 int
main(int argc,char ** argv)3755 main(int argc, char **argv)
3756 {
3757 int i, j;
3758 int rc = 1;
3759 BOOL only_one_at_top;
3760 patstr *cp;
3761 fnstr *fn;
3762 omstr *om;
3763 const char *locale_from = "--locale";
3764 
3765 #ifdef SUPPORT_PCRE2GREP_JIT
3766 pcre2_jit_stack *jit_stack = NULL;
3767 #endif
3768 
3769 /* In Windows, stdout is set up as a text stream, which means that \n is
3770 converted to \r\n. This causes output lines that are copied from the input to
3771 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3772 that stdout is a binary stream. Note that this means all other output to stdout
3773 must use STDOUT_NL to terminate lines. */
3774 
3775 #ifdef WIN32
3776 _setmode(_fileno(stdout), _O_BINARY);
3777 #endif
3778 
3779 /* Process the options */
3780 
3781 for (i = 1; i < argc; i++)
3782   {
3783   option_item *op = NULL;
3784   char *option_data = (char *)"";    /* default to keep compiler happy */
3785   BOOL longop;
3786   BOOL longopwasequals = FALSE;
3787 
3788   if (argv[i][0] != '-') break;
3789 
3790   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3791   but only if we have previously had -e or -f to define the patterns. */
3792 
3793   if (argv[i][1] == 0)
3794     {
3795     if (pattern_files != NULL || patterns != NULL) break;
3796       else pcre2grep_exit(usage(2));
3797     }
3798 
3799   /* Handle a long name option, or -- to terminate the options */
3800 
3801   if (argv[i][1] == '-')
3802     {
3803     char *arg = argv[i] + 2;
3804     char *argequals = strchr(arg, '=');
3805 
3806     if (*arg == 0)    /* -- terminates options */
3807       {
3808       i++;
3809       break;                /* out of the options-handling loop */
3810       }
3811 
3812     longop = TRUE;
3813 
3814     /* Some long options have data that follows after =, for example file=name.
3815     Some options have variations in the long name spelling: specifically, we
3816     allow "regexp" because GNU grep allows it, though I personally go along
3817     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3818     These options are entered in the table as "regex(p)". Options can be in
3819     both these categories. */
3820 
3821     for (op = optionlist; op->one_char != 0; op++)
3822       {
3823       char *opbra = strchr(op->long_name, '(');
3824       char *equals = strchr(op->long_name, '=');
3825 
3826       /* Handle options with only one spelling of the name */
3827 
3828       if (opbra == NULL)     /* Does not contain '(' */
3829         {
3830         if (equals == NULL)  /* Not thing=data case */
3831           {
3832           if (strcmp(arg, op->long_name) == 0) break;
3833           }
3834         else                 /* Special case xxx=data */
3835           {
3836           int oplen = (int)(equals - op->long_name);
3837           int arglen = (argequals == NULL)?
3838             (int)strlen(arg) : (int)(argequals - arg);
3839           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3840             {
3841             option_data = arg + arglen;
3842             if (*option_data == '=')
3843               {
3844               option_data++;
3845               longopwasequals = TRUE;
3846               }
3847             break;
3848             }
3849           }
3850         }
3851 
3852       /* Handle options with an alternate spelling of the name */
3853 
3854       else
3855         {
3856         char buff1[24];
3857         char buff2[24];
3858         int ret;
3859 
3860         int baselen = (int)(opbra - op->long_name);
3861         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3862         int arglen = (argequals == NULL || equals == NULL)?
3863           (int)strlen(arg) : (int)(argequals - arg);
3864 
3865         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3866              ret < 0 || ret > (int)sizeof(buff1)) ||
3867             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3868                      fulllen - baselen - 2, opbra + 1),
3869              ret < 0 || ret > (int)sizeof(buff2)))
3870           {
3871           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3872             op->long_name);
3873           pcre2grep_exit(2);
3874           }
3875 
3876         if (strncmp(arg, buff1, arglen) == 0 ||
3877            strncmp(arg, buff2, arglen) == 0)
3878           {
3879           if (equals != NULL && argequals != NULL)
3880             {
3881             option_data = argequals;
3882             if (*option_data == '=')
3883               {
3884               option_data++;
3885               longopwasequals = TRUE;
3886               }
3887             }
3888           break;
3889           }
3890         }
3891       }
3892 
3893     if (op->one_char == 0)
3894       {
3895       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3896       pcre2grep_exit(usage(2));
3897       }
3898     }
3899 
3900   /* One-char options; many that have no data may be in a single argument; we
3901   continue till we hit the last one or one that needs data. */
3902 
3903   else
3904     {
3905     char *s = argv[i] + 1;
3906     longop = FALSE;
3907 
3908     while (*s != 0)
3909       {
3910       for (op = optionlist; op->one_char != 0; op++)
3911         {
3912         if (*s == op->one_char) break;
3913         }
3914       if (op->one_char == 0)
3915         {
3916         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3917           *s, argv[i]);
3918         pcre2grep_exit(usage(2));
3919         }
3920 
3921       option_data = s+1;
3922 
3923       /* Break out if this is the last character in the string; it's handled
3924       below like a single multi-char option. */
3925 
3926       if (*option_data == 0) break;
3927 
3928       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3929       are used for ones that either have a numerical number or defaults, i.e.
3930       the data is optional. If a digit follows, there is data; if not, carry on
3931       with other single-character options in the same string. */
3932 
3933       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3934         {
3935         if (isdigit((unsigned char)s[1])) break;
3936         }
3937       else   /* Check for an option with data */
3938         {
3939         if (op->type != OP_NODATA) break;
3940         }
3941 
3942       /* Handle a single-character option with no data, then loop for the
3943       next character in the string. */
3944 
3945       pcre2_options = handle_option(*s++, pcre2_options);
3946       }
3947     }
3948 
3949   /* At this point we should have op pointing to a matched option. If the type
3950   is NO_DATA, it means that there is no data, and the option might set
3951   something in the PCRE options. */
3952 
3953   if (op->type == OP_NODATA)
3954     {
3955     pcre2_options = handle_option(op->one_char, pcre2_options);
3956     continue;
3957     }
3958 
3959   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3960   either has a value or defaults to something. It cannot have data in a
3961   separate item. At the moment, the only such options are "colo(u)r",
3962   and "only-matching". */
3963 
3964   if (*option_data == 0 &&
3965       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3966        op->type == OP_OP_NUMBERS))
3967     {
3968     switch (op->one_char)
3969       {
3970       case N_COLOUR:
3971       colour_option = "auto";
3972       break;
3973 
3974       case 'o':
3975       only_matching_last = add_number(0, only_matching_last);
3976       if (only_matching == NULL) only_matching = only_matching_last;
3977       break;
3978       }
3979     continue;
3980     }
3981 
3982   /* Otherwise, find the data string for the option. */
3983 
3984   if (*option_data == 0)
3985     {
3986     if (i >= argc - 1 || longopwasequals)
3987       {
3988       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3989       pcre2grep_exit(usage(2));
3990       }
3991     option_data = argv[++i];
3992     }
3993 
3994   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3995   added to a chain of numbers. */
3996 
3997   if (op->type == OP_OP_NUMBERS)
3998     {
3999     unsigned long int n = decode_number(option_data, op, longop);
4000     omdatastr *omd = (omdatastr *)op->dataptr;
4001     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4002     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4003     }
4004 
4005   /* If the option type is OP_PATLIST, it's the -e option, or one of the
4006   include/exclude options, which can be called multiple times to create lists
4007   of patterns. */
4008 
4009   else if (op->type == OP_PATLIST)
4010     {
4011     patdatastr *pd = (patdatastr *)op->dataptr;
4012     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4013       *(pd->lastptr));
4014     if (*(pd->lastptr) == NULL) goto EXIT2;
4015     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4016     }
4017 
4018   /* If the option type is OP_FILELIST, it's one of the options that names a
4019   file. */
4020 
4021   else if (op->type == OP_FILELIST)
4022     {
4023     fndatastr *fd = (fndatastr *)op->dataptr;
4024     fn = (fnstr *)malloc(sizeof(fnstr));
4025     if (fn == NULL)
4026       {
4027       fprintf(stderr, "pcre2grep: malloc failed\n");
4028       goto EXIT2;
4029       }
4030     fn->next = NULL;
4031     fn->name = option_data;
4032     if (*(fd->anchor) == NULL)
4033       *(fd->anchor) = fn;
4034     else
4035       (*(fd->lastptr))->next = fn;
4036     *(fd->lastptr) = fn;
4037     }
4038 
4039   /* Handle OP_BINARY_FILES */
4040 
4041   else if (op->type == OP_BINFILES)
4042     {
4043     if (strcmp(option_data, "binary") == 0)
4044       binary_files = BIN_BINARY;
4045     else if (strcmp(option_data, "without-match") == 0)
4046       binary_files = BIN_NOMATCH;
4047     else if (strcmp(option_data, "text") == 0)
4048       binary_files = BIN_TEXT;
4049     else
4050       {
4051       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4052         option_data);
4053       pcre2grep_exit(usage(2));
4054       }
4055     }
4056 
4057   /* Otherwise, deal with a single string or numeric data value. */
4058 
4059   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4060            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4061     {
4062     *((char **)op->dataptr) = option_data;
4063     }
4064   else
4065     {
4066     unsigned long int n = decode_number(option_data, op, longop);
4067     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4068       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4069       else *((int *)op->dataptr) = n;
4070     }
4071   }
4072 
4073 /* Options have been decoded. If -C was used, its value is used as a default
4074 for -A and -B. */
4075 
4076 if (both_context > 0)
4077   {
4078   if (after_context == 0) after_context = both_context;
4079   if (before_context == 0) before_context = both_context;
4080   }
4081 
4082 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4083 permitted. They display, each in their own way, only the data that has matched.
4084 */
4085 
4086 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4087   file_offsets + line_offsets;
4088 
4089 if (only_matching_count > 1)
4090   {
4091   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4092     "--file-offsets and/or --line-offsets\n");
4093   pcre2grep_exit(usage(2));
4094   }
4095 
4096 
4097 /* Check that there is a big enough ovector for all -o settings. */
4098 
4099 for (om = only_matching; om != NULL; om = om->next)
4100   {
4101   int n = om->groupnum;
4102   if (n > (int)capture_max)
4103     {
4104     fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4105     fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4106     goto EXIT2;
4107     }
4108   }
4109 
4110 /* Check the text supplied to --output for errors. */
4111 
4112 if (output_text != NULL &&
4113     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4114   goto EXIT2;
4115 
4116 /* Set up default compile and match contexts and a match data block. */
4117 
4118 offset_size = capture_max + 1;
4119 compile_context = pcre2_compile_context_create(NULL);
4120 match_context = pcre2_match_context_create(NULL);
4121 match_data = pcre2_match_data_create(offset_size, NULL);
4122 offsets = pcre2_get_ovector_pointer(match_data);
4123 
4124 /* If string (script) callouts are supported, set up the callout processing
4125 function. */
4126 
4127 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4128 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4129 #endif
4130 
4131 /* Put limits into the match data block. */
4132 
4133 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4134 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4135 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4136 
4137 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4138 LC_ALL environment variable is set, and if so, use it. */
4139 
4140 if (locale == NULL)
4141   {
4142   locale = getenv("LC_ALL");
4143   locale_from = "LC_ALL";
4144   }
4145 
4146 if (locale == NULL)
4147   {
4148   locale = getenv("LC_CTYPE");
4149   locale_from = "LC_CTYPE";
4150   }
4151 
4152 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4153 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4154 
4155 if (locale != NULL)
4156   {
4157   if (setlocale(LC_CTYPE, locale) == NULL)
4158     {
4159     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4160       locale, locale_from);
4161     goto EXIT2;
4162     }
4163   character_tables = pcre2_maketables(NULL);
4164   pcre2_set_character_tables(compile_context, character_tables);
4165   }
4166 
4167 /* Sort out colouring */
4168 
4169 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4170   {
4171   if (strcmp(colour_option, "always") == 0)
4172 #ifdef WIN32
4173     do_ansi = !is_stdout_tty(),
4174 #endif
4175     do_colour = TRUE;
4176   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4177   else
4178     {
4179     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4180       colour_option);
4181     goto EXIT2;
4182     }
4183   if (do_colour)
4184     {
4185     char *cs = getenv("PCRE2GREP_COLOUR");
4186     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4187     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4188     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4189     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4190     if (cs == NULL) cs = getenv("GREP_COLOR");
4191     if (cs != NULL)
4192       {
4193       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4194       }
4195 #ifdef WIN32
4196     init_colour_output();
4197 #endif
4198     }
4199   }
4200 
4201 /* Sort out a newline setting. */
4202 
4203 if (newline_arg != NULL)
4204   {
4205   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4206        endlinetype++)
4207     {
4208     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4209     }
4210   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4211     pcre2_set_newline(compile_context, endlinetype);
4212   else
4213     {
4214     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4215       newline_arg);
4216     goto EXIT2;
4217     }
4218   }
4219 
4220 /* Find default newline convention */
4221 
4222 else
4223   {
4224   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4225   }
4226 
4227 /* Interpret the text values for -d and -D */
4228 
4229 if (dee_option != NULL)
4230   {
4231   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4232   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4233   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4234   else
4235     {
4236     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4237     goto EXIT2;
4238     }
4239   }
4240 
4241 if (DEE_option != NULL)
4242   {
4243   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4244   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4245   else
4246     {
4247     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4248     goto EXIT2;
4249     }
4250   }
4251 
4252 /* Set the extra options */
4253 
4254 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4255 
4256 /* Check the values for Jeffrey Friedl's debugging options. */
4257 
4258 /* If use_jit is set, check whether JIT is available. If not, do not try
4259 to use JIT. */
4260 
4261 if (use_jit)
4262   {
4263   uint32_t answer;
4264   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4265   if (!answer) use_jit = FALSE;
4266   }
4267 
4268 /* Get memory for the main buffer. */
4269 
4270 if (bufthird <= 0)
4271   {
4272   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4273   goto EXIT2;
4274   }
4275 
4276 bufsize = 3*bufthird;
4277 main_buffer = (char *)malloc(bufsize);
4278 
4279 if (main_buffer == NULL)
4280   {
4281   fprintf(stderr, "pcre2grep: malloc failed\n");
4282   goto EXIT2;
4283   }
4284 
4285 /* If no patterns were provided by -e, and there are no files provided by -f,
4286 the first argument is the one and only pattern, and it must exist. */
4287 
4288 if (patterns == NULL && pattern_files == NULL)
4289   {
4290   if (i >= argc) return usage(2);
4291   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4292     NULL);
4293   i++;
4294   if (patterns == NULL) goto EXIT2;
4295   }
4296 
4297 /* Compile the patterns that were provided on the command line, either by
4298 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4299 after all the command-line options are read so that we know which PCRE options
4300 to use. When -F is used, compile_pattern() may add another block into the
4301 chain, so we must not access the next pointer till after the compile. */
4302 
4303 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4304   {
4305   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4306        (j == 1 && patterns->next == NULL)? 0 : j))
4307     goto EXIT2;
4308   }
4309 
4310 /* Read and compile the regular expressions that are provided in files. */
4311 
4312 for (fn = pattern_files; fn != NULL; fn = fn->next)
4313   {
4314   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4315   }
4316 
4317 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4318 
4319 #ifdef SUPPORT_PCRE2GREP_JIT
4320 if (use_jit)
4321   {
4322   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4323   if (jit_stack != NULL                        )
4324     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4325   }
4326 #endif
4327 
4328 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4329 adjust the options. */
4330 
4331 pcre2_options &= ~PCRE2_LITERAL;
4332 (void)pcre2_set_compile_extra_options(compile_context, 0);
4333 
4334 /* If there are include or exclude patterns read from the command line, compile
4335 them. */
4336 
4337 for (j = 0; j < 4; j++)
4338   {
4339   int k;
4340   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4341     {
4342     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4343          (k == 1 && cp->next == NULL)? 0 : k))
4344       goto EXIT2;
4345     }
4346   }
4347 
4348 /* Read and compile include/exclude patterns from files. */
4349 
4350 for (fn = include_from; fn != NULL; fn = fn->next)
4351   {
4352   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4353     goto EXIT2;
4354   }
4355 
4356 for (fn = exclude_from; fn != NULL; fn = fn->next)
4357   {
4358   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4359     goto EXIT2;
4360   }
4361 
4362 /* If there are no files that contain lists of files to search, and there are
4363 no file arguments, search stdin, and then exit. */
4364 
4365 if (file_lists == NULL && i >= argc)
4366   {
4367   /* Using a buffered stdin, that then is seek is not portable,
4368      so attempt to remove the buffer, to workaround reported issues
4369      affecting several BSD and AIX */
4370   if (count_limit >= 0)
4371     setbuf(stdin, NULL);
4372   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4373     (filenames > FN_DEFAULT)? stdin_name : NULL);
4374   goto EXIT;
4375   }
4376 
4377 /* If any files that contains a list of files to search have been specified,
4378 read them line by line and search the given files. */
4379 
4380 for (fn = file_lists; fn != NULL; fn = fn->next)
4381   {
4382   char buffer[FNBUFSIZ];
4383   FILE *fl;
4384   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4385     {
4386     fl = fopen(fn->name, "rb");
4387     if (fl == NULL)
4388       {
4389       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4390         strerror(errno));
4391       goto EXIT2;
4392       }
4393     }
4394   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4395     {
4396     int frc;
4397     char *end = buffer + (int)strlen(buffer);
4398     while (end > buffer && isspace(end[-1])) end--;
4399     *end = 0;
4400     if (*buffer != 0)
4401       {
4402       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4403       if (frc > 1) rc = frc;
4404         else if (frc == 0 && rc == 1) rc = 0;
4405       }
4406     }
4407   if (fl != stdin) fclose(fl);
4408   }
4409 
4410 /* After handling file-list, work through remaining arguments. Pass in the fact
4411 that there is only one argument at top level - this suppresses the file name if
4412 the argument is not a directory and filenames are not otherwise forced. */
4413 
4414 only_one_at_top = i == argc - 1 && file_lists == NULL;
4415 
4416 for (; i < argc; i++)
4417   {
4418   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4419     only_one_at_top);
4420   if (frc > 1) rc = frc;
4421     else if (frc == 0 && rc == 1) rc = 0;
4422   }
4423 
4424 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4425 /* If separating builtin echo callouts by implicit newline, add one more for
4426 the final item. */
4427 
4428 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4429   fprintf(stdout, STDOUT_NL);
4430 #endif
4431 
4432 /* Show the total number of matches if requested, but not if only one file's
4433 count was printed. */
4434 
4435 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4436   {
4437   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4438     fprintf(stdout, "TOTAL:");
4439   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4440   }
4441 
4442 EXIT:
4443 #ifdef SUPPORT_PCRE2GREP_JIT
4444 pcre2_jit_free_unused_memory(NULL);
4445 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4446 #endif
4447 
4448 free(main_buffer);
4449 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4450 
4451 pcre2_compile_context_free(compile_context);
4452 pcre2_match_context_free(match_context);
4453 pcre2_match_data_free(match_data);
4454 
4455 free_pattern_chain(patterns);
4456 free_pattern_chain(include_patterns);
4457 free_pattern_chain(include_dir_patterns);
4458 free_pattern_chain(exclude_patterns);
4459 free_pattern_chain(exclude_dir_patterns);
4460 
4461 free_file_chain(exclude_from);
4462 free_file_chain(include_from);
4463 free_file_chain(pattern_files);
4464 free_file_chain(file_lists);
4465 
4466 while (only_matching != NULL)
4467   {
4468   omstr *this = only_matching;
4469   only_matching = this->next;
4470   free(this);
4471   }
4472 
4473 pcre2grep_exit(rc);
4474 
4475 EXIT2:
4476 rc = 2;
4477 goto EXIT;
4478 }
4479 
4480 /* End of pcre2grep */
4481