1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2022 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65
/* Some CMake configurations still define WIN32 under Cygwin. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76
77 #ifdef WIN32
78 #include <io.h> /* For _setmode() */
79 #include <fcntl.h> /* For _O_BINARY */
80 #endif
81
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112
/* Old VC and other older compilers don't support %td or %zu, and even some that
claim to be C99 don't support them (hence DISABLE_PERCENT_ZT). */
115
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117 (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126
127 #define FALSE 0
128 #define TRUE 1
129
130 typedef int BOOL;
131
132 #define DEFAULT_CAPTURE_MAX 50
133
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146
147 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148
149 /* File reading styles */
150
151 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152
153 /* Actions for the -d and -D options */
154
155 enum { dee_READ, dee_SKIP, dee_RECURSE };
156 enum { DEE_READ, DEE_SKIP };
157
158 /* Actions for special processing options (flag bits) */
159
160 #define PO_WORD_MATCH 0x0001
161 #define PO_LINE_MATCH 0x0002
162 #define PO_FIXED_STRINGS 0x0004
163
164 /* Binary file options */
165
166 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167
168 /* Return values from decode_dollar_escape() */
169
170 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171
172 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173 environments), a warning is issued if the value of fwrite() is ignored.
174 Unfortunately, casting to (void) does not suppress the warning. To get round
175 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176 apply to fprintf(). */
177
/* The value of fwrite() is consumed by an empty "if" so that the compiler does
not warn about an ignored result. The do/while(0) wrapper makes the expansion
a single statement, so the macro can be used safely as the body of an if/else
without capturing the following "else". Always follow it with a semicolon. */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
179
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185
186 #ifdef WIN32
187 #define STDOUT_NL "\r\n"
188 #define STDOUT_NL_LEN 2
189 #define QUOT "\""
190 #else
191 #define STDOUT_NL "\n"
192 #define STDOUT_NL_LEN 1
193 #define QUOT "'"
194 #endif
195
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199
200 #define STDOUT_NL_CODE 0x7fffffffu
201
202
203
204 /*************************************************
205 * Global variables *
206 *************************************************/
207
208 static const char *colour_string = "1;31";
209 static const char *colour_option = NULL;
210 static const char *dee_option = NULL;
211 static const char *DEE_option = NULL;
212 static const char *locale = NULL;
213 static const char *newline_arg = NULL;
214 static const char *om_separator = NULL;
215 static const char *stdin_name = "(standard input)";
216 static const char *output_text = NULL;
217
218 static char *main_buffer = NULL;
219
220 static const char *printname_nl = STDOUT_NL; /* Changed to NULL for -Z */
221 static int printname_colon = ':'; /* Changed to 0 for -Z */
222 static int printname_hyphen = '-'; /* Changed to 0 for -Z */
223
224 static int after_context = 0;
225 static int before_context = 0;
226 static int binary_files = BIN_BINARY;
227 static int both_context = 0;
228 static int endlinetype;
229
230 static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
231 static unsigned long int counts_printed = 0;
232 static unsigned long int total_count = 0;
233
234 static PCRE2_SIZE bufthird = PCRE2GREP_BUFSIZE;
235 static PCRE2_SIZE max_bufthird = PCRE2GREP_MAX_BUFSIZE;
236 static PCRE2_SIZE bufsize = 3*PCRE2GREP_BUFSIZE;
237
238 #ifdef WIN32
239 static int dee_action = dee_SKIP;
240 #else
241 static int dee_action = dee_READ;
242 #endif
243
244 static int DEE_action = DEE_READ;
245 static int error_count = 0;
246 static int filenames = FN_DEFAULT;
247
248 #ifdef SUPPORT_PCRE2GREP_JIT
249 static BOOL use_jit = TRUE;
250 #else
251 static BOOL use_jit = FALSE;
252 #endif
253
254 static const uint8_t *character_tables = NULL;
255
256 static uint32_t pcre2_options = 0;
257 static uint32_t extra_options = 0;
258 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
259 static uint32_t match_limit = 0;
260 static uint32_t depth_limit = 0;
261
262 static pcre2_compile_context *compile_context;
263 static pcre2_match_context *match_context;
264 static pcre2_match_data *match_data, *match_data_pair[2];
265 static PCRE2_SIZE *offsets, *offsets_pair[2];
266 static int match_data_toggle;
267 static uint32_t offset_size;
268 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
269
270 static BOOL all_matches = FALSE;
271 static BOOL count_only = FALSE;
272 static BOOL do_colour = FALSE;
273 #ifdef WIN32
274 static BOOL do_ansi = FALSE;
275 #endif
276 static BOOL file_offsets = FALSE;
277 static BOOL hyphenpending = FALSE;
278 static BOOL invert = FALSE;
279 static BOOL line_buffered = FALSE;
280 static BOOL line_offsets = FALSE;
281 static BOOL multiline = FALSE;
282 static BOOL number = FALSE;
283 static BOOL omit_zero_count = FALSE;
284 static BOOL resource_error = FALSE;
285 static BOOL quiet = FALSE;
286 static BOOL show_total_count = FALSE;
287 static BOOL silent = FALSE;
288 static BOOL utf = FALSE;
289
290 static uint8_t utf8_buffer[8];
291
292
293 /* Structure for list of --only-matching capturing numbers. */
294
typedef struct omstr {
  struct omstr *next;   /* following entry in the chain */
  int groupnum;         /* capturing group number given to --only-matching */
} omstr;
299
300 static omstr *only_matching = NULL;
301 static omstr *only_matching_last = NULL;
302 static int only_matching_count;
303
304 /* Structure for holding the two variables that describe a number chain. */
305
typedef struct omdatastr {
  omstr **anchor;    /* address of the variable holding the first chain item */
  omstr **lastptr;   /* address of the variable holding the last chain item */
} omdatastr;
310
311 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
312
313 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
314
typedef struct fnstr {
  struct fnstr *next;   /* following entry in the chain */
  char *name;           /* the file name; free_file_chain() does not free it */
} fnstr;
319
320 static fnstr *exclude_from = NULL;
321 static fnstr *exclude_from_last = NULL;
322 static fnstr *include_from = NULL;
323 static fnstr *include_from_last = NULL;
324
325 static fnstr *file_lists = NULL;
326 static fnstr *file_lists_last = NULL;
327 static fnstr *pattern_files = NULL;
328 static fnstr *pattern_files_last = NULL;
329
330 /* Structure for holding the two variables that describe a file name chain. */
331
typedef struct fndatastr {
  fnstr **anchor;    /* address of the variable holding the first chain item */
  fnstr **lastptr;   /* address of the variable holding the last chain item */
} fndatastr;
336
337 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
338 static fndatastr include_from_data = { &include_from, &include_from_last };
339 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
340 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
341
342 /* Structure for pattern and its compiled form; used for matching patterns and
343 also for include/exclude patterns. */
344
typedef struct patstr {
  struct patstr *next;    /* following entry in the chain */
  char *string;           /* the pattern text as supplied to add_pattern() */
  PCRE2_SIZE length;      /* length of the pattern text */
  pcre2_code *compiled;   /* compiled form; NULL in a newly added item */
} patstr;
351
352 static patstr *patterns = NULL;
353 static patstr *patterns_last = NULL;
354 static patstr *include_patterns = NULL;
355 static patstr *include_patterns_last = NULL;
356 static patstr *exclude_patterns = NULL;
357 static patstr *exclude_patterns_last = NULL;
358 static patstr *include_dir_patterns = NULL;
359 static patstr *include_dir_patterns_last = NULL;
360 static patstr *exclude_dir_patterns = NULL;
361 static patstr *exclude_dir_patterns_last = NULL;
362
363 /* Structure holding the two variables that describe a pattern chain. A pointer
364 to such structures is used for each appropriate option. */
365
typedef struct patdatastr {
  patstr **anchor;    /* address of the variable holding the first chain item */
  patstr **lastptr;   /* address of the variable holding the last chain item */
} patdatastr;
370
371 static patdatastr match_patdata = { &patterns, &patterns_last };
372 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
373 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
374 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
375 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
376
377 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
378 &include_dir_patterns, &exclude_dir_patterns };
379
380 static const char *incexname[4] = { "--include", "--exclude",
381 "--include-dir", "--exclude-dir" };
382
383 /* Structure for options and list of them */
384
385 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
386 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
387
/* One entry in the table of command line options. */

typedef struct option_item {
  int type;                /* one of the OP_xxx values: what kind of data follows */
  int one_char;            /* single-letter form, or a negative N_xxx value if none */
  void *dataptr;           /* where the option's data is stored, or NULL */
  const char *long_name;   /* long option name, possibly followed by "=description" */
  const char *help_text;   /* one-line description of the option */
} option_item;
395
396 /* Options without a single-letter equivalent get a negative value. This can be
397 used to identify them. */
398
399 #define N_COLOUR (-1)
400 #define N_EXCLUDE (-2)
401 #define N_EXCLUDE_DIR (-3)
402 #define N_HELP (-4)
403 #define N_INCLUDE (-5)
404 #define N_INCLUDE_DIR (-6)
405 #define N_LABEL (-7)
406 #define N_LOCALE (-8)
407 #define N_NULL (-9)
408 #define N_LOFFSETS (-10)
409 #define N_FOFFSETS (-11)
410 #define N_LBUFFER (-12)
411 #define N_H_LIMIT (-13)
412 #define N_M_LIMIT (-14)
413 #define N_M_LIMIT_DEP (-15)
414 #define N_BUFSIZE (-16)
415 #define N_NOJIT (-17)
416 #define N_FILE_LIST (-18)
417 #define N_BINARY_FILES (-19)
418 #define N_EXCLUDE_FROM (-20)
419 #define N_INCLUDE_FROM (-21)
420 #define N_OM_SEPARATOR (-22)
421 #define N_MAX_BUFSIZE (-23)
422 #define N_OM_CAPTURE (-24)
423 #define N_ALLABSK (-25)
424
425 static option_item optionlist[] = {
426 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
427 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
428 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
429 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
430 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
431 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
432 { OP_SIZE, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
433 { OP_SIZE, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
434 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
435 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
436 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
437 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
438 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
439 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
440 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
441 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
442 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
443 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
444 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
445 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
446 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
447 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
448 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
449 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
450 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
451 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
452 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
453 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
454 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
455 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
456 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
457 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
458 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
459 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
460 { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
461 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
462 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
463 #ifdef SUPPORT_PCRE2GREP_JIT
464 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
465 #else
466 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
467 #endif
468 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
469 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
470 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
471 { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
472 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
473 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
474 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
475 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
476 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
477 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
478 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
479 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
480 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
481 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
482 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
483 { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
484 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
485 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
486 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
487 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
488 { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
489 { OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" },
490 { OP_NODATA, 0, NULL, NULL, NULL }
491 };
492
493 /* Table of names for newline types. Must be kept in step with the definitions
494 of PCRE2_NEWLINE_xx in pcre2.h. */
495
496 static const char *newlines[] = {
497 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
498
499 /* UTF-8 tables */
500
/* utf8_table1[i] is the largest value that ord2utf8() encodes in i+1 bytes. */

const int utf8_table1[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);

/* utf8_table2[i] holds the marker bits that ord2utf8() ORs into the first byte
of an (i+1)-byte sequence. utf8_table3 is not referenced in this part of the
file; presumably it holds the masks for the data bits of such a first byte -
TODO confirm against its users. */

const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* NOTE(review): not referenced in this part of the file; presumably the number
of additional bytes, indexed by the low 6 bits of a first byte - confirm. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
513
514
515 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
516 /*************************************************
517 * Emulated memmove() for systems without it *
518 *************************************************/
519
520 /* This function can make use of bcopy() if it is available. Otherwise do it by
521 steam, as there are some non-Unix environments that lack both memmove() and
522 bcopy(). */
523
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
  bcopy(s, d, n);
  return d;
#else
  unsigned char *out = (unsigned char *)d;
  const unsigned char *in = (const unsigned char *)s;

  if (out > in)
  {
    /* The destination lies above the source, so copy from the top downwards;
    that way overlapping source bytes are read before they are overwritten. */
    size_t remaining = n;
    while (remaining > 0)
    {
      remaining--;
      out[remaining] = in[remaining];
    }
  }
  else
  {
    /* Destination at or below the source: a plain ascending copy is safe. */
    size_t k;
    for (k = 0; k < n; k++) out[k] = in[k];
  }

  /* Like memmove(), hand back the start of the destination. */
  return d;
#endif   /* not HAVE_BCOPY */
}
548 #undef memmove
549 #define memmove(d,s,n) emulated_memmove(d,s,n)
550 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
551
552
553
554 /*************************************************
555 * Convert code point to UTF-8 *
556 *************************************************/
557
558 /* A static buffer is used. Returns the number of bytes. */
559
560 static int
ord2utf8(uint32_t value)561 ord2utf8(uint32_t value)
562 {
563 int i, j;
564 uint8_t *utf8bytes = utf8_buffer;
565 for (i = 0; i < utf8_table1_size; i++)
566 if (value <= (uint32_t)utf8_table1[i]) break;
567 utf8bytes += i;
568 for (j = i; j > 0; j--)
569 {
570 *utf8bytes-- = 0x80 | (value & 0x3f);
571 value >>= 6;
572 }
573 *utf8bytes = utf8_table2[i] | value;
574 return i + 1;
575 }
576
577
578
579 /*************************************************
580 * Case-independent string compare *
581 *************************************************/
582
/* Compares two zero-terminated strings, ignoring case as defined by tolower().

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal when lower-cased
            1 if, at the first difference, str1 has the greater byte
           -1 if, at the first difference, str2 has the greater byte
*/

static int
strcmpic(const char *str1, const char *str2)
{
  /* The bytes are read as unsigned char because passing a negative value
  (other than EOF) to tolower() is undefined behaviour, and plain char may be
  signed. */

  const unsigned char *a = (const unsigned char *)str1;
  const unsigned char *b = (const unsigned char *)str2;

  /* A terminating zero in only one string compares unequal to the other
  string's byte, so the loop never reads beyond either terminator. */

  while (*a != '\0' || *b != '\0')
  {
    unsigned int c1 = (unsigned int)tolower(*a++);
    unsigned int c2 = (unsigned int)tolower(*b++);
    if (c1 != c2) return (c1 > c2) ? 1 : -1;
  }
  return 0;
}
595
596
597 /*************************************************
598 * Parse GREP_COLORS *
599 *************************************************/
600
601 /* Extract ms or mt from GREP_COLORS.
602
603 Argument: the string, possibly NULL
604 Returns: the value of ms or mt, or NULL if neither present
605 */
606
static char *
parse_grep_colors(const char *gc)
{
  static char seq[16];   /* result buffer, overwritten by every successful call */
  const char *value;
  size_t n = 0;

  if (gc == NULL) return NULL;

  /* "ms=" takes precedence over "mt=" when both are present. */

  value = strstr(gc, "ms=");
  if (value == NULL) value = strstr(gc, "mt=");
  if (value == NULL) return NULL;
  value += 3;

  /* Copy up to the next colon or the end of the string, silently truncating
  anything that does not fit in the buffer. */

  for (; n < sizeof(seq) - 1; n++)
  {
    if (value[n] == ':' || value[n] == 0) break;
    seq[n] = value[n];
  }
  seq[n] = 0;
  return seq;
}
624
625
626 /*************************************************
627 * Exit from the program *
628 *************************************************/
629
630 /* If there has been a resource error, give a suitable message.
631
632 Argument: the return code
633 Returns: does not return
634 */
635
636 static void
pcre2grep_exit(int rc)637 pcre2grep_exit(int rc)
638 {
639 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
640 status of 1, which is not helpful. To help with this problem, define a symbol
641 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
642 therein. */
643
644 #ifdef __VMS
645 char val_buf[4];
646 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
647 $DESCRIPTOR(sym_val, val_buf);
648 sprintf(val_buf, "%d", rc);
649 sym_val.dsc$w_length = strlen(val_buf);
650 lib$set_symbol(&sym_nam, &sym_val);
651 #endif
652
653 if (resource_error)
654 {
655 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
656 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
657 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
658 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
659 }
660 exit(rc);
661 }
662
663
664 /*************************************************
665 * Add item to chain of patterns *
666 *************************************************/
667
668 /* Used to add an item onto a chain, or just return an unconnected item if the
669 "after" argument is NULL.
670
671 Arguments:
672 s pattern string to add
673 patlen length of pattern
674 after if not NULL points to item to insert after
675
676 Returns: new pattern block or NULL on error
677 */
678
679 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)680 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
681 {
682 patstr *p = (patstr *)malloc(sizeof(patstr));
683
684 /* LCOV_EXCL_START - These won't be hit in normal testing. */
685
686 if (p == NULL)
687 {
688 fprintf(stderr, "pcre2grep: malloc failed\n");
689 pcre2grep_exit(2);
690 }
691 if (patlen > MAXPATLEN)
692 {
693 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
694 MAXPATLEN);
695 free(p);
696 return NULL;
697 }
698
699 /* LCOV_EXCL_STOP */
700
701 p->next = NULL;
702 p->string = s;
703 p->length = patlen;
704 p->compiled = NULL;
705
706 if (after != NULL)
707 {
708 p->next = after->next;
709 after->next = p;
710 }
711 return p;
712 }
713
714
715 /*************************************************
716 * Free chain of patterns *
717 *************************************************/
718
719 /* Used for several chains of patterns.
720
721 Argument: pointer to start of chain
722 Returns: nothing
723 */
724
725 static void
free_pattern_chain(patstr * pc)726 free_pattern_chain(patstr *pc)
727 {
728 while (pc != NULL)
729 {
730 patstr *p = pc;
731 pc = p->next;
732 if (p->compiled != NULL) pcre2_code_free(p->compiled);
733 free(p);
734 }
735 }
736
737
738 /*************************************************
739 * Free chain of file names *
740 *************************************************/
741
742 /*
743 Argument: pointer to start of chain
744 Returns: nothing
745 */
746
747 static void
free_file_chain(fnstr * fn)748 free_file_chain(fnstr *fn)
749 {
750 while (fn != NULL)
751 {
752 fnstr *f = fn;
753 fn = f->next;
754 free(f);
755 }
756 }
757
758
759 /*************************************************
760 * OS-specific functions *
761 *************************************************/
762
763 /* These definitions are needed in all Windows environments, even those where
764 Unix-style directory scanning can be used (see below). */
765
766 #ifdef WIN32
767
768 #ifndef STRICT
769 # define STRICT
770 #endif
771 #ifndef WIN32_LEAN_AND_MEAN
772 # define WIN32_LEAN_AND_MEAN
773 #endif
774
775 #include <windows.h>
776
777 #define iswild(name) (strpbrk(name, "*?") != NULL)
778
/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 of the 3-bit
colour number are exchanged and bit 1 is kept. The argument is parenthesized
so that any expression can be passed; note that it is evaluated three times. */
#define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
781
782 static HANDLE hstdout;
783 static CONSOLE_SCREEN_BUFFER_INFO csbi;
784 static WORD match_colour;
785
786 static WORD
decode_ANSI_colour(const char * cs)787 decode_ANSI_colour(const char *cs)
788 {
789 WORD result = csbi.wAttributes;
790 while (*cs)
791 {
792 if (isdigit(*cs))
793 {
794 int code = atoi(cs);
795 if (code == 1) result |= 0x08;
796 else if (code == 4) result |= 0x8000;
797 else if (code == 5) result |= 0x80;
798 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
799 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
800 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
801 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
802 /* aixterm high intensity colour codes */
803 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
804 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
805
806 while (isdigit(*cs)) cs++;
807 }
808 if (*cs) cs++;
809 }
810 return result;
811 }
812
813
814 static void
init_colour_output()815 init_colour_output()
816 {
817 if (do_colour)
818 {
819 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
820 /* This fails when redirected to con; try again if so. */
821 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
822 {
823 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
824 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
825 GetConsoleScreenBufferInfo(hcon, &csbi);
826 CloseHandle(hcon);
827 }
828 match_colour = decode_ANSI_colour(colour_string);
829 /* No valid colour found - turn off colouring */
830 if (!match_colour) do_colour = FALSE;
831 }
832 }
833
834 #endif /* WIN32 */
835
836
837 /* The following sets of functions are defined so that they can be made system
838 specific. At present there are versions for Unix-style environments, Windows,
839 native z/OS, and "no support". */
840
841
842 /************* Directory scanning Unix-style and z/OS ***********/
843
844 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
845 #include <sys/types.h>
846 #include <sys/stat.h>
847 #include <dirent.h>
848
849 #if defined NATIVE_ZOS
850 /************* Directory and PDS/E scanning for z/OS ***********/
851 /************* z/OS looks mostly like Unix with USS ************/
852 /* However, z/OS needs the #include statements in this header */
853 #include "pcrzosfs.h"
854 /* That header is not included in the main PCRE distribution because
855 other apparatus is needed to compile pcre2grep for z/OS. The header
856 can be found in the special z/OS distribution, which is available
857 from www.zaconsultants.net or from www.cbttape.org. */
858 #endif
859
860 typedef DIR directory_type;
861 #define FILESEP '/'
862
/* Returns non-zero if filename names a directory. A name that cannot be
stat()ed is reported as "not a directory", in the expectation that opening it
as a file will then fail. */

static int
isdirectory(char *filename)
{
  struct stat info;
  return (stat(filename, &info) < 0) ? 0 : S_ISDIR(info.st_mode);
}
871
872 static directory_type *
opendirectory(char * filename)873 opendirectory(char *filename)
874 {
875 return opendir(filename);
876 }
877
878 static char *
readdirectory(directory_type * dir)879 readdirectory(directory_type *dir)
880 {
881 for (;;)
882 {
883 struct dirent *dent = readdir(dir);
884 if (dent == NULL) return NULL;
885 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
886 return dent->d_name;
887 }
888 /* Control never reaches here */
889 }
890
891 static void
closedirectory(directory_type * dir)892 closedirectory(directory_type *dir)
893 {
894 closedir(dir);
895 }
896
897
898 /************* Test for regular file, Unix-style **********/
899
/* Returns non-zero if filename names a regular file. A name that cannot be
stat()ed is reported as regular (1), in the expectation that opening it as a
file will then fail. */

static int
isregfile(char *filename)
{
  struct stat info;
  return (stat(filename, &info) < 0) ? 1 : S_ISREG(info.st_mode);
}
908
909
910 #if defined NATIVE_ZOS
911 /************* Test for a terminal in z/OS **********/
912 /* isatty() does not work in a TSO environment, so always give FALSE.*/
913
static BOOL
is_stdout_tty(void)
{
  /* isatty() does not work in a TSO environment, so stdout is never
  reported as a terminal in the native z/OS build. */
  return FALSE;
}
919
920 static BOOL
is_file_tty(FILE * f)921 is_file_tty(FILE *f)
922 {
923 return FALSE;
924 }
925
926
927 /************* Test for a terminal, Unix-style **********/
928
929 #else
930 static BOOL
is_stdout_tty(void)931 is_stdout_tty(void)
932 {
933 return isatty(fileno(stdout));
934 }
935
936 static BOOL
is_file_tty(FILE * f)937 is_file_tty(FILE *f)
938 {
939 return isatty(fileno(f));
940 }
941 #endif
942
943
944 /************* Print optionally coloured match Unix-style and z/OS **********/
945
946 static void
print_match(const void * buf,int length)947 print_match(const void *buf, int length)
948 {
949 if (length == 0) return;
950 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
951 FWRITE_IGNORE(buf, 1, length, stdout);
952 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
953 }
954
955 /* End of Unix-style or native z/OS environment functions. */
956
957
958 /************* Directory scanning in Windows ***********/
959
960 /* I (Philip Hazel) have no means of testing this code. It was contributed by
961 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
962 when it did not exist. David Byron added a patch that moved the #include of
963 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
964 */
965
966 #elif defined WIN32
967
968 #ifndef INVALID_FILE_ATTRIBUTES
969 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
970 #endif
971
/* State of one directory scan in the Windows environment. */
typedef struct directory_type
{
  HANDLE handle;           /* Search handle obtained from FindFirstFile() in
                              opendirectory(); released by FindClose() */
  BOOL first;              /* TRUE until readdirectory() has handed out the
                              entry that FindFirstFile() already fetched */
  WIN32_FIND_DATA data;    /* Entry filled in by FindFirstFile() or
                              FindNextFile(); cFileName is what gets returned */
} directory_type;

/* NOTE(review): presumably the separator placed between a directory name and
an entry name when paths are built - confirm at the point of use. */
#define FILESEP '/'
980
981 int
isdirectory(char * filename)982 isdirectory(char *filename)
983 {
984 DWORD attr = GetFileAttributes(filename);
985 if (attr == INVALID_FILE_ATTRIBUTES)
986 return 0;
987 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
988 }
989
990 directory_type *
opendirectory(char * filename)991 opendirectory(char *filename)
992 {
993 size_t len;
994 char *pattern;
995 directory_type *dir;
996 DWORD err;
997 len = strlen(filename);
998 pattern = (char *)malloc(len + 3);
999 dir = (directory_type *)malloc(sizeof(*dir));
1000 if ((pattern == NULL) || (dir == NULL))
1001 {
1002 fprintf(stderr, "pcre2grep: malloc failed\n");
1003 pcre2grep_exit(2);
1004 }
1005 memcpy(pattern, filename, len);
1006 if (iswild(filename))
1007 pattern[len] = 0;
1008 else
1009 memcpy(&(pattern[len]), "\\*", 3);
1010 dir->handle = FindFirstFile(pattern, &(dir->data));
1011 if (dir->handle != INVALID_HANDLE_VALUE)
1012 {
1013 free(pattern);
1014 dir->first = TRUE;
1015 return dir;
1016 }
1017 err = GetLastError();
1018 free(pattern);
1019 free(dir);
1020 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1021 return NULL;
1022 }
1023
1024 char *
readdirectory(directory_type * dir)1025 readdirectory(directory_type *dir)
1026 {
1027 for (;;)
1028 {
1029 if (!dir->first)
1030 {
1031 if (!FindNextFile(dir->handle, &(dir->data)))
1032 return NULL;
1033 }
1034 else
1035 {
1036 dir->first = FALSE;
1037 }
1038 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1039 return dir->data.cFileName;
1040 }
1041 #ifndef _MSC_VER
1042 return NULL; /* Keep compiler happy; never executed */
1043 #endif
1044 }
1045
1046 void
closedirectory(directory_type * dir)1047 closedirectory(directory_type *dir)
1048 {
1049 FindClose(dir->handle);
1050 free(dir);
1051 }
1052
1053
1054 /************* Test for regular file in Windows **********/
1055
1056 /* I don't know how to do this, or if it can be done; assume all paths are
1057 regular if they are not directories. */
1058
/* Anything that isdirectory() does not report as a directory is taken to be a
regular file.

Arguments:
  filename   the path to examine

Returns:     1 if isdirectory() gives zero for the path, otherwise 0
*/

int isregfile(char *filename)
{
  return isdirectory(filename) ? 0 : 1;
}
1063
1064
1065 /************* Test for a terminal in Windows **********/
1066
1067 static BOOL
is_stdout_tty(void)1068 is_stdout_tty(void)
1069 {
1070 return _isatty(_fileno(stdout));
1071 }
1072
1073 static BOOL
is_file_tty(FILE * f)1074 is_file_tty(FILE *f)
1075 {
1076 return _isatty(_fileno(f));
1077 }
1078
1079
1080 /************* Print optionally coloured match in Windows **********/
1081
1082 static void
print_match(const void * buf,int length)1083 print_match(const void *buf, int length)
1084 {
1085 if (length == 0) return;
1086 if (do_colour)
1087 {
1088 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1089 else SetConsoleTextAttribute(hstdout, match_colour);
1090 }
1091 FWRITE_IGNORE(buf, 1, length, stdout);
1092 if (do_colour)
1093 {
1094 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1095 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1096 }
1097 }
1098
1099 /* End of Windows functions */
1100
1101
1102 /************* Directory scanning when we can't do it ***********/
1103
1104 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1105
1106 #else
1107
/* NOTE(review): a zero separator presumably signals that no directory paths
are ever built in this environment - confirm at the point of use. */
#define FILESEP 0
/* There is no real directory handle here; the stub functions below never
return or use one. */
typedef void directory_type;
1110
/* Stub: nothing is ever reported as a directory. */
int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}
opendirectory(char * filename)1112 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* Stub: there are never any entries, so a null pointer is returned. */
char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}
/* Stub: there is nothing to close. */
void
closedirectory(directory_type *dir)
{
  (void)dir;
}
1115
1116
1117 /************* Test for regular file when we can't do it **********/
1118
1119 /* Assume all files are regular. */
1120
/* Stub: every path is taken to be a regular file. */
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
1122
1123
1124 /************* Test for a terminal when we can't do it **********/
1125
1126 static BOOL
is_stdout_tty(void)1127 is_stdout_tty(void)
1128 {
1129 return FALSE;
1130 }
1131
1132 static BOOL
is_file_tty(FILE * f)1133 is_file_tty(FILE *f)
1134 {
1135 return FALSE;
1136 }
1137
1138
1139 /************* Print optionally coloured match when we can't do it **********/
1140
/* Write a matched string to stdout without any colouring. Nothing is written
for an empty match.

Arguments:
  buf        the bytes to write
  length     the number of bytes
*/

static void
print_match(const void *buf, int length)
{
  if (length != 0)
    {
    FWRITE_IGNORE(buf, 1, length, stdout);
    }
}
1147
1148 #endif /* End of system-specific functions */
1149
1150
1151
1152 #ifndef HAVE_STRERROR
1153 /*************************************************
1154 * Provide strerror() for non-ANSI libraries *
1155 *************************************************/
1156
1157 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1158 in their libraries, but can provide the same facility by this simple
1159 alternative function. */
1160
/* The table of messages and its size, as supplied by the system library. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Arguments:
  n          an error number

Returns:     the entry for n in sys_errlist when n is in the range
             0 to sys_nerr-1, otherwise a fixed "unknown" text
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
1170 #endif /* HAVE_STRERROR */
1171
1172
1173
1174 /*************************************************
1175 * Usage function *
1176 *************************************************/
1177
1178 static int
usage(int rc)1179 usage(int rc)
1180 {
1181 option_item *op;
1182 fprintf(stderr, "Usage: pcre2grep [-");
1183 for (op = optionlist; op->one_char != 0; op++)
1184 {
1185 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1186 }
1187 fprintf(stderr, "] [long options] [pattern] [files]\n");
1188 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1189 "options.\n");
1190 return rc;
1191 }
1192
1193
1194
1195 /*************************************************
1196 * Help function *
1197 *************************************************/
1198
1199 static void
help(void)1200 help(void)
1201 {
1202 option_item *op;
1203
1204 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1205 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1206 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1207
1208 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1209 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1210 printf("All callout scripts in patterns are supported." STDOUT_NL);
1211 #else
1212 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1213 #endif
1214 #else
1215 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1216 #endif
1217
1218 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1219
1220 #ifdef SUPPORT_LIBZ
1221 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1222 #endif
1223
1224 #ifdef SUPPORT_LIBBZ2
1225 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1226 #endif
1227
1228 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1229 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1230 #else
1231 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1232 #endif
1233
1234 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1235 printf("Options:" STDOUT_NL);
1236
1237 for (op = optionlist; op->one_char != 0; op++)
1238 {
1239 int n;
1240 char s[4];
1241
1242 if (op->one_char > 0 && (op->long_name)[0] == 0)
1243 n = 31 - printf(" -%c", op->one_char);
1244 else
1245 {
1246 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1247 else strcpy(s, " ");
1248 n = 31 - printf(" %s --%s", s, op->long_name);
1249 }
1250
1251 if (n < 1) n = 1;
1252 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1253 }
1254
1255 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1256 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1257 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1258 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1259 printf("space is removed and blank lines are ignored." STDOUT_NL);
1260 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1261
1262 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1263 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1264 }
1265
1266
1267
1268 /*************************************************
1269 * Test exclude/includes *
1270 *************************************************/
1271
1272 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1273 there are no includes, the path must match an include pattern.
1274
1275 Arguments:
1276 path the path to be matched
1277 ip the chain of include patterns
1278 ep the chain of exclude patterns
1279
1280 Returns: TRUE if the path is not excluded
1281 */
1282
1283 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1284 test_incexc(char *path, patstr *ip, patstr *ep)
1285 {
1286 int plen = strlen((const char *)path);
1287
1288 for (; ep != NULL; ep = ep->next)
1289 {
1290 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1291 return FALSE;
1292 }
1293
1294 if (ip == NULL) return TRUE;
1295
1296 for (; ip != NULL; ip = ip->next)
1297 {
1298 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1299 return TRUE;
1300 }
1301
1302 return FALSE;
1303 }
1304
1305
1306
1307 /*************************************************
1308 * Decode integer argument value *
1309 *************************************************/
1310
1311 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1312 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1313 just keep it simple.
1314
1315 Arguments:
1316 option_data the option data string
1317 op the option item (for error messages)
1318 longop TRUE if option given in long form
1319
1320 Returns: a long integer
1321 */
1322
1323 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1324 decode_number(char *option_data, option_item *op, BOOL longop)
1325 {
1326 unsigned long int n = 0;
1327 char *endptr = option_data;
1328 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1329 while (isdigit((unsigned char)(*endptr)))
1330 n = n * 10 + (int)(*endptr++ - '0');
1331 if (toupper(*endptr) == 'K')
1332 {
1333 n *= 1024;
1334 endptr++;
1335 }
1336 else if (toupper(*endptr) == 'M')
1337 {
1338 n *= 1024*1024;
1339 endptr++;
1340 }
1341
1342 if (*endptr != 0) /* Error */
1343 {
1344 if (longop)
1345 {
1346 char *equals = strchr(op->long_name, '=');
1347 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1348 (int)(equals - op->long_name);
1349 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1350 option_data, nlen, op->long_name);
1351 }
1352 else
1353 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1354 option_data, op->one_char);
1355 pcre2grep_exit(usage(2));
1356 }
1357
1358 return n;
1359 }
1360
1361
1362
1363 /*************************************************
1364 * Add item to a chain of numbers *
1365 *************************************************/
1366
1367 /* Used to add an item onto a chain, or just return an unconnected item if the
1368 "after" argument is NULL.
1369
1370 Arguments:
1371 n the number to add
1372 after if not NULL points to item to insert after
1373
1374 Returns: new number block
1375 */
1376
1377 static omstr *
add_number(int n,omstr * after)1378 add_number(int n, omstr *after)
1379 {
1380 omstr *om = (omstr *)malloc(sizeof(omstr));
1381
1382 /* LCOV_EXCL_START - These lines won't be hit in normal testing. */
1383
1384 if (om == NULL)
1385 {
1386 fprintf(stderr, "pcre2grep: malloc failed\n");
1387 pcre2grep_exit(2);
1388 }
1389
1390 /* LCOV_EXCL_STOP */
1391
1392 om->next = NULL;
1393 om->groupnum = n;
1394
1395 if (after != NULL)
1396 {
1397 om->next = after->next;
1398 after->next = om;
1399 }
1400 return om;
1401 }
1402
1403
1404
1405 /*************************************************
1406 * Read one line of input *
1407 *************************************************/
1408
1409 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1410 BZ2_read) into a large buffer, so many lines may be read at once. However,
1411 doing this for tty input means that no output appears until a lot of input has
1412 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1413 for this, because it does not stop at a binary zero, and therefore there is no
1414 way of telling how many characters it has read, because there may be binary
1415 zeros embedded in the data. This function is also used for reading patterns
1416 from files (the -f option).
1417
1418 Arguments:
1419 buffer the buffer to read into
1420 length the maximum number of characters to read
1421 f the file
1422
1423 Returns: the number of characters read, zero at end of file
1424 */
1425
1426 static PCRE2_SIZE
read_one_line(char * buffer,PCRE2_SIZE length,FILE * f)1427 read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
1428 {
1429 int c;
1430 PCRE2_SIZE yield = 0;
1431 while ((c = fgetc(f)) != EOF)
1432 {
1433 buffer[yield++] = c;
1434 if (c == '\n' || yield >= length) break;
1435 }
1436 return yield;
1437 }
1438
1439
1440
1441 /*************************************************
1442 * Find end of line *
1443 *************************************************/
1444
1445 /* The length of the endline sequence that is found is set via lenptr. This may
1446 be zero at the very end of the file if there is no line-ending sequence there.
1447
1448 Arguments:
1449 p current position in line
1450 endptr end of available data
1451 lenptr where to put the length of the eol sequence
1452
1453 Returns: pointer after the last byte of the line,
1454 including the newline byte(s)
1455 */
1456
static char *
end_of_line(char *p, char *endptr, int *lenptr)
{
  /* The scan depends on the newline convention held in endlinetype. Every
  case either finds a line ending, sets its length in *lenptr, and returns
  the position just after it, or runs out of data, sets *lenptr to zero, and
  returns endptr. */

  switch(endlinetype)
    {
    default:      /* Just in case */
    case PCRE2_NEWLINE_LF:
    while (p < endptr && *p != '\n') p++;
    if (p < endptr)
      {
      *lenptr = 1;
      return p + 1;
      }
    *lenptr = 0;
    return endptr;

    case PCRE2_NEWLINE_CR:
    while (p < endptr && *p != '\r') p++;
    if (p < endptr)
      {
      *lenptr = 1;
      return p + 1;
      }
    *lenptr = 0;
    return endptr;

    case PCRE2_NEWLINE_NUL:
    while (p < endptr && *p != '\0') p++;
    if (p < endptr)
      {
      *lenptr = 1;
      return p + 1;
      }
    *lenptr = 0;
    return endptr;

    /* Only CR immediately followed by LF ends a line. After each CR, the
    pointer is advanced; if that reaches the end of the data there is no room
    for an LF. If the next byte is not LF, the search for a CR resumes from
    that byte. */

    case PCRE2_NEWLINE_CRLF:
    for (;;)
      {
      while (p < endptr && *p != '\r') p++;
      if (++p >= endptr)
        {
        *lenptr = 0;
        return endptr;
        }
      if (*p == '\n')
        {
        *lenptr = 2;
        return p + 1;
        }
      }
    break;

    /* CR, LF, or CRLF ends a line. The data is stepped through one character
    at a time; in UTF mode a character may occupy several bytes. */

    case PCRE2_NEWLINE_ANYCRLF:
    while (p < endptr)
      {
      int extra = 0;
      int c = *((unsigned char *)p);

      /* Assemble a multibyte character from its lead byte and the low six
      bits of each following byte. NOTE(review): p[gcii] is read without a
      check against endptr, so a multibyte character that is cut short at the
      end of the data is read beyond endptr. */

      if (utf && c >= 0xc0)
        {
        int gcii, gcss;
        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
        gcss = 6*extra;
        c = (c & utf8_table3[extra]) << gcss;
        for (gcii = 1; gcii <= extra; gcii++)
          {
          gcss -= 6;
          c |= (p[gcii] & 0x3f) << gcss;
          }
        }

      p += 1 + extra;    /* Now points after the character */

      switch (c)
        {
        case '\n':
        *lenptr = 1;
        return p;

        case '\r':
        if (p < endptr && *p == '\n')   /* CRLF counts as one line ending */
          {
          *lenptr = 2;
          p++;
          }
        else *lenptr = 1;
        return p;

        default:
        break;
        }
      }   /* End of loop for ANYCRLF case */

    *lenptr = 0;  /* Must have hit the end */
    return endptr;

    /* As for ANYCRLF, with VT and FF and, when not in an EBCDIC environment,
    NEL, LS, and PS also ending a line. */

    case PCRE2_NEWLINE_ANY:
    while (p < endptr)
      {
      int extra = 0;
      int c = *((unsigned char *)p);

      /* The same multibyte assembly as in the ANYCRLF case, with the same
      unchecked read of p[gcii]. */

      if (utf && c >= 0xc0)
        {
        int gcii, gcss;
        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
        gcss = 6*extra;
        c = (c & utf8_table3[extra]) << gcss;
        for (gcii = 1; gcii <= extra; gcii++)
          {
          gcss -= 6;
          c |= (p[gcii] & 0x3f) << gcss;
          }
        }

      p += 1 + extra;    /* Now points after the character */

      switch (c)
        {
        case '\n':    /* LF */
        case '\v':    /* VT */
        case '\f':    /* FF */
        *lenptr = 1;
        return p;

        case '\r':    /* CR */
        if (p < endptr && *p == '\n')   /* CRLF counts as one line ending */
          {
          *lenptr = 2;
          p++;
          }
        else *lenptr = 1;
        return p;

#ifndef EBCDIC
        case 0x85:    /* Unicode NEL */
        *lenptr = utf? 2 : 1;           /* Two bytes long in UTF mode */
        return p;

        case 0x2028:  /* Unicode LS */
        case 0x2029:  /* Unicode PS */
        *lenptr = 3;
        return p;
#endif  /* Not EBCDIC */

        default:
        break;
        }
      }   /* End of loop for ANY case */

    *lenptr = 0;  /* Must have hit the end */
    return endptr;
    }     /* End of overall switch */
}
1612
1613
1614
1615 /*************************************************
1616 * Find start of previous line *
1617 *************************************************/
1618
1619 /* This is called when looking back for before lines to print.
1620
1621 Arguments:
1622 p start of the subsequent line
1623 startptr start of available data
1624
1625 Returns: pointer to the start of the previous line
1626 */
1627
1628 static char *
previous_line(char * p,char * startptr)1629 previous_line(char *p, char *startptr)
1630 {
1631 switch(endlinetype)
1632 {
1633 default: /* Just in case */
1634 case PCRE2_NEWLINE_LF:
1635 p--;
1636 while (p > startptr && p[-1] != '\n') p--;
1637 return p;
1638
1639 case PCRE2_NEWLINE_CR:
1640 p--;
1641 while (p > startptr && p[-1] != '\n') p--;
1642 return p;
1643
1644 case PCRE2_NEWLINE_NUL:
1645 p--;
1646 while (p > startptr && p[-1] != '\0') p--;
1647 return p;
1648
1649 case PCRE2_NEWLINE_CRLF:
1650 for (;;)
1651 {
1652 p -= 2;
1653 while (p > startptr && p[-1] != '\n') p--;
1654 if (p <= startptr + 1 || p[-2] == '\r') return p;
1655 }
1656 /* Control can never get here */
1657
1658 case PCRE2_NEWLINE_ANY:
1659 case PCRE2_NEWLINE_ANYCRLF:
1660 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1661 if (utf) while ((*p & 0xc0) == 0x80) p--;
1662
1663 while (p > startptr)
1664 {
1665 unsigned int c;
1666 char *pp = p - 1;
1667
1668 if (utf)
1669 {
1670 int extra = 0;
1671 while ((*pp & 0xc0) == 0x80) pp--;
1672 c = *((unsigned char *)pp);
1673 if (c >= 0xc0)
1674 {
1675 int gcii, gcss;
1676 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1677 gcss = 6*extra;
1678 c = (c & utf8_table3[extra]) << gcss;
1679 for (gcii = 1; gcii <= extra; gcii++)
1680 {
1681 gcss -= 6;
1682 c |= (pp[gcii] & 0x3f) << gcss;
1683 }
1684 }
1685 }
1686 else c = *((unsigned char *)pp);
1687
1688 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1689 {
1690 case '\n': /* LF */
1691 case '\r': /* CR */
1692 return p;
1693
1694 default:
1695 break;
1696 }
1697
1698 else switch (c)
1699 {
1700 case '\n': /* LF */
1701 case '\v': /* VT */
1702 case '\f': /* FF */
1703 case '\r': /* CR */
1704 #ifndef EBCDIC
1705 case 0x85: /* Unicode NEL */
1706 case 0x2028: /* Unicode LS */
1707 case 0x2029: /* Unicode PS */
1708 #endif /* Not EBCDIC */
1709 return p;
1710
1711 default:
1712 break;
1713 }
1714
1715 p = pp; /* Back one character */
1716 } /* End of loop for ANY case */
1717
1718 return startptr; /* Hit start of data */
1719 } /* End of overall switch */
1720 }
1721
1722
1723
1724 /*************************************************
1725 * Output newline at end *
1726 *************************************************/
1727
1728 /* This function is called if the final line of a file has been written to
1729 stdout, but it does not have a terminating newline.
1730
1731 Arguments: none
1732 Returns: nothing
1733 */
1734
1735 static void
write_final_newline(void)1736 write_final_newline(void)
1737 {
1738 switch(endlinetype)
1739 {
1740 default: /* Just in case */
1741 case PCRE2_NEWLINE_LF:
1742 case PCRE2_NEWLINE_ANY:
1743 case PCRE2_NEWLINE_ANYCRLF:
1744 fprintf(stdout, "\n");
1745 break;
1746
1747 case PCRE2_NEWLINE_CR:
1748 fprintf(stdout, "\r");
1749 break;
1750
1751 case PCRE2_NEWLINE_CRLF:
1752 fprintf(stdout, "\r\n");
1753 break;
1754
1755 case PCRE2_NEWLINE_NUL:
1756 fprintf(stdout, "%c", 0);
1757 break;
1758 }
1759 }
1760
1761
1762 /*************************************************
1763 * Print the previous "after" lines *
1764 *************************************************/
1765
1766 /* This is called if we are about to lose said lines because of buffer filling,
1767 and at the end of the file. The data in the line is written using fwrite() so
1768 that a binary zero does not terminate it.
1769
1770 Arguments:
1771 lastmatchnumber the number of the last matching line, plus one
1772 lastmatchrestart where we restarted after the last match
1773 endptr end of available data
1774 printname filename for printing
1775
1776 Returns: nothing
1777 */
1778
1779 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1780 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1781 char *endptr, const char *printname)
1782 {
1783 if (after_context > 0 && lastmatchnumber > 0)
1784 {
1785 int count = 0;
1786 int ellength = 0;
1787 while (lastmatchrestart < endptr && count < after_context)
1788 {
1789 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1790 if (ellength == 0 && pp == main_buffer + bufsize) break;
1791 if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
1792 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1793 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1794 lastmatchrestart = pp;
1795 count++;
1796 }
1797
1798 /* If we have printed any lines, arrange for a hyphen separator if anything
1799 else follows. Also, if the last line is the final line in the file and it had
1800 no newline, add one. */
1801
1802 if (count > 0)
1803 {
1804 hyphenpending = TRUE;
1805 if (ellength == 0 && lastmatchrestart >= endptr)
1806 write_final_newline();
1807 }
1808 }
1809 }
1810
1811
1812
1813 /*************************************************
1814 * Apply patterns to subject till one matches *
1815 *************************************************/
1816
1817 /* This function is called to run through all the patterns, looking for a
1818 match. When all possible matches are required, for example, for colouring, it
1819 checks all patterns for matching, and returns the earliest match. Otherwise, it
1820 returns the first pattern that has matched.
1821
1822 Arguments:
1823 matchptr the start of the subject
1824 length the length of the subject to match
1825 options options for pcre2_match
1826 startoffset where to start matching
1827 mrc address of where to put the result of pcre2_match()
1828
1829 Returns: TRUE if there was a match, match_data and offsets are set
1830 FALSE if there was no match (but no errors)
1831 invert if there was a non-fatal error
1832 */
1833
1834 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1835 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1836 PCRE2_SIZE startoffset, int *mrc)
1837 {
1838 PCRE2_SIZE slen = length;
1839 int first = -1;
1840 int firstrc = 0;
1841 patstr *p = patterns;
1842 const char *msg = "this text:\n\n";
1843
1844 if (slen > 200)
1845 {
1846 slen = 200;
1847 msg = "text that starts:\n\n";
1848 }
1849
1850 for (int i = 1; p != NULL; p = p->next, i++)
1851 {
1852 int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1853 startoffset, options, match_data, match_context);
1854 if (rc == PCRE2_ERROR_NOMATCH) continue;
1855
1856 /* Handle a successful match. When all_matches is false, we are done.
1857 Otherwise we must save the earliest match. */
1858
1859 if (rc >= 0)
1860 {
1861 if (!all_matches)
1862 {
1863 *mrc = rc;
1864 return TRUE;
1865 }
1866
1867 if (first < 0 || offsets[0] < offsets_pair[first][0] ||
1868 (offsets[0] == offsets_pair[first][0] &&
1869 offsets[1] > offsets_pair[first][1]))
1870 {
1871 first = match_data_toggle;
1872 firstrc = rc;
1873 match_data_toggle ^= 1;
1874 match_data = match_data_pair[match_data_toggle];
1875 offsets = offsets_pair[match_data_toggle];
1876 }
1877 continue;
1878 }
1879
1880 /* Deal with PCRE2 error. */
1881
1882 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
1883 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1884 fprintf(stderr, "%s", msg);
1885 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1886 fprintf(stderr, "\n\n");
1887 if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
1888 rc >= PCRE2_ERROR_UTF8_ERR21)
1889 {
1890 unsigned char mbuffer[256];
1891 PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1892 (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
1893 fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1894 }
1895 if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
1896 rc == PCRE2_ERROR_HEAPLIMIT || rc == PCRE2_ERROR_JIT_STACKLIMIT)
1897 resource_error = TRUE;
1898 if (error_count++ > 20)
1899 {
1900 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1901 pcre2grep_exit(2);
1902 }
1903 return invert; /* No more matching; don't show the line again */
1904 }
1905
1906 /* We get here when all patterns have been tried. If all_matches is false,
1907 this means that none of them matched. If all_matches is true, matched_first
1908 will be non-NULL if there was at least one match, and it will point to the
1909 appropriate match_data block. */
1910
1911 if (!all_matches || first < 0) return FALSE;
1912
1913 match_data_toggle = first;
1914 match_data = match_data_pair[first];
1915 offsets = offsets_pair[first];
1916 *mrc = firstrc;
1917 return TRUE;
1918 }
1919
1920
1921
1922 /*************************************************
1923 * Decode dollar escape sequence *
1924 *************************************************/
1925
1926 /* Called from various places to decode $ escapes in output strings. The escape
1927 sequences are as follows:
1928
1929 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1930 zero is never returned; '0' is substituted.
1931
1932 $a returns bell.
1933 $b returns backspace.
1934 $e returns escape.
1935 $f returns form feed.
1936 $n returns newline.
1937 $r returns carriage return.
1938 $t returns tab.
1939 $v returns vertical tab.
1940 $o<digits> returns the character represented by the given octal
1941 number; up to three digits are processed.
1942 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1943 code points.
1944 $x<digits> returns the character represented by the given hexadecimal
1945 number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1947 code points.
1948 Any other character is substituted by itself. E.g: $$ is replaced by a single
1949 dollar.
1950
1951 Arguments:
1952 begin the start of the whole string
1953 string points to the $
1954 callout TRUE if in a callout (inhibits error messages)
1955 value where to return a value
1956 last where to return pointer to the last used character
1957
1958 Returns: DDE_ERROR after a syntax error
1959 DDE_CAPTURE if *value is a capture number
1960 DDE_CHAR if *value is a character code
1961 */
1962
1963 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1964 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1965 uint32_t *value, PCRE2_SPTR *last)
1966 {
1967 uint32_t c = 0;
1968 int base = 10;
1969 int dcount;
1970 int rc = DDE_CHAR;
1971 BOOL brace = FALSE;
1972
1973 switch (*(++string))
1974 {
1975 case 0: /* Syntax error: a character must be present after $. */
1976 if (!callout)
1977 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1978 (int)(string - begin), "no character after $");
1979 *last = string;
1980 return DDE_ERROR;
1981
1982 case '{':
1983 brace = TRUE;
1984 string++;
1985 if (!isdigit(*string)) /* Syntax error: a decimal number required. */
1986 {
1987 if (!callout)
1988 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1989 (int)(string - begin), "decimal number expected");
1990 rc = DDE_ERROR;
1991 break;
1992 }
1993
1994 /* Fall through */
1995
1996 /* The maximum capture number is 65535, so any number greater than that will
1997 always be an unknown capture number. We just stop incrementing, in order to
1998 avoid overflow. */
1999
2000 case '0': case '1': case '2': case '3': case '4':
2001 case '5': case '6': case '7': case '8': case '9':
2002 do
2003 {
2004 if (c <= 65535) c = c * 10 + (*string - '0');
2005 string++;
2006 }
2007 while (*string >= '0' && *string <= '9');
2008 string--; /* Point to last digit */
2009
2010 /* In a callout, capture number 0 is not available. No error can be given,
2011 so just return the character '0'. */
2012
2013 if (callout && c == 0)
2014 {
2015 *value = '0';
2016 }
2017 else
2018 {
2019 *value = c;
2020 rc = DDE_CAPTURE;
2021 }
2022 break;
2023
2024 /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
2025 for valid Unicode code points. */
2026
2027 case 'o':
2028 base = 8;
2029 string++;
2030 if (*string == '{')
2031 {
2032 brace = TRUE;
2033 string++;
2034 dcount = 7;
2035 }
2036 else dcount = 3;
2037 for (; dcount > 0; dcount--)
2038 {
2039 if (*string < '0' || *string > '7') break;
2040 c = c * 8 + (*string++ - '0');
2041 }
2042 *value = c;
2043 string--; /* Point to last digit */
2044 break;
2045
2046 /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2047 for valid Unicode code points. */
2048
2049 case 'x':
2050 base = 16;
2051 string++;
2052 if (*string == '{')
2053 {
2054 brace = TRUE;
2055 string++;
2056 dcount = 6;
2057 }
2058 else dcount = 2;
2059 for (; dcount > 0; dcount--)
2060 {
2061 if (!isxdigit(*string)) break;
2062 if (*string >= '0' && *string <= '9')
2063 c = c *16 + *string++ - '0';
2064 else
2065 c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2066 }
2067 *value = c;
2068 string--; /* Point to last digit */
2069 break;
2070
2071 case 'a': *value = '\a'; break;
2072 case 'b': *value = '\b'; break;
2073 #ifndef EBCDIC
2074 case 'e': *value = '\033'; break;
2075 #else
2076 case 'e': *value = '\047'; break;
2077 #endif
2078 case 'f': *value = '\f'; break;
2079 case 'n': *value = STDOUT_NL_CODE; break;
2080 case 'r': *value = '\r'; break;
2081 case 't': *value = '\t'; break;
2082 case 'v': *value = '\v'; break;
2083
2084 default: *value = *string; break;
2085 }
2086
2087 if (brace)
2088 {
2089 c = string[1];
2090 if (c != '}')
2091 {
2092 rc = DDE_ERROR;
2093 if (!callout)
2094 {
2095 if ((base == 8 && c >= '0' && c <= '7') ||
2096 (base == 16 && isxdigit(c)))
2097 {
2098 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2099 "too many %s digits\n", (int)(string - begin),
2100 (base == 8)? "octal" : "hex");
2101 }
2102 else
2103 {
2104 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2105 (int)(string - begin), "missing closing brace");
2106 }
2107 }
2108 }
2109 else string++;
2110 }
2111
2112 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2113
2114 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2115 {
2116 uint32_t max = utf? 0x0010ffffu : 0xffu;
2117 if (*value > max)
2118 {
2119 if (!callout)
2120 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2121 "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2122 rc = DDE_ERROR;
2123 }
2124 }
2125
2126 *last = string;
2127 return rc;
2128 }
2129
2130
2131
2132 /*************************************************
2133 * Check output text for errors *
2134 *************************************************/
2135
2136 /* Called early, to get errors before doing anything for -O text; also called
2137 from callouts to check before outputting.
2138
2139 Arguments:
2140 string an --output text string
2141 callout TRUE if in a callout (stops printing errors)
2142
2143 Returns: TRUE if OK, FALSE on error
2144 */
2145
2146 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2147 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2148 {
2149 uint32_t value;
2150 PCRE2_SPTR begin = string;
2151
2152 for (; *string != 0; string++)
2153 {
2154 if (*string == '$' &&
2155 decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2156 return FALSE;
2157 }
2158
2159 return TRUE;
2160 }
2161
2162
2163 /*************************************************
2164 * Display output text *
2165 *************************************************/
2166
2167 /* Display the output text, which is assumed to have already been syntax
2168 checked. Output may contain escape sequences started by the dollar sign.
2169
2170 Arguments:
2171 string: the output text
2172 callout: TRUE for the builtin callout, FALSE for --output
2173 subject the start of the subject
2174 ovector: capture offsets
2175 capture_top: number of captures
2176
2177 Returns: TRUE if something was output, other than newline
2178 FALSE if nothing was output, or newline was last output
2179 */
2180
2181 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2182 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2183 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2184 {
2185 uint32_t value;
2186 BOOL printed = FALSE;
2187 PCRE2_SPTR begin = string;
2188
2189 for (; *string != 0; string++)
2190 {
2191 if (*string == '$')
2192 {
2193 switch(decode_dollar_escape(begin, string, callout, &value, &string))
2194 {
2195 case DDE_CHAR:
2196 if (value == STDOUT_NL_CODE)
2197 {
2198 fprintf(stdout, STDOUT_NL);
2199 printed = FALSE;
2200 continue;
2201 }
2202 break; /* Will print value */
2203
2204 case DDE_CAPTURE:
2205 if (value < capture_top)
2206 {
2207 PCRE2_SIZE capturesize;
2208 value *= 2;
2209 capturesize = ovector[value + 1] - ovector[value];
2210 if (capturesize > 0)
2211 {
2212 print_match(subject + ovector[value], capturesize);
2213 printed = TRUE;
2214 }
2215 }
2216 continue;
2217
2218 /* LCOV_EXCL_START */
2219 default: /* Should not occur */
2220 break;
2221 /* LCOV_EXCL_STOP */
2222 }
2223 }
2224
2225 else value = *string; /* Not a $ escape */
2226
2227 if (!utf || value <= 127) fprintf(stdout, "%c", value); else
2228 {
2229 int n = ord2utf8(value);
2230 for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2231 }
2232
2233 printed = TRUE;
2234 }
2235
2236 return printed;
2237 }
2238
2239
2240 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2241
2242 /*************************************************
2243 * Parse and execute callout scripts *
2244 *************************************************/
2245
2246 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2247 string block and executes the program specified by the string. The string is a
2248 list of substrings separated by pipe characters. The first substring represents
2249 the executable name, and the following substrings specify the arguments:
2250
2251 program_name|param1|param2|...
2252
2253 Any substring (including the program name) can contain escape sequences
2254 started by the dollar character. The escape sequences are substituted as
2255 follows:
2256
2257 $<digits> or ${<digits>} is replaced by the captured substring of the given
2258 decimal number, which must be greater than zero. If the number is greater
2259 than the number of capturing substrings, or if the capture is unset, the
2260 replacement is empty.
2261
2262 Any other character is substituted by itself. E.g: $$ is replaced by a single
2263 dollar or $| replaced by a pipe character.
2264
Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output. This is the only form that is supported if
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is
used to separate each callout, defaulting to newline.
2269
2270 Example:
2271
2272 echo -e "abcde\n12345" | pcre2grep \
2273 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2274
2275 Output:
2276
2277 Arg1: [a] [bcd] [d] Arg2: |a| ()
2278 abcde
2279 Arg1: [1] [234] [4] Arg2: |1| ()
2280 12345
2281
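Arguments:
  calloutptr   the callout block
  unused       not used

Returns:       0 (match continues) if nothing was run or the external command
               gave a zero result; 1 (match fails) if an external command was
               run and gave a non-zero result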
2286 */
2287
2288 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2289 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2290 {
2291 PCRE2_SIZE length = calloutptr->callout_string_length;
2292 PCRE2_SPTR string = calloutptr->callout_string;
2293 PCRE2_SPTR subject = calloutptr->subject;
2294 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2295 PCRE2_SIZE capture_top = calloutptr->capture_top;
2296
2297 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2298 PCRE2_SIZE argsvectorlen = 2;
2299 PCRE2_SIZE argslen = 1;
2300 char *args;
2301 char *argsptr;
2302 char **argsvector;
2303 char **argsvectorptr;
2304 #ifndef WIN32
2305 pid_t pid;
2306 #endif
2307 int result = 0;
2308 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2309
2310 (void)unused; /* Avoid compiler warning */
2311
2312 /* Only callouts with strings are supported. */
2313
2314 if (string == NULL || length == 0) return 0;
2315
2316 /* If there's no command, output the remainder directly. */
2317
2318 if (*string == '|')
2319 {
2320 string++;
2321 if (!syntax_check_output_text(string, TRUE)) return 0;
2322 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2323 return 0;
2324 }
2325
2326 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2327 return 0;
2328 #else
2329
2330 /* Checking syntax and compute the number of string fragments. Callout strings
2331 are silently ignored in the event of a syntax error. */
2332
2333 while (length > 0)
2334 {
2335 if (*string == '|')
2336 {
2337 argsvectorlen++;
2338 if (argsvectorlen > 10000) return 0; /* Too many args */
2339 }
2340
2341 else if (*string == '$')
2342 {
2343 uint32_t value;
2344 PCRE2_SPTR begin = string;
2345
2346 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2347 {
2348 case DDE_CAPTURE:
2349 if (value < capture_top)
2350 {
2351 value *= 2;
2352 argslen += ovector[value + 1] - ovector[value];
2353 }
2354 argslen--; /* Negate the effect of argslen++ below. */
2355 break;
2356
2357 case DDE_CHAR:
2358 if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2359 else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2360 break;
2361
2362 /* LCOV_EXCL_START */
2363 default: /* Should not occur */
2364 case DDE_ERROR:
2365 return 0;
2366 /* LCOV_EXCL_STOP */
2367 }
2368
2369 length -= (string - begin);
2370 }
2371
2372 string++;
2373 length--;
2374 argslen++;
2375 }
2376
2377 /* Get memory for the argument vector and its strings. */
2378
2379 args = (char*)malloc(argslen);
2380 if (args == NULL) return 0;
2381
2382 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2383 if (argsvector == NULL)
2384 {
2385 /* LCOV_EXCL_START */
2386 free(args);
2387 return 0;
2388 /* LCOV_EXCL_STOP */
2389 }
2390
2391 /* Now reprocess the string and set up the arguments. */
2392
2393 argsptr = args;
2394 argsvectorptr = argsvector;
2395 *argsvectorptr++ = argsptr;
2396
2397 length = calloutptr->callout_string_length;
2398 string = calloutptr->callout_string;
2399
2400 while (length > 0)
2401 {
2402 if (*string == '|')
2403 {
2404 *argsptr++ = '\0';
2405 *argsvectorptr++ = argsptr;
2406 }
2407
2408 else if (*string == '$')
2409 {
2410 uint32_t value;
2411 PCRE2_SPTR begin = string;
2412
2413 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2414 {
2415 case DDE_CAPTURE:
2416 if (value < capture_top)
2417 {
2418 PCRE2_SIZE capturesize;
2419 value *= 2;
2420 capturesize = ovector[value + 1] - ovector[value];
2421 memcpy(argsptr, subject + ovector[value], capturesize);
2422 argsptr += capturesize;
2423 }
2424 break;
2425
2426 case DDE_CHAR:
2427 if (value == STDOUT_NL_CODE)
2428 {
2429 memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2430 argsptr += STDOUT_NL_LEN;
2431 }
2432 else if (utf && value > 127)
2433 {
2434 int n = ord2utf8(value);
2435 memcpy(argsptr, utf8_buffer, n);
2436 argsptr += n;
2437 }
2438 else
2439 {
2440 *argsptr++ = value;
2441 }
2442 break;
2443
2444 /* LCOV_EXCL_START */
2445 default: /* Even though this should not occur, the string having */
2446 case DDE_ERROR: /* been checked above, we need to include the free() */
2447 free(args); /* calls so that source checkers do not complain. */
2448 free(argsvector);
2449 return 0;
2450 /* LCOV_EXCL_STOP */
2451 }
2452
2453 length -= (string - begin);
2454 }
2455
2456 else *argsptr++ = *string;
2457
2458 /* Advance along the string */
2459
2460 string++;
2461 length--;
2462 }
2463
2464 *argsptr++ = '\0';
2465 *argsvectorptr = NULL;
2466
2467 /* Running an external command is system-dependent. Handle Windows and VMS as
2468 necessary, otherwise assume fork(). */
2469
2470 #ifdef WIN32
2471 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2472
2473 #elif defined __VMS
2474 {
2475 char cmdbuf[500];
2476 short i = 0;
2477 int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2478 $DESCRIPTOR(cmd, cmdbuf);
2479
2480 cmdbuf[0] = 0;
2481 while (argsvector[i])
2482 {
2483 strcat(cmdbuf, argsvector[i]);
2484 strcat(cmdbuf, " ");
2485 i++;
2486 }
2487 cmd.dsc$w_length = strlen(cmdbuf) - 1;
2488 status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2489 if (!(status & 1)) result = 0;
2490 else result = retstat & 1 ? 0 : 1;
2491 }
2492
2493 #else /* Neither Windows nor VMS */
2494 pid = fork();
2495 if (pid == 0)
2496 {
2497 (void)execv(argsvector[0], argsvector);
2498 /* Control gets here if there is an error, e.g. a non-existent program */
2499 exit(1);
2500 }
2501 else if (pid > 0)
2502 {
2503 (void)fflush(stdout);
2504 (void)waitpid(pid, &result, 0);
2505 (void)fflush(stdout);
2506 }
2507 #endif /* End Windows/VMS/other handling */
2508
2509 free(args);
2510 free(argsvector);
2511
2512 /* Currently negative return values are not supported, only zero (match
2513 continues) or non-zero (match fails). */
2514
2515 return result != 0;
2516 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2517 }
2518 #endif /* SUPPORT_PCRE2GREP_CALLOUT */
2519
2520
2521
2522 /*************************************************
2523 * Read a portion of the file into buffer *
2524 *************************************************/
2525
2526 static PCRE2_SIZE
fill_buffer(void * handle,int frtype,char * buffer,PCRE2_SIZE length,BOOL input_line_buffered)2527 fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
2528 BOOL input_line_buffered)
2529 {
2530 (void)frtype; /* Avoid warning when not used */
2531
2532 #ifdef SUPPORT_LIBZ
2533 if (frtype == FR_LIBZ)
2534 return gzread((gzFile)handle, buffer, length);
2535 else
2536 #endif
2537
2538 #ifdef SUPPORT_LIBBZ2
2539 if (frtype == FR_LIBBZ2)
2540 return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
2541 else
2542 #endif
2543
2544 return (input_line_buffered ?
2545 read_one_line(buffer, length, (FILE *)handle) :
2546 fread(buffer, 1, length, (FILE *)handle));
2547 }
2548
2549
2550
2551 /*************************************************
2552 * Grep an individual file *
2553 *************************************************/
2554
2555 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2556 times the value of bufthird. The matching point is never allowed to stray into
2557 the top third of the buffer, thus keeping more of the file available for
2558 context printing or for multiline scanning. For large files, the pointer will
2559 be in the middle third most of the time, so the bottom third is available for
2560 "before" context printing.
2561
2562 Arguments:
2563 handle the fopened FILE stream for a normal file
2564 the gzFile pointer when reading is via libz
2565 the BZFILE pointer when reading is via libbz2
2566 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2567 filename the file name or NULL (for errors)
2568 printname the file name if it is to be printed for each match
2569 or NULL if the file name is not to be printed
2570 it cannot be NULL if filenames[_nomatch]_only is set
2571
2572 Returns: 0 if there was at least one match
2573 1 otherwise (no matches)
2574 2 if an overlong line is encountered
2575 3 if there is a read error on a .bz2 file
2576 */
2577
2578 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2579 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2580 {
2581 int rc = 1;
2582 int filepos = 0;
2583 unsigned long int linenumber = 1;
2584 unsigned long int lastmatchnumber = 0;
2585 unsigned long int count = 0;
2586 long int count_matched_lines = 0;
2587 char *lastmatchrestart = main_buffer;
2588 char *ptr = main_buffer;
2589 char *endptr;
2590 PCRE2_SIZE bufflength;
2591 BOOL binary = FALSE;
2592 BOOL endhyphenpending = FALSE;
2593 BOOL lines_printed = FALSE;
2594 BOOL input_line_buffered = line_buffered;
2595 FILE *in = NULL; /* Ensure initialized */
2596 long stream_start = -1; /* Only non-negative if relevant */
2597
2598 /* Do the first read into the start of the buffer and set up the pointer to end
2599 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2600 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2601 fail. */
2602
2603 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2604 {
2605 in = (FILE *)handle;
2606 if (feof(in)) return 1;
2607 if (is_file_tty(in)) input_line_buffered = TRUE;
2608 else
2609 {
2610 if (count_limit >= 0 && filename == stdin_name)
2611 stream_start = ftell(in);
2612 }
2613 }
2614 else input_line_buffered = FALSE;
2615
2616 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2617 input_line_buffered);
2618
2619 #ifdef SUPPORT_LIBBZ2
2620 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3; /* Gotcha: bufflength is PCRE2_SIZE */
2621 #endif
2622
2623 endptr = main_buffer + bufflength;
2624
2625 /* Unless binary-files=text, see if we have a binary file. This uses the same
2626 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2627 file. However, when the newline convention is binary zero, we can't do this. */
2628
2629 if (binary_files != BIN_TEXT)
2630 {
2631 if (endlinetype != PCRE2_NEWLINE_NUL)
2632 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2633 != NULL;
2634 if (binary && binary_files == BIN_NOMATCH) return 1;
2635 }
2636
2637 /* Loop while the current pointer is not at the end of the file. For large
2638 files, endptr will be at the end of the buffer when we are in the middle of the
2639 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2640 way, the buffer is shifted left and re-filled. */
2641
2642 while (ptr < endptr)
2643 {
2644 int endlinelength;
2645 int mrc = 0;
2646 unsigned int options = 0;
2647 BOOL match;
2648 BOOL line_matched = FALSE;
2649 char *t = ptr;
2650 PCRE2_SIZE length, linelength;
2651 PCRE2_SIZE startoffset = 0;
2652
2653 /* If the -m option set a limit for the number of matched or non-matched
2654 lines, check it here. A limit of zero means that no matching is ever done.
2655 For stdin from a file, set the file position. */
2656
2657 if (count_limit >= 0 && count_matched_lines >= count_limit)
2658 {
2659 if (stream_start >= 0)
2660 (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2661 rc = (count_limit == 0)? 1 : 0;
2662 break;
2663 }
2664
2665 /* At this point, ptr is at the start of a line. We need to find the length
2666 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2667 length remainder of the data in the buffer. Otherwise, it is the length of
2668 the next line, excluding the terminating newline. After matching, we always
2669 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2670 option is used for compiling, so that any match is constrained to be in the
2671 first line. */
2672
2673 t = end_of_line(t, endptr, &endlinelength);
2674 linelength = t - ptr - endlinelength;
2675 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2676
2677 /* Check to see if the line we are looking at extends right to the very end
2678 of the buffer without a line terminator. This means the line is too long to
2679 handle at the current buffer size. Until the buffer reaches its maximum size,
2680 try doubling it and reading more data. */
2681
2682 if (endlinelength == 0 && t == main_buffer + bufsize)
2683 {
2684 if (bufthird < max_bufthird)
2685 {
2686 char *new_buffer;
2687 PCRE2_SIZE new_bufthird = 2*bufthird;
2688
2689 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2690 new_buffer = (char *)malloc(3*new_bufthird);
2691
2692 if (new_buffer == NULL)
2693 {
2694 /* LCOV_EXCL_START */
2695 fprintf(stderr,
2696 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2697 "pcre2grep: not enough memory to increase the buffer size to %"
2698 SIZ_FORM "\n",
2699 linenumber,
2700 (filename == NULL)? "" : " of file ",
2701 (filename == NULL)? "" : filename,
2702 new_bufthird);
2703 return 2;
2704 /* LCOV_EXCL_STOP */
2705 }
2706
2707 /* Copy the data and adjust pointers to the new buffer location. */
2708
2709 memcpy(new_buffer, main_buffer, bufsize);
2710 bufthird = new_bufthird;
2711 bufsize = 3*bufthird;
2712 ptr = new_buffer + (ptr - main_buffer);
2713 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2714 free(main_buffer);
2715 main_buffer = new_buffer;
2716
2717 /* Read more data into the buffer and then try to find the line ending
2718 again. */
2719
2720 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2721 bufsize - bufflength, input_line_buffered);
2722 endptr = main_buffer + bufflength;
2723 continue;
2724 }
2725 else
2726 {
2727 fprintf(stderr,
2728 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2729 "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
2730 "pcre2grep: use the --max-buffer-size option to change it\n",
2731 linenumber,
2732 (filename == NULL)? "" : " of file ",
2733 (filename == NULL)? "" : filename,
2734 bufthird);
2735 return 2;
2736 }
2737 }
2738
2739 /* We come back here after a match when only_matching_count is non-zero, in
2740 order to find any further matches in the same line. This applies to
2741 --only-matching, --file-offsets, and --line-offsets. */
2742
2743 ONLY_MATCHING_RESTART:
2744
2745 /* Run through all the patterns until one matches or there is an error other
2746 than NOMATCH. This code is in a subroutine so that it can be re-used for
2747 finding subsequent matches when colouring matched lines. After finding one
2748 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2749 this line. */
2750
2751 match = match_patterns(ptr, length, options, startoffset, &mrc);
2752 options = PCRE2_NOTEMPTY;
2753
2754 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2755 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2756 return code - to output data lines, so that binary zeroes are treated as just
2757 another data character. */
2758
2759 if (match != invert)
2760 {
2761 BOOL hyphenprinted = FALSE;
2762
2763 /* We've failed if we want a file that doesn't have any matches. */
2764
2765 if (filenames == FN_NOMATCH_ONLY) return 1;
2766
2767 /* Remember that this line matched (for counting matched lines) */
2768
2769 line_matched = TRUE;
2770
2771 /* If all we want is a yes/no answer, we can return immediately. */
2772
2773 if (quiet) return 0;
2774
2775 /* Just count if just counting is wanted. */
2776
2777 else if (count_only || show_total_count) count++;
2778
2779 /* When handling a binary file and binary-files==binary, the "binary"
2780 variable will be set true (it's false in all other cases). In this
2781 situation we just want to output the file name. No need to scan further. */
2782
2783 else if (binary)
2784 {
2785 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2786 return 0;
2787 }
2788
2789 /* Likewise, if all we want is a file name, there is no need to scan any
2790 more lines in the file. */
2791
2792 else if (filenames == FN_MATCH_ONLY)
2793 {
2794 fprintf(stdout, "%s", printname);
2795 if (printname_nl == NULL) fprintf(stdout, "%c", 0);
2796 else fprintf(stdout, "%s", printname_nl);
2797 return 0;
2798 }
2799
2800 /* The --only-matching option prints just the substring that matched,
2801 and/or one or more captured portions of it, as long as these strings are
2802 not empty. The --file-offsets and --line-offsets options output offsets for
2803 the matching substring (all three set only_matching_count non-zero). None
2804 of these mutually exclusive options prints any context. Afterwards, adjust
2805 the start and then jump back to look for further matches in the same line.
2806 If we are in invert mode, however, nothing is printed and we do not restart
2807 - this could still be useful because the return code is set. */
2808
2809 else if (only_matching_count != 0)
2810 {
2811 if (!invert)
2812 {
2813 PCRE2_SIZE oldstartoffset;
2814
2815 if (printname != NULL) fprintf(stdout, "%s%c", printname,
2816 printname_colon);
2817 if (number) fprintf(stdout, "%lu:", linenumber);
2818
2819 /* Handle --line-offsets */
2820
2821 if (line_offsets)
2822 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2823 (int)(offsets[1] - offsets[0]));
2824
2825 /* Handle --file-offsets */
2826
2827 else if (file_offsets)
2828 fprintf(stdout, "%d,%d" STDOUT_NL,
2829 (int)(filepos + ptr + offsets[0] - ptr),
2830 (int)(offsets[1] - offsets[0]));
2831
2832 /* Handle --output (which has already been syntax checked) */
2833
2834 else if (output_text != NULL)
2835 {
2836 (void)display_output_text((PCRE2_SPTR)output_text, FALSE,
2837 (PCRE2_SPTR)ptr, offsets, mrc);
2838 fprintf(stdout, STDOUT_NL);
2839 }
2840
2841 /* Handle --only-matching, which may occur many times */
2842
2843 else
2844 {
2845 BOOL printed = FALSE;
2846 omstr *om;
2847
2848 for (om = only_matching; om != NULL; om = om->next)
2849 {
2850 int n = om->groupnum;
2851 if (n == 0 || n < mrc)
2852 {
2853 int plen = offsets[2*n + 1] - offsets[2*n];
2854 if (plen > 0)
2855 {
2856 if (printed && om_separator != NULL)
2857 fprintf(stdout, "%s", om_separator);
2858 print_match(ptr + offsets[n*2], plen);
2859 printed = TRUE;
2860 }
2861 }
2862 }
2863 if (printed || printname != NULL || number)
2864 fprintf(stdout, STDOUT_NL);
2865 }
2866
2867 /* Prepare to repeat to find the next match in the line. */
2868
2869 match = FALSE;
2870 if (line_buffered) fflush(stdout);
2871 rc = 0; /* Had some success */
2872
2873 /* If the pattern contained a lookbehind that included \K, it is
2874 possible that the end of the match might be at or before the actual
2875 starting offset we have just used. In this case, start one character
2876 further on. */
2877
2878 startoffset = offsets[1]; /* Restart after the match */
2879 oldstartoffset = pcre2_get_startchar(match_data);
2880 if (startoffset <= oldstartoffset)
2881 {
2882 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2883 startoffset = oldstartoffset + 1;
2884 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2885 }
2886
2887 /* If the current match ended past the end of the line (only possible
2888 in multiline mode), we must move on to the line in which it did end
2889 before searching for more matches. */
2890
2891 while (startoffset > linelength)
2892 {
2893 ptr += linelength + endlinelength;
2894 filepos += (int)(linelength + endlinelength);
2895 linenumber++;
2896 startoffset -= (int)(linelength + endlinelength);
2897 t = end_of_line(ptr, endptr, &endlinelength);
2898 linelength = t - ptr - endlinelength;
2899 length = (PCRE2_SIZE)(endptr - ptr);
2900 }
2901
2902 goto ONLY_MATCHING_RESTART;
2903 }
2904 }
2905
2906 /* This is the default case when none of the above options is set. We print
2907 the matching lines(s), possibly preceded and/or followed by other lines of
2908 context. */
2909
2910 else
2911 {
2912 lines_printed = TRUE;
2913
2914 /* See if there is a requirement to print some "after" lines from a
2915 previous match. We never print any overlaps. */
2916
2917 if (after_context > 0 && lastmatchnumber > 0)
2918 {
2919 int ellength;
2920 int linecount = 0;
2921 char *p = lastmatchrestart;
2922
2923 while (p < ptr && linecount < after_context)
2924 {
2925 p = end_of_line(p, ptr, &ellength);
2926 linecount++;
2927 }
2928
2929 /* It is important to advance lastmatchrestart during this printing so
2930 that it interacts correctly with any "before" printing below. Print
2931 each line's data using fwrite() in case there are binary zeroes. */
2932
2933 while (lastmatchrestart < p)
2934 {
2935 char *pp = lastmatchrestart;
2936 if (printname != NULL) fprintf(stdout, "%s%c", printname,
2937 printname_hyphen);
2938 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2939 pp = end_of_line(pp, endptr, &ellength);
2940 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2941 lastmatchrestart = pp;
2942 }
2943 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2944 }
2945
2946 /* If there were non-contiguous lines printed above, insert hyphens. */
2947
2948 if (hyphenpending)
2949 {
2950 fprintf(stdout, "--" STDOUT_NL);
2951 hyphenpending = FALSE;
2952 hyphenprinted = TRUE;
2953 }
2954
2955 /* See if there is a requirement to print some "before" lines for this
2956 match. Again, don't print overlaps. */
2957
2958 if (before_context > 0)
2959 {
2960 int linecount = 0;
2961 char *p = ptr;
2962
2963 while (p > main_buffer &&
2964 (lastmatchnumber == 0 || p > lastmatchrestart) &&
2965 linecount < before_context)
2966 {
2967 linecount++;
2968 p = previous_line(p, main_buffer);
2969 }
2970
2971 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2972 fprintf(stdout, "--" STDOUT_NL);
2973
2974 while (p < ptr)
2975 {
2976 int ellength;
2977 char *pp = p;
2978 if (printname != NULL) fprintf(stdout, "%s%c", printname,
2979 printname_hyphen);
2980 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2981 pp = end_of_line(pp, endptr, &ellength);
2982 FWRITE_IGNORE(p, 1, pp - p, stdout);
2983 p = pp;
2984 }
2985 }
2986
2987 /* Now print the matching line(s); ensure we set hyphenpending at the end
2988 of the file if any context lines are being output. */
2989
2990 if (after_context > 0 || before_context > 0)
2991 endhyphenpending = TRUE;
2992
2993 if (printname != NULL) fprintf(stdout, "%s%c", printname,
2994 printname_colon);
2995 if (number) fprintf(stdout, "%lu:", linenumber);
2996
2997 /* In multiline mode, or if colouring, we have to split the line(s) up
2998 and search for further matches, but not of course if the line is a
2999 non-match. In multiline mode this is necessary in case there is another
3000 match that spans the end of the current line. When colouring we want to
3001 colour all matches. */
3002
3003 if ((multiline || do_colour) && !invert)
3004 {
3005 int plength;
3006 PCRE2_SIZE endprevious;
3007
3008 /* The use of \K may make the end offset earlier than the start. In
3009 this situation, swap them round. */
3010
3011 if (offsets[0] > offsets[1])
3012 {
3013 PCRE2_SIZE temp = offsets[0];
3014 offsets[0] = offsets[1];
3015 offsets[1] = temp;
3016 }
3017
3018 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3019 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3020
3021 for (;;)
3022 {
3023 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3024
3025 endprevious = offsets[1];
3026 startoffset = endprevious; /* Advance after previous match. */
3027
3028 /* If the pattern contained a lookbehind that included \K, it is
3029 possible that the end of the match might be at or before the actual
3030 starting offset we have just used. In this case, start one character
3031 further on. */
3032
3033 if (startoffset <= oldstartoffset)
3034 {
3035 startoffset = oldstartoffset + 1;
3036 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3037 }
3038
3039 /* If the current match ended past the end of the line (only possible
3040 in multiline mode), we must move on to the line in which it did end
3041 before searching for more matches. Because the PCRE2_FIRSTLINE option
3042 is set, the start of the match will always be before the first
3043 newline sequence. */
3044
3045 while (startoffset > linelength + endlinelength)
3046 {
3047 ptr += linelength + endlinelength;
3048 filepos += (int)(linelength + endlinelength);
3049 linenumber++;
3050 startoffset -= (int)(linelength + endlinelength);
3051 endprevious -= (int)(linelength + endlinelength);
3052 t = end_of_line(ptr, endptr, &endlinelength);
3053 linelength = t - ptr - endlinelength;
3054 length = (PCRE2_SIZE)(endptr - ptr);
3055 }
3056
3057 /* If startoffset is at the exact end of the line it means this
3058 complete line was the final part of the match, so there is nothing
3059 more to do. */
3060
3061 if (startoffset == linelength + endlinelength) break;
3062
3063 /* Otherwise, run a match from within the final line, and if found,
3064 loop for any that may follow. */
3065
3066 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3067
3068 /* The use of \K may make the end offset earlier than the start. In
3069 this situation, swap them round. */
3070
3071 if (offsets[0] > offsets[1])
3072 {
3073 PCRE2_SIZE temp = offsets[0];
3074 offsets[0] = offsets[1];
3075 offsets[1] = temp;
3076 }
3077
3078 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3079 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3080 }
3081
3082 /* In multiline mode, we may have already printed the complete line
3083 and its line-ending characters (if they matched the pattern), so there
3084 may be no more to print. */
3085
3086 plength = (int)((linelength + endlinelength) - endprevious);
3087 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3088 }
3089
3090 /* Not colouring or multiline; no need to search for further matches. */
3091
3092 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3093 }
3094
3095 /* End of doing what has to be done for a match. If --line-buffered was
3096 given, flush the output. */
3097
3098 if (line_buffered) fflush(stdout);
3099 rc = 0; /* Had some success */
3100
3101 /* Remember where the last match happened for after_context. We remember
3102 where we are about to restart, and that line's number. */
3103
3104 lastmatchrestart = ptr + linelength + endlinelength;
3105 lastmatchnumber = linenumber + 1;
3106
3107 /* If a line was printed and we are now at the end of the file and the last
3108 line had no newline, output one. */
3109
3110 if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3111 write_final_newline();
3112 }
3113
3114 /* For a match in multiline inverted mode (which of course did not cause
3115 anything to be printed), we have to move on to the end of the match before
3116 proceeding. */
3117
3118 if (multiline && invert && match)
3119 {
3120 int ellength;
3121 char *endmatch = ptr + offsets[1];
3122 t = ptr;
3123 while (t < endmatch)
3124 {
3125 t = end_of_line(t, endptr, &ellength);
3126 if (t <= endmatch) linenumber++; else break;
3127 }
3128 endmatch = end_of_line(endmatch, endptr, &ellength);
3129 linelength = endmatch - ptr - ellength;
3130 }
3131
3132 /* Advance to after the newline and increment the line number. The file
3133 offset to the current line is maintained in filepos. */
3134
3135 END_ONE_MATCH:
3136 ptr += linelength + endlinelength;
3137 filepos += (int)(linelength + endlinelength);
3138 linenumber++;
3139
3140 /* If there was at least one match (or a non-match, as required) in the line,
3141 increment the count for the -m option. */
3142
3143 if (line_matched) count_matched_lines++;
3144
3145 /* If input is line buffered, and the buffer is not yet full, read another
3146 line and add it into the buffer. */
3147
3148 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3149 {
3150 PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
3151 bufflength += add;
3152 endptr += add;
3153 }
3154
3155 /* If we haven't yet reached the end of the file (the buffer is full), and
3156 the current point is in the top 1/3 of the buffer, slide the buffer down by
3157 1/3 and refill it. Before we do this, if some unprinted "after" lines are
3158 about to be lost, print them. */
3159
3160 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3161 {
3162 if (after_context > 0 &&
3163 lastmatchnumber > 0 &&
3164 lastmatchrestart < main_buffer + bufthird)
3165 {
3166 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3167 lastmatchnumber = 0; /* Indicates no after lines pending */
3168 }
3169
3170 /* Now do the shuffle */
3171
3172 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3173 ptr -= bufthird;
3174
3175 bufflength = 2*bufthird + fill_buffer(handle, frtype,
3176 main_buffer + 2*bufthird, bufthird, input_line_buffered);
3177 endptr = main_buffer + bufflength;
3178
3179 /* Adjust any last match point */
3180
3181 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3182 }
3183 } /* Loop through the whole file */
3184
3185 /* End of file; print final "after" lines if wanted; do_after_lines sets
3186 hyphenpending if it prints something. */
3187
3188 if (only_matching_count == 0 && !(count_only|show_total_count))
3189 {
3190 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3191 hyphenpending |= endhyphenpending;
3192 }
3193
3194 /* Print the file name if we are looking for those without matches and there
3195 were none. If we found a match, we won't have got this far. */
3196
3197 if (filenames == FN_NOMATCH_ONLY)
3198 {
3199 fprintf(stdout, "%s", printname);
3200 if (printname_nl == NULL) fprintf(stdout, "%c", 0);
3201 else fprintf(stdout, "%s", printname_nl);
3202 return 0;
3203 }
3204
3205 /* Print the match count if wanted */
3206
3207 if (count_only && !quiet)
3208 {
3209 if (count > 0 || !omit_zero_count)
3210 {
3211 if (printname != NULL && filenames != FN_NONE)
3212 fprintf(stdout, "%s%c", printname, printname_colon);
3213 fprintf(stdout, "%lu" STDOUT_NL, count);
3214 counts_printed++;
3215 }
3216 }
3217
3218 total_count += count; /* Can be set without count_only */
3219 return rc;
3220 }
3221
3222
3223
3224 /*************************************************
3225 * Grep a file or recurse into a directory *
3226 *************************************************/
3227
3228 /* Given a path name, if it's a directory, scan all the files if we are
3229 recursing; if it's a file, grep it.
3230
3231 Arguments:
3232 pathname the path to investigate
3233 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3234 only_one_at_top TRUE if the path is the only one at toplevel
3235
3236 Returns: -1 the file/directory was skipped
3237 0 if there was at least one match
3238 1 if there were no matches
3239 2 there was some kind of error
3240
3241 However, file opening failures are suppressed if "silent" is set.
3242 */
3243
3244 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3245 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3246 {
3247 int rc = 1;
3248 int frtype;
3249 void *handle;
3250 char *lastcomp;
3251 FILE *in = NULL; /* Ensure initialized */
3252
3253 #ifdef SUPPORT_LIBZ
3254 gzFile ingz = NULL;
3255 #endif
3256
3257 #ifdef SUPPORT_LIBBZ2
3258 BZFILE *inbz2 = NULL;
3259 #endif
3260
3261 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3262 int pathlen;
3263 #endif
3264
3265 #if defined NATIVE_ZOS
3266 int zos_type;
3267 FILE *zos_test_file;
3268 #endif
3269
3270 /* If the file name is "-" we scan stdin */
3271
3272 if (strcmp(pathname, "-") == 0)
3273 {
3274 if (count_limit >= 0) setbuf(stdin, NULL);
3275 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3276 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3277 stdin_name : NULL);
3278 }
3279
3280 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3281 directories, whereas --include and --exclude apply to everything else. The test
3282 is against the final component of the path. */
3283
3284 lastcomp = strrchr(pathname, FILESEP);
3285 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3286
3287 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3288 Otherwise, scan the directory and recurse for each path within it. The scanning
3289 code is localized so it can be made system-specific. */
3290
3291
3292 /* For z/OS, determine the file type. */
3293
3294 #if defined NATIVE_ZOS
3295 zos_test_file = fopen(pathname,"rb");
3296
3297 if (zos_test_file == NULL)
3298 {
3299 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3300 pathname, strerror(errno));
3301 return -1;
3302 }
3303 zos_type = identifyzosfiletype (zos_test_file);
3304 fclose (zos_test_file);
3305
3306 /* Handle a PDS in separate code */
3307
3308 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3309 {
3310 return travelonpdsdir (pathname, only_one_at_top);
3311 }
3312
3313 /* Deal with regular files in the normal way below. These types are:
3314 zos_type == __ZOS_PDS_MEMBER
3315 zos_type == __ZOS_PS
3316 zos_type == __ZOS_VSAM_KSDS
3317 zos_type == __ZOS_VSAM_ESDS
3318 zos_type == __ZOS_VSAM_RRDS
3319 */
3320
3321 /* Handle a z/OS directory using common code. */
3322
3323 else if (zos_type == __ZOS_HFS)
3324 {
3325 #endif /* NATIVE_ZOS */
3326
3327
3328 /* Handle directories: common code for all OS */
3329
3330 if (isdirectory(pathname))
3331 {
3332 if (dee_action == dee_SKIP ||
3333 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3334 return -1;
3335
3336 if (dee_action == dee_RECURSE)
3337 {
3338 char childpath[FNBUFSIZ];
3339 char *nextfile;
3340 directory_type *dir = opendirectory(pathname);
3341
3342 if (dir == NULL)
3343 {
3344 /* LCOV_EXCL_START - this is a "never" event */
3345 if (!silent)
3346 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3347 strerror(errno));
3348 return 2;
3349 /* LCOV_EXCL_STOP */
3350 }
3351
3352 while ((nextfile = readdirectory(dir)) != NULL)
3353 {
3354 int frc;
3355 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3356 if (fnlength > FNBUFSIZ)
3357 {
3358 /* LCOV_EXCL_START - this is a "never" event */
3359 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3360 rc = 2;
3361 break;
3362 /* LCOV_EXCL_STOP */
3363 }
3364 sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3365
3366 /* If the realpath() function is available, we can try to prevent endless
3367 recursion caused by a symlink pointing to a parent directory (GitHub
3368 issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3369 Modified to avoid using strlcat() because that isn't a standard C
3370 function, and also modified not to copy back the fully resolved path,
3371 because that affects the output from pcre2grep. */
3372
3373 #ifdef HAVE_REALPATH
3374 {
3375 char resolvedpath[PATH_MAX];
3376 BOOL isSame;
3377 size_t rlen;
3378 if (realpath(childpath, resolvedpath) == NULL)
3379 /* LCOV_EXCL_START - this is a "never" event */
3380 continue; /* This path is invalid - we can skip processing this */
3381 /* LCOV_EXCL_STOP */
3382 isSame = strcmp(pathname, resolvedpath) == 0;
3383 if (isSame) continue; /* We have a recursion */
3384 rlen = strlen(resolvedpath);
3385 if (rlen++ < sizeof(resolvedpath) - 3)
3386 {
3387 BOOL contained;
3388 strcat(resolvedpath, "/");
3389 contained = strncmp(pathname, resolvedpath, rlen) == 0;
3390 if (contained) continue; /* We have a recursion */
3391 }
3392 }
3393 #endif /* HAVE_REALPATH */
3394
3395 frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3396 if (frc > 1) rc = frc;
3397 else if (frc == 0 && rc == 1) rc = 0;
3398 }
3399
3400 closedirectory(dir);
3401 return rc;
3402 }
3403 }
3404
3405 #ifdef WIN32
3406 if (iswild(pathname))
3407 {
3408 char buffer[1024];
3409 char *nextfile;
3410 char *name;
3411 directory_type *dir = opendirectory(pathname);
3412
3413 if (dir == NULL)
3414 return 0;
3415
3416 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3417 if (*nextfile == '/' || *nextfile == '\\')
3418 name = nextfile + 1;
3419 *name = 0;
3420
3421 while ((nextfile = readdirectory(dir)) != NULL)
3422 {
3423 int frc;
3424 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3425 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3426 if (frc > 1) rc = frc;
3427 else if (frc == 0 && rc == 1) rc = 0;
3428 }
3429
3430 closedirectory(dir);
3431 return rc;
3432 }
3433 #endif
3434
3435 #if defined NATIVE_ZOS
3436 }
3437 #endif
3438
3439 /* If the file is not a directory, check for a regular file, and if it is not,
3440 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3441 exclusion. */
3442
3443 else if (
3444 #if defined NATIVE_ZOS
3445 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3446 #else /* all other OS */
3447 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3448 #endif
3449 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3450 return -1; /* File skipped */
3451
3452 /* Control reaches here if we have a regular file, or if we have a directory
3453 and recursion or skipping was not requested, or if we have anything else and
3454 skipping was not requested. The scan proceeds. If this is the first and only
3455 argument at top level, we don't show the file name, unless we are only showing
3456 the file name, or the filename was forced (-H). */
3457
3458 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3459 pathlen = (int)(strlen(pathname));
3460 #endif
3461
3462 /* Open using zlib if it is supported and the file name ends with .gz. */
3463
3464 #ifdef SUPPORT_LIBZ
3465 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3466 {
3467 ingz = gzopen(pathname, "rb");
3468 if (ingz == NULL)
3469 {
3470 /* LCOV_EXCL_START */
3471 if (!silent)
3472 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3473 strerror(errno));
3474 return 2;
3475 /* LCOV_EXCL_STOP */
3476 }
3477 handle = (void *)ingz;
3478 frtype = FR_LIBZ;
3479 }
3480 else
3481 #endif
3482
3483 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3484
3485 #ifdef SUPPORT_LIBBZ2
3486 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3487 {
3488 inbz2 = BZ2_bzopen(pathname, "rb");
3489 handle = (void *)inbz2;
3490 frtype = FR_LIBBZ2;
3491 }
3492 else
3493 #endif
3494
3495 /* Otherwise use plain fopen(). The label is so that we can come back here if
3496 an attempt to read a .bz2 file indicates that it really is a plain file. */
3497
3498 #ifdef SUPPORT_LIBBZ2
3499 PLAIN_FILE:
3500 #endif
3501 {
3502 in = fopen(pathname, "rb");
3503 handle = (void *)in;
3504 frtype = FR_PLAIN;
3505 }
3506
3507 /* All the opening methods return errno when they fail. */
3508
3509 if (handle == NULL)
3510 {
3511 if (!silent)
3512 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3513 strerror(errno));
3514 return 2;
3515 }
3516
3517 /* Now grep the file */
3518
3519 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3520 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3521
3522 /* Close in an appropriate manner. */
3523
3524 #ifdef SUPPORT_LIBZ
3525 if (frtype == FR_LIBZ)
3526 gzclose(ingz);
3527 else
3528 #endif
3529
3530 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3531 read failed. If the error indicates that the file isn't in fact bzipped, try
3532 again as a normal file. */
3533
3534 #ifdef SUPPORT_LIBBZ2
3535 if (frtype == FR_LIBBZ2)
3536 {
3537 if (rc == 3)
3538 {
3539 int errnum;
3540 const char *err = BZ2_bzerror(inbz2, &errnum);
3541 if (errnum == BZ_DATA_ERROR_MAGIC)
3542 {
3543 BZ2_bzclose(inbz2);
3544 goto PLAIN_FILE;
3545 }
3546 /* LCOV_EXCL_START */
3547 else if (!silent)
3548 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3549 pathname, err);
3550 rc = 2; /* The normal "something went wrong" code */
3551 /* LCOV_EXCL_STOP */
3552 }
3553 BZ2_bzclose(inbz2);
3554 }
3555 else
3556 #endif
3557
3558 /* Normal file close */
3559
3560 fclose(in);
3561
3562 /* Pass back the yield from pcre2grep(). */
3563
3564 return rc;
3565 }
3566
3567
3568
3569 /*************************************************
3570 * Handle a no-data option *
3571 *************************************************/
3572
3573 /* This is called when a known option has been identified. */
3574
3575 static int
handle_option(int letter,int options)3576 handle_option(int letter, int options)
3577 {
3578 switch(letter)
3579 {
3580 case N_FOFFSETS: file_offsets = TRUE; break;
3581 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3582 case N_LBUFFER: line_buffered = TRUE; break;
3583 case N_LOFFSETS: line_offsets = number = TRUE; break;
3584 case N_NOJIT: use_jit = FALSE; break;
3585 case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3586 case 'a': binary_files = BIN_TEXT; break;
3587 case 'c': count_only = TRUE; break;
3588 case 'F': options |= PCRE2_LITERAL; break;
3589 case 'H': filenames = FN_FORCE; break;
3590 case 'I': binary_files = BIN_NOMATCH; break;
3591 case 'h': filenames = FN_NONE; break;
3592 case 'i': options |= PCRE2_CASELESS; break;
3593 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3594 case 'L': filenames = FN_NOMATCH_ONLY; break;
3595 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3596 case 'n': number = TRUE; break;
3597
3598 case 'o':
3599 only_matching_last = add_number(0, only_matching_last);
3600 if (only_matching == NULL) only_matching = only_matching_last;
3601 break;
3602
3603 case 'q': quiet = TRUE; break;
3604 case 'r': dee_action = dee_RECURSE; break;
3605 case 's': silent = TRUE; break;
3606 case 't': show_total_count = TRUE; break;
3607 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3608 case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3609 case 'v': invert = TRUE; break;
3610
3611 case 'V':
3612 {
3613 unsigned char buffer[128];
3614 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3615 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3616 }
3617 pcre2grep_exit(0);
3618 break; /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
3619
3620 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3621 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3622 case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
3623
3624 /* LCOV_EXCL_START - this is a "never event" */
3625 default:
3626 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3627 pcre2grep_exit(usage(2));
3628 /* LCOV_EXCL_STOP */
3629 }
3630
3631 return options;
3632 }
3633
3634
3635
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The text is built in a static
buffer, so each call overwrites the result of the previous one.

Argument:   the number
Returns:    pointer to the static buffer holding the digits and the suffix
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Count of characters written */
int lasttwo = n % 100;

/* Numbers whose last two digits are 11, 12, or 13 take "th"; otherwise the
final digit decides. */

if (lasttwo < 11 || lasttwo > 13)
  {
  int lastone = lasttwo % 10;
  if (lastone == 1) suffix = "st";
  else if (lastone == 2) suffix = "nd";
  else if (lastone == 3) suffix = "rd";
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3660
3661
3662
3663 /*************************************************
3664 * Compile a single pattern *
3665 *************************************************/
3666
3667 /* Do nothing if the pattern has already been compiled. This is the case for
3668 include/exclude patterns read from a file.
3669
3670 When the -F option has been used, each "pattern" may be a list of strings,
3671 separated by line breaks. They will be matched literally. We split such a
3672 string and compile the first substring, inserting an additional block into the
3673 pattern chain.
3674
3675 Arguments:
3676 p points to the pattern block
3677 options the PCRE options
3678 fromfile TRUE if the pattern was read from a file
3679 fromtext file name or identifying text (e.g. "include")
3680 count 0 if this is the only command line pattern, or
3681 number of the command line pattern, or
3682 linenumber for a pattern from a file
3683
3684 Returns: TRUE on success, FALSE after an error
3685 */
3686
3687 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3688 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3689 int count)
3690 {
3691 char *ps;
3692 int errcode;
3693 PCRE2_SIZE patlen, erroffset;
3694 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3695
3696 if (p->compiled != NULL) return TRUE;
3697 ps = p->string;
3698 patlen = p->length;
3699
3700 if ((options & PCRE2_LITERAL) != 0)
3701 {
3702 int ellength;
3703 char *eop = ps + patlen;
3704 char *pe = end_of_line(ps, eop, &ellength);
3705
3706 if (ellength != 0)
3707 {
3708 patlen = pe - ps - ellength;
3709 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3710 }
3711 }
3712
3713 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3714 &erroffset, compile_context);
3715
3716 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3717 ignore any JIT compiler errors, relying falling back to interpreting if
3718 anything goes wrong with JIT. */
3719
3720 if (p->compiled != NULL)
3721 {
3722 #ifdef SUPPORT_PCRE2GREP_JIT
3723 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3724 #endif
3725 return TRUE;
3726 }
3727
3728 /* Handle compile errors */
3729
3730 if (erroffset > patlen) erroffset = patlen;
3731 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3732
3733 if (fromfile)
3734 {
3735 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3736 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3737 }
3738 else
3739 {
3740 if (count == 0)
3741 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3742 fromtext, (int)erroffset, errmessbuffer);
3743 else
3744 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3745 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3746 }
3747
3748 return FALSE;
3749 }
3750
3751
3752
3753 /*************************************************
3754 * Read and compile a file of patterns *
3755 *************************************************/
3756
3757 /* This is used for --filelist, --include-from, and --exclude-from.
3758
3759 Arguments:
3760 name the name of the file; "-" is stdin
3761 patptr pointer to the pattern chain anchor
3762 patlastptr pointer to the last pattern pointer
3763
3764 Returns: TRUE if all went well
3765 */
3766
3767 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3768 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3769 {
3770 int linenumber = 0;
3771 PCRE2_SIZE patlen;
3772 FILE *f;
3773 const char *filename;
3774 char buffer[MAXPATLEN+20];
3775
3776 if (strcmp(name, "-") == 0)
3777 {
3778 f = stdin;
3779 filename = stdin_name;
3780 }
3781 else
3782 {
3783 f = fopen(name, "r");
3784 if (f == NULL)
3785 {
3786 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3787 return FALSE;
3788 }
3789 filename = name;
3790 }
3791
3792 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3793 {
3794 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3795 linenumber++;
3796 if (patlen == 0) continue; /* Skip blank lines */
3797
3798 /* Note: this call to add_pattern() puts a pointer to the local variable
3799 "buffer" into the pattern chain. However, that pointer is used only when
3800 compiling the pattern, which happens immediately below, so we flatten it
3801 afterwards, as a precaution against any later code trying to use it. */
3802
3803 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3804 if (*patlastptr == NULL)
3805 {
3806 /* LCOV_EXCL_START - won't happen in testing */
3807 if (f != stdin) fclose(f);
3808 return FALSE;
3809 /* LCOV_EXCL_STOP */
3810 }
3811 if (*patptr == NULL) *patptr = *patlastptr;
3812
3813 /* This loop is needed because compiling a "pattern" when -F is set may add
3814 on additional literal patterns if the original contains a newline. In the
3815 common case, it never will, because read_one_line() stops at a newline.
3816 However, the -N option can be used to give pcre2grep a different newline
3817 setting. */
3818
3819 for(;;)
3820 {
3821 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3822 linenumber))
3823 {
3824 if (f != stdin) fclose(f);
3825 return FALSE;
3826 }
3827 (*patlastptr)->string = NULL; /* Insurance */
3828 if ((*patlastptr)->next == NULL) break;
3829 *patlastptr = (*patlastptr)->next;
3830 }
3831 }
3832
3833 if (f != stdin) fclose(f);
3834 return TRUE;
3835 }
3836
3837
3838
3839 /*************************************************
3840 * Main program *
3841 *************************************************/
3842
3843 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3844
3845 int
main(int argc,char ** argv)3846 main(int argc, char **argv)
3847 {
3848 int i, j;
3849 int rc = 1;
3850 BOOL only_one_at_top;
3851 patstr *cp;
3852 fnstr *fn;
3853 omstr *om;
3854 const char *locale_from = "--locale";
3855
3856 #ifdef SUPPORT_PCRE2GREP_JIT
3857 pcre2_jit_stack *jit_stack = NULL;
3858 #endif
3859
3860 /* In Windows, stdout is set up as a text stream, which means that \n is
3861 converted to \r\n. This causes output lines that are copied from the input to
3862 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3863 that stdout is a binary stream. Note that this means all other output to stdout
3864 must use STDOUT_NL to terminate lines. */
3865
3866 #ifdef WIN32
3867 _setmode(_fileno(stdout), _O_BINARY);
3868 #endif
3869
3870 /* Process the options */
3871
3872 for (i = 1; i < argc; i++)
3873 {
3874 option_item *op = NULL;
3875 char *option_data = (char *)""; /* default to keep compiler happy */
3876 BOOL longop;
3877 BOOL longopwasequals = FALSE;
3878
3879 if (argv[i][0] != '-') break;
3880
3881 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3882 but only if we have previously had -e or -f to define the patterns. */
3883
3884 if (argv[i][1] == 0)
3885 {
3886 if (pattern_files != NULL || patterns != NULL) break;
3887 else pcre2grep_exit(usage(2));
3888 }
3889
3890 /* Handle a long name option, or -- to terminate the options */
3891
3892 if (argv[i][1] == '-')
3893 {
3894 char *arg = argv[i] + 2;
3895 char *argequals = strchr(arg, '=');
3896
3897 if (*arg == 0) /* -- terminates options */
3898 {
3899 i++;
3900 break; /* out of the options-handling loop */
3901 }
3902
3903 longop = TRUE;
3904
3905 /* Some long options have data that follows after =, for example file=name.
3906 Some options have variations in the long name spelling: specifically, we
3907 allow "regexp" because GNU grep allows it, though I personally go along
3908 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3909 These options are entered in the table as "regex(p)". Options can be in
3910 both these categories. */
3911
3912 for (op = optionlist; op->one_char != 0; op++)
3913 {
3914 char *opbra = strchr(op->long_name, '(');
3915 char *equals = strchr(op->long_name, '=');
3916
3917 /* Handle options with only one spelling of the name */
3918
3919 if (opbra == NULL) /* Does not contain '(' */
3920 {
3921 if (equals == NULL) /* Not thing=data case */
3922 {
3923 if (strcmp(arg, op->long_name) == 0) break;
3924 }
3925 else /* Special case xxx=data */
3926 {
3927 int oplen = (int)(equals - op->long_name);
3928 int arglen = (argequals == NULL)?
3929 (int)strlen(arg) : (int)(argequals - arg);
3930 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3931 {
3932 option_data = arg + arglen;
3933 if (*option_data == '=')
3934 {
3935 option_data++;
3936 longopwasequals = TRUE;
3937 }
3938 break;
3939 }
3940 }
3941 }
3942
3943 /* Handle options with an alternate spelling of the name */
3944
3945 else
3946 {
3947 char buff1[24];
3948 char buff2[24];
3949 int ret;
3950
3951 int baselen = (int)(opbra - op->long_name);
3952 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3953 int arglen = (argequals == NULL || equals == NULL)?
3954 (int)strlen(arg) : (int)(argequals - arg);
3955
3956 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3957 ret < 0 || ret > (int)sizeof(buff1)) ||
3958 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3959 fulllen - baselen - 2, opbra + 1),
3960 ret < 0 || ret > (int)sizeof(buff2)))
3961 {
3962 /* LCOV_EXCL_START - this is a "never" event */
3963 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3964 op->long_name);
3965 pcre2grep_exit(2);
3966 /* LCOV_EXCL_STOP */
3967 }
3968
3969 if (strncmp(arg, buff1, arglen) == 0 ||
3970 strncmp(arg, buff2, arglen) == 0)
3971 {
3972 if (equals != NULL && argequals != NULL)
3973 {
3974 option_data = argequals;
3975 if (*option_data == '=')
3976 {
3977 option_data++;
3978 longopwasequals = TRUE;
3979 }
3980 }
3981 break;
3982 }
3983 }
3984 }
3985
3986 if (op->one_char == 0)
3987 {
3988 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3989 pcre2grep_exit(usage(2));
3990 }
3991 }
3992
3993 /* One-char options; many that have no data may be in a single argument; we
3994 continue till we hit the last one or one that needs data. */
3995
3996 else
3997 {
3998 char *s = argv[i] + 1;
3999 longop = FALSE;
4000
4001 while (*s != 0)
4002 {
4003 for (op = optionlist; op->one_char != 0; op++)
4004 {
4005 if (*s == op->one_char) break;
4006 }
4007 if (op->one_char == 0)
4008 {
4009 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4010 *s, argv[i]);
4011 pcre2grep_exit(usage(2));
4012 }
4013
4014 option_data = s+1;
4015
4016 /* Break out if this is the last character in the string; it's handled
4017 below like a single multi-char option. */
4018
4019 if (*option_data == 0) break;
4020
4021 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4022 are used for ones that either have a numerical number or defaults, i.e.
4023 the data is optional. If a digit follows, there is data; if not, carry on
4024 with other single-character options in the same string. */
4025
4026 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4027 {
4028 if (isdigit((unsigned char)s[1])) break;
4029 }
4030 else /* Check for an option with data */
4031 {
4032 if (op->type != OP_NODATA) break;
4033 }
4034
4035 /* Handle a single-character option with no data, then loop for the
4036 next character in the string. */
4037
4038 pcre2_options = handle_option(*s++, pcre2_options);
4039 }
4040 }
4041
  /* At this point we should have op pointing to a matched option. If the type
  is OP_NODATA, it means that there is no data, and the option might set
  something in the PCRE options. */
4045
4046 if (op->type == OP_NODATA)
4047 {
4048 pcre2_options = handle_option(op->one_char, pcre2_options);
4049 continue;
4050 }
4051
4052 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4053 either has a value or defaults to something. It cannot have data in a
4054 separate item. At the moment, the only such options are "colo(u)r",
4055 and "only-matching". */
4056
4057 if (*option_data == 0 &&
4058 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4059 op->type == OP_OP_NUMBERS))
4060 {
4061 switch (op->one_char)
4062 {
4063 case N_COLOUR:
4064 colour_option = "auto";
4065 break;
4066
4067 case 'o':
4068 only_matching_last = add_number(0, only_matching_last);
4069 if (only_matching == NULL) only_matching = only_matching_last;
4070 break;
4071 }
4072 continue;
4073 }
4074
4075 /* Otherwise, find the data string for the option. */
4076
4077 if (*option_data == 0)
4078 {
4079 if (i >= argc - 1 || longopwasequals)
4080 {
4081 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4082 pcre2grep_exit(usage(2));
4083 }
4084 option_data = argv[++i];
4085 }
4086
4087 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4088 added to a chain of numbers. */
4089
4090 if (op->type == OP_OP_NUMBERS)
4091 {
4092 unsigned long int n = decode_number(option_data, op, longop);
4093 omdatastr *omd = (omdatastr *)op->dataptr;
4094 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4095 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4096 }
4097
4098 /* If the option type is OP_PATLIST, it's the -e option, or one of the
4099 include/exclude options, which can be called multiple times to create lists
4100 of patterns. */
4101
4102 else if (op->type == OP_PATLIST)
4103 {
4104 patdatastr *pd = (patdatastr *)op->dataptr;
4105 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4106 *(pd->lastptr));
4107 if (*(pd->lastptr) == NULL) goto EXIT2;
4108 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4109 }
4110
4111 /* If the option type is OP_FILELIST, it's one of the options that names a
4112 file. */
4113
4114 else if (op->type == OP_FILELIST)
4115 {
4116 fndatastr *fd = (fndatastr *)op->dataptr;
4117 fn = (fnstr *)malloc(sizeof(fnstr));
4118 if (fn == NULL)
4119 {
4120 /* LCOV_EXCL_START */
4121 fprintf(stderr, "pcre2grep: malloc failed\n");
4122 goto EXIT2;
4123 /* LCOV_EXCL_STOP */
4124 }
4125 fn->next = NULL;
4126 fn->name = option_data;
4127 if (*(fd->anchor) == NULL)
4128 *(fd->anchor) = fn;
4129 else
4130 (*(fd->lastptr))->next = fn;
4131 *(fd->lastptr) = fn;
4132 }
4133
4134 /* Handle OP_BINARY_FILES */
4135
4136 else if (op->type == OP_BINFILES)
4137 {
4138 if (strcmp(option_data, "binary") == 0)
4139 binary_files = BIN_BINARY;
4140 else if (strcmp(option_data, "without-match") == 0)
4141 binary_files = BIN_NOMATCH;
4142 else if (strcmp(option_data, "text") == 0)
4143 binary_files = BIN_TEXT;
4144 else
4145 {
4146 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4147 option_data);
4148 pcre2grep_exit(usage(2));
4149 }
4150 }
4151
4152 /* Otherwise, deal with a single string or numeric data value. */
4153
4154 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4155 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4156 {
4157 *((char **)op->dataptr) = option_data;
4158 }
4159 else
4160 {
4161 unsigned long int n = decode_number(option_data, op, longop);
4162 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4163 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4164 else *((int *)op->dataptr) = n;
4165 }
4166 }
4167
4168 /* Options have been decoded. If -C was used, its value is used as a default
4169 for -A and -B. */
4170
4171 if (both_context > 0)
4172 {
4173 if (after_context == 0) after_context = both_context;
4174 if (before_context == 0) before_context = both_context;
4175 }
4176
4177 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4178 permitted. They display, each in their own way, only the data that has matched.
4179 */
4180
4181 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4182 file_offsets + line_offsets;
4183
4184 if (only_matching_count > 1)
4185 {
4186 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4187 "--file-offsets and/or --line-offsets\n");
4188 pcre2grep_exit(usage(2));
4189 }
4190
4191 /* Check that there is a big enough ovector for all -o settings. */
4192
4193 for (om = only_matching; om != NULL; om = om->next)
4194 {
4195 int n = om->groupnum;
4196 if (n > (int)capture_max)
4197 {
4198 fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4199 fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4200 goto EXIT2;
4201 }
4202 }
4203
4204 /* Check the text supplied to --output for errors. */
4205
4206 if (output_text != NULL &&
4207 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4208 goto EXIT2;
4209
4210 /* Set up default compile and match contexts and match data blocks. */
4211
4212 offset_size = capture_max + 1;
4213 compile_context = pcre2_compile_context_create(NULL);
4214 match_context = pcre2_match_context_create(NULL);
4215 match_data_pair[0] = pcre2_match_data_create(offset_size, NULL);
4216 match_data_pair[1] = pcre2_match_data_create(offset_size, NULL);
4217 offsets_pair[0] = pcre2_get_ovector_pointer(match_data_pair[0]);
4218 offsets_pair[1] = pcre2_get_ovector_pointer(match_data_pair[1]);
4219 match_data = match_data_pair[0];
4220 offsets = offsets_pair[0];
4221 match_data_toggle = 0;
4222
4223 /* If string (script) callouts are supported, set up the callout processing
4224 function. */
4225
4226 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4227 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4228 #endif
4229
4230 /* Put limits into the match context. */
4231
4232 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4233 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4234 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4235
4236 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4237 LC_ALL environment variable is set, and if so, use it. */
4238
4239 if (locale == NULL)
4240 {
4241 locale = getenv("LC_ALL");
4242 locale_from = "LC_ALL";
4243 }
4244
4245 if (locale == NULL)
4246 {
4247 locale = getenv("LC_CTYPE");
4248 locale_from = "LC_CTYPE";
4249 }
4250
4251 /* If a locale is set, use it to generate the tables that PCRE2 needs. Passing
4252 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4253
4254 if (locale != NULL)
4255 {
4256 if (setlocale(LC_CTYPE, locale) == NULL)
4257 {
4258 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4259 locale, locale_from);
4260 goto EXIT2;
4261 }
4262 character_tables = pcre2_maketables(NULL);
4263 pcre2_set_character_tables(compile_context, character_tables);
4264 }
4265
4266 /* Sort out colouring */
4267
4268 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4269 {
4270 if (strcmp(colour_option, "always") == 0)
4271 #ifdef WIN32
4272 do_ansi = !is_stdout_tty(),
4273 #endif
4274 do_colour = TRUE;
4275 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4276 else
4277 {
4278 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4279 colour_option);
4280 goto EXIT2;
4281 }
4282 if (do_colour)
4283 {
4284 char *cs = getenv("PCRE2GREP_COLOUR");
4285 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4286 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4287 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4288 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4289 if (cs == NULL) cs = getenv("GREP_COLOR");
4290 if (cs != NULL)
4291 {
4292 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4293 }
4294 #ifdef WIN32
4295 init_colour_output();
4296 #endif
4297 }
4298 }
4299
4300 /* When colouring or otherwise identifying matching substrings, we need to find
4301 all possible matches when there are multiple patterns. */
4302
4303 all_matches = do_colour || only_matching_count != 0;
4304
4305 /* Sort out a newline setting. */
4306
4307 if (newline_arg != NULL)
4308 {
4309 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4310 endlinetype++)
4311 {
4312 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4313 }
4314 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4315 pcre2_set_newline(compile_context, endlinetype);
4316 else
4317 {
4318 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4319 newline_arg);
4320 goto EXIT2;
4321 }
4322 }
4323
4324 /* Find default newline convention */
4325
4326 else
4327 {
4328 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4329 }
4330
4331 /* Interpret the text values for -d and -D */
4332
4333 if (dee_option != NULL)
4334 {
4335 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4336 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4337 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4338 else
4339 {
4340 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4341 goto EXIT2;
4342 }
4343 }
4344
4345 if (DEE_option != NULL)
4346 {
4347 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4348 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4349 else
4350 {
4351 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4352 goto EXIT2;
4353 }
4354 }
4355
4356 /* Set the extra options */
4357
4358 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4359
4360 /* If use_jit is set, check whether JIT is available. If not, do not try
4361 to use JIT. */
4362
4363 if (use_jit)
4364 {
4365 uint32_t answer;
4366 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4367 if (!answer) use_jit = FALSE;
4368 }
4369
4370 /* Get memory for the main buffer. */
4371
4372 if (bufthird <= 0)
4373 {
4374 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4375 goto EXIT2;
4376 }
4377
4378 bufsize = 3*bufthird;
4379 main_buffer = (char *)malloc(bufsize);
4380
4381 if (main_buffer == NULL)
4382 {
4383 /* LCOV_EXCL_START */
4384 fprintf(stderr, "pcre2grep: malloc failed\n");
4385 goto EXIT2;
4386 /* LCOV_EXCL_STOP */
4387 }
4388
4389 /* If no patterns were provided by -e, and there are no files provided by -f,
4390 the first argument is the one and only pattern, and it must exist. */
4391
4392 if (patterns == NULL && pattern_files == NULL)
4393 {
4394 if (i >= argc) return usage(2);
4395 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4396 NULL);
4397 i++;
4398 if (patterns == NULL) goto EXIT2;
4399 }
4400
4401 /* Compile the patterns that were provided on the command line, either by
4402 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4403 after all the command-line options are read so that we know which PCRE options
4404 to use. When -F is used, compile_pattern() may add another block into the
4405 chain, so we must not access the next pointer till after the compile. */
4406
4407 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4408 {
4409 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4410 (j == 1 && patterns->next == NULL)? 0 : j))
4411 goto EXIT2;
4412 }
4413
4414 /* Read and compile the regular expressions that are provided in files. */
4415
4416 for (fn = pattern_files; fn != NULL; fn = fn->next)
4417 {
4418 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4419 }
4420
4421 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4422
4423 #ifdef SUPPORT_PCRE2GREP_JIT
4424 if (use_jit)
4425 {
4426 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4427 if (jit_stack != NULL )
4428 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4429 }
4430 #endif
4431
4432 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4433 adjust the options. */
4434
4435 pcre2_options &= ~PCRE2_LITERAL;
4436 (void)pcre2_set_compile_extra_options(compile_context, 0);
4437
4438 /* If there are include or exclude patterns read from the command line, compile
4439 them. */
4440
4441 for (j = 0; j < 4; j++)
4442 {
4443 int k;
4444 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4445 {
4446 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4447 (k == 1 && cp->next == NULL)? 0 : k))
4448 goto EXIT2;
4449 }
4450 }
4451
4452 /* Read and compile include/exclude patterns from files. */
4453
4454 for (fn = include_from; fn != NULL; fn = fn->next)
4455 {
4456 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4457 goto EXIT2;
4458 }
4459
4460 for (fn = exclude_from; fn != NULL; fn = fn->next)
4461 {
4462 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4463 goto EXIT2;
4464 }
4465
4466 /* If there are no files that contain lists of files to search, and there are
4467 no file arguments, search stdin, and then exit. */
4468
4469 if (file_lists == NULL && i >= argc)
4470 {
4471 /* Seeking on a buffered stdin is not portable, so attempt to remove
4472 the buffering, to work around reported issues affecting several BSDs
4473 and AIX. */
4474 if (count_limit >= 0)
4475 setbuf(stdin, NULL);
4476 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4477 (filenames > FN_DEFAULT)? stdin_name : NULL);
4478 goto EXIT;
4479 }
4480
4481 /* If any files that contain lists of files to search have been specified,
4482 read them line by line and search the given files. */
4483
4484 for (fn = file_lists; fn != NULL; fn = fn->next)
4485 {
4486 char buffer[FNBUFSIZ];
4487 FILE *fl;
4488 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4489 {
4490 fl = fopen(fn->name, "rb");
4491 if (fl == NULL)
4492 {
4493 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4494 strerror(errno));
4495 goto EXIT2;
4496 }
4497 }
4498 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4499 {
4500 int frc;
4501 char *end = buffer + (int)strlen(buffer);
4502 while (end > buffer && isspace(end[-1])) end--;
4503 *end = 0;
4504 if (*buffer != 0)
4505 {
4506 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4507 if (frc > 1) rc = frc;
4508 else if (frc == 0 && rc == 1) rc = 0;
4509 }
4510 }
4511 if (fl != stdin) fclose(fl);
4512 }
4513
4514 /* After handling file-list, work through remaining arguments. Pass in the fact
4515 that there is only one argument at top level - this suppresses the file name if
4516 the argument is not a directory and filenames are not otherwise forced. */
4517
4518 only_one_at_top = i == argc - 1 && file_lists == NULL;
4519
4520 for (; i < argc; i++)
4521 {
4522 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4523 only_one_at_top);
4524 if (frc > 1) rc = frc;
4525 else if (frc == 0 && rc == 1) rc = 0;
4526 }
4527
4528 /* Show the total number of matches if requested, but not if only one file's
4529 count was printed. */
4530
4531 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4532 {
4533 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4534 fprintf(stdout, "TOTAL:");
4535 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4536 }
4537
4538 EXIT:
4539 #ifdef SUPPORT_PCRE2GREP_JIT
4540 pcre2_jit_free_unused_memory(NULL);
4541 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4542 #endif
4543
4544 free(main_buffer);
4545 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4546
4547 pcre2_compile_context_free(compile_context);
4548 pcre2_match_context_free(match_context);
4549 pcre2_match_data_free(match_data_pair[0]);
4550 pcre2_match_data_free(match_data_pair[1]);
4551
4552 free_pattern_chain(patterns);
4553 free_pattern_chain(include_patterns);
4554 free_pattern_chain(include_dir_patterns);
4555 free_pattern_chain(exclude_patterns);
4556 free_pattern_chain(exclude_dir_patterns);
4557
4558 free_file_chain(exclude_from);
4559 free_file_chain(include_from);
4560 free_file_chain(pattern_files);
4561 free_file_chain(file_lists);
4562
4563 while (only_matching != NULL)
4564 {
4565 omstr *this = only_matching;
4566 only_matching = this->next;
4567 free(this);
4568 }
4569
4570 pcre2grep_exit(rc);
4571
4572 EXIT2:
4573 rc = 2;
4574 goto EXIT;
4575 }
4576
4577 /* End of pcre2grep */
4578