1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2020 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65
/* Some versions of CMake still define WIN32 under Cygwin; undo that here. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76
77 #ifdef WIN32
78 #include <io.h> /* For _setmode() */
79 #include <fcntl.h> /* For _O_BINARY */
80 #endif
81
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112
113 /* VC and older compilers don't support %td or %zu, and even some that claim to
114 be C99 don't support it (hence DISABLE_PERCENT_ZT). */
115
116 #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(DISABLE_PERCENT_ZT)
117 #define PTR_FORM "lu"
118 #define SIZ_FORM "lu"
119 #define SIZ_CAST (unsigned long int)
120 #else
121 #define PTR_FORM "td"
122 #define SIZ_FORM "zu"
123 #define SIZ_CAST
124 #endif
125
126 #define FALSE 0
127 #define TRUE 1
128
129 typedef int BOOL;
130
131 #define DEFAULT_CAPTURE_MAX 50
132
133 #if BUFSIZ > 8192
134 #define MAXPATLEN BUFSIZ
135 #else
136 #define MAXPATLEN 8192
137 #endif
138
139 #define FNBUFSIZ 2048
140 #define ERRBUFSIZ 256
141
142 /* Values for the "filenames" variable, which specifies options for file name
143 output. The order is important; it is assumed that a file name is wanted for
144 all values greater than FN_DEFAULT. */
145
146 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
147
148 /* File reading styles */
149
150 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
151
152 /* Actions for the -d and -D options */
153
154 enum { dee_READ, dee_SKIP, dee_RECURSE };
155 enum { DEE_READ, DEE_SKIP };
156
157 /* Actions for special processing options (flag bits) */
158
159 #define PO_WORD_MATCH 0x0001
160 #define PO_LINE_MATCH 0x0002
161 #define PO_FIXED_STRINGS 0x0004
162
163 /* Binary file options */
164
165 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
166
167 /* Return values from decode_dollar_escape() */
168
169 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
170
171 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
172 environments), a warning is issued if the value of fwrite() is ignored.
173 Unfortunately, casting to (void) does not suppress the warning. To get round
174 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
175 apply to fprintf(). */
176
177 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
178
179 /* Under Windows, we have to set stdout to be binary, so that it does not
180 convert \r\n at the ends of output lines to \r\r\n. However, that means that
181 any messages written to stdout must have \r\n as their line terminator. This is
182 handled by using STDOUT_NL as the newline string. We also use a normal double
183 quote for the example, as single quotes aren't usually available. */
184
185 #ifdef WIN32
186 #define STDOUT_NL "\r\n"
187 #define STDOUT_NL_LEN 2
188 #define QUOT "\""
189 #else
190 #define STDOUT_NL "\n"
191 #define STDOUT_NL_LEN 1
192 #define QUOT "'"
193 #endif
194
195 /* This code is returned from decode_dollar_escape() when $n is encountered,
196 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
197 point. */
198
199 #define STDOUT_NL_CODE 0x7fffffffu
200
201
202
203 /*************************************************
204 * Global variables *
205 *************************************************/
206
207 /* Jeffrey Friedl has some debugging requirements that are not part of the
208 regular code. */
209
210 #ifdef JFRIEDL_DEBUG
211 static int S_arg = -1;
212 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
213 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
214 static const char *jfriedl_prefix = "";
215 static const char *jfriedl_postfix = "";
216 #endif
217
218 static const char *colour_string = "1;31";
219 static const char *colour_option = NULL;
220 static const char *dee_option = NULL;
221 static const char *DEE_option = NULL;
222 static const char *locale = NULL;
223 static const char *newline_arg = NULL;
224 static const char *om_separator = NULL;
225 static const char *stdin_name = "(standard input)";
226 static const char *output_text = NULL;
227
228 static char *main_buffer = NULL;
229
230 static int after_context = 0;
231 static int before_context = 0;
232 static int binary_files = BIN_BINARY;
233 static int both_context = 0;
234 static int bufthird = PCRE2GREP_BUFSIZE;
235 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
236 static int bufsize = 3*PCRE2GREP_BUFSIZE;
237 static int endlinetype;
238
239 static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
240 static unsigned long int counts_printed = 0;
241 static unsigned long int total_count = 0;
242
243 #ifdef WIN32
244 static int dee_action = dee_SKIP;
245 #else
246 static int dee_action = dee_READ;
247 #endif
248
249 static int DEE_action = DEE_READ;
250 static int error_count = 0;
251 static int filenames = FN_DEFAULT;
252
253 #ifdef SUPPORT_PCRE2GREP_JIT
254 static BOOL use_jit = TRUE;
255 #else
256 static BOOL use_jit = FALSE;
257 #endif
258
259 static const uint8_t *character_tables = NULL;
260
261 static uint32_t pcre2_options = 0;
262 static uint32_t extra_options = 0;
263 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
264 static uint32_t match_limit = 0;
265 static uint32_t depth_limit = 0;
266
267 static pcre2_compile_context *compile_context;
268 static pcre2_match_context *match_context;
269 static pcre2_match_data *match_data;
270 static PCRE2_SIZE *offsets;
271 static uint32_t offset_size;
272 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
273
274 static BOOL count_only = FALSE;
275 static BOOL do_colour = FALSE;
276 #ifdef WIN32
277 static BOOL do_ansi = FALSE;
278 #endif
279 static BOOL file_offsets = FALSE;
280 static BOOL hyphenpending = FALSE;
281 static BOOL invert = FALSE;
282 static BOOL line_buffered = FALSE;
283 static BOOL line_offsets = FALSE;
284 static BOOL multiline = FALSE;
285 static BOOL number = FALSE;
286 static BOOL omit_zero_count = FALSE;
287 static BOOL resource_error = FALSE;
288 static BOOL quiet = FALSE;
289 static BOOL show_total_count = FALSE;
290 static BOOL silent = FALSE;
291 static BOOL utf = FALSE;
292
293 static uint8_t utf8_buffer[8];
294
295
296 /* Structure for list of --only-matching capturing numbers. */
297
298 typedef struct omstr {
299 struct omstr *next;
300 int groupnum;
301 } omstr;
302
303 static omstr *only_matching = NULL;
304 static omstr *only_matching_last = NULL;
305 static int only_matching_count;
306
307 /* Structure for holding the two variables that describe a number chain. */
308
309 typedef struct omdatastr {
310 omstr **anchor;
311 omstr **lastptr;
312 } omdatastr;
313
314 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
315
316 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
317
318 typedef struct fnstr {
319 struct fnstr *next;
320 char *name;
321 } fnstr;
322
323 static fnstr *exclude_from = NULL;
324 static fnstr *exclude_from_last = NULL;
325 static fnstr *include_from = NULL;
326 static fnstr *include_from_last = NULL;
327
328 static fnstr *file_lists = NULL;
329 static fnstr *file_lists_last = NULL;
330 static fnstr *pattern_files = NULL;
331 static fnstr *pattern_files_last = NULL;
332
333 /* Structure for holding the two variables that describe a file name chain. */
334
335 typedef struct fndatastr {
336 fnstr **anchor;
337 fnstr **lastptr;
338 } fndatastr;
339
340 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
341 static fndatastr include_from_data = { &include_from, &include_from_last };
342 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
343 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
344
345 /* Structure for pattern and its compiled form; used for matching patterns and
346 also for include/exclude patterns. */
347
348 typedef struct patstr {
349 struct patstr *next;
350 char *string;
351 PCRE2_SIZE length;
352 pcre2_code *compiled;
353 } patstr;
354
355 static patstr *patterns = NULL;
356 static patstr *patterns_last = NULL;
357 static patstr *include_patterns = NULL;
358 static patstr *include_patterns_last = NULL;
359 static patstr *exclude_patterns = NULL;
360 static patstr *exclude_patterns_last = NULL;
361 static patstr *include_dir_patterns = NULL;
362 static patstr *include_dir_patterns_last = NULL;
363 static patstr *exclude_dir_patterns = NULL;
364 static patstr *exclude_dir_patterns_last = NULL;
365
366 /* Structure holding the two variables that describe a pattern chain. A pointer
367 to such structures is used for each appropriate option. */
368
369 typedef struct patdatastr {
370 patstr **anchor;
371 patstr **lastptr;
372 } patdatastr;
373
374 static patdatastr match_patdata = { &patterns, &patterns_last };
375 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
376 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
377 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
378 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
379
380 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
381 &include_dir_patterns, &exclude_dir_patterns };
382
383 static const char *incexname[4] = { "--include", "--exclude",
384 "--include-dir", "--exclude-dir" };
385
386 /* Structure for options and list of them */
387
388 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
389 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
390
391 typedef struct option_item {
392 int type;
393 int one_char;
394 void *dataptr;
395 const char *long_name;
396 const char *help_text;
397 } option_item;
398
399 /* Options without a single-letter equivalent get a negative value. This can be
400 used to identify them. */
401
402 #define N_COLOUR (-1)
403 #define N_EXCLUDE (-2)
404 #define N_EXCLUDE_DIR (-3)
405 #define N_HELP (-4)
406 #define N_INCLUDE (-5)
407 #define N_INCLUDE_DIR (-6)
408 #define N_LABEL (-7)
409 #define N_LOCALE (-8)
410 #define N_NULL (-9)
411 #define N_LOFFSETS (-10)
412 #define N_FOFFSETS (-11)
413 #define N_LBUFFER (-12)
414 #define N_H_LIMIT (-13)
415 #define N_M_LIMIT (-14)
416 #define N_M_LIMIT_DEP (-15)
417 #define N_BUFSIZE (-16)
418 #define N_NOJIT (-17)
419 #define N_FILE_LIST (-18)
420 #define N_BINARY_FILES (-19)
421 #define N_EXCLUDE_FROM (-20)
422 #define N_INCLUDE_FROM (-21)
423 #define N_OM_SEPARATOR (-22)
424 #define N_MAX_BUFSIZE (-23)
425 #define N_OM_CAPTURE (-24)
426
427 static option_item optionlist[] = {
428 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
429 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
430 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
431 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
432 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
433 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
434 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
435 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
436 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
437 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
438 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
439 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
440 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
441 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
442 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
443 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
444 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
445 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
446 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
447 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
448 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
449 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
450 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
451 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
452 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
453 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
454 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
455 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
456 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
457 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
458 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
459 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
460 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
461 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
462 { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
463 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
464 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
465 #ifdef SUPPORT_PCRE2GREP_JIT
466 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
467 #else
468 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
469 #endif
470 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
471 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
472 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
473 { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
474 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
475 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
476 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
477 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
478 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
479 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
480 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
481 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
482 #ifdef JFRIEDL_DEBUG
483 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
484 #endif
485 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
486 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
487 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
488 { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
489 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
490 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
491 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
492 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
493 { OP_NODATA, 0, NULL, NULL, NULL }
494 };
495
496 /* Table of names for newline types. Must be kept in step with the definitions
497 of PCRE2_NEWLINE_xx in pcre2.h. */
498
499 static const char *newlines[] = {
500 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
501
502 /* UTF-8 tables */
503
504 const int utf8_table1[] =
505 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
506 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
507
508 const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
509 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
510
511 const char utf8_table4[] = {
512 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
513 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
514 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
515 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
516
517
518 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
519 /*************************************************
520 * Emulated memmove() for systems without it *
521 *************************************************/
522
523 /* This function can make use of bcopy() if it is available. Otherwise do it by
524 steam, as there are some non-Unix environments that lack both memmove() and
525 bcopy(). */
526
527 static void *
emulated_memmove(void * d,const void * s,size_t n)528 emulated_memmove(void *d, const void *s, size_t n)
529 {
530 #ifdef HAVE_BCOPY
531 bcopy(s, d, n);
532 return d;
533 #else
534 size_t i;
535 unsigned char *dest = (unsigned char *)d;
536 const unsigned char *src = (const unsigned char *)s;
537 if (dest > src)
538 {
539 dest += n;
540 src += n;
541 for (i = 0; i < n; ++i) *(--dest) = *(--src);
542 return (void *)dest;
543 }
544 else
545 {
546 for (i = 0; i < n; ++i) *dest++ = *src++;
547 return (void *)(dest - n);
548 }
549 #endif /* not HAVE_BCOPY */
550 }
551 #undef memmove
552 #define memmove(d,s,n) emulated_memmove(d,s,n)
553 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
554
555
556
557 /*************************************************
558 * Convert code point to UTF-8 *
559 *************************************************/
560
561 /* A static buffer is used. Returns the number of bytes. */
562
563 static int
ord2utf8(uint32_t value)564 ord2utf8(uint32_t value)
565 {
566 int i, j;
567 uint8_t *utf8bytes = utf8_buffer;
568 for (i = 0; i < utf8_table1_size; i++)
569 if (value <= (uint32_t)utf8_table1[i]) break;
570 utf8bytes += i;
571 for (j = i; j > 0; j--)
572 {
573 *utf8bytes-- = 0x80 | (value & 0x3f);
574 value >>= 6;
575 }
576 *utf8bytes = utf8_table2[i] | value;
577 return i + 1;
578 }
579
580
581
582 /*************************************************
583 * Case-independent string compare *
584 *************************************************/
585
586 static int
strcmpic(const char * str1,const char * str2)587 strcmpic(const char *str1, const char *str2)
588 {
589 unsigned int c1, c2;
590 while (*str1 != '\0' || *str2 != '\0')
591 {
592 c1 = tolower(*str1++);
593 c2 = tolower(*str2++);
594 if (c1 != c2) return ((c1 > c2) << 1) - 1;
595 }
596 return 0;
597 }
598
599
600 /*************************************************
601 * Parse GREP_COLORS *
602 *************************************************/
603
604 /* Extract ms or mt from GREP_COLORS.
605
606 Argument: the string, possibly NULL
607 Returns: the value of ms or mt, or NULL if neither present
608 */
609
610 static char *
parse_grep_colors(const char * gc)611 parse_grep_colors(const char *gc)
612 {
613 static char seq[16];
614 char *col;
615 uint32_t len;
616 if (gc == NULL) return NULL;
617 col = strstr(gc, "ms=");
618 if (col == NULL) col = strstr(gc, "mt=");
619 if (col == NULL) return NULL;
620 len = 0;
621 col += 3;
622 while (*col != ':' && *col != 0 && len < sizeof(seq)-1)
623 seq[len++] = *col++;
624 seq[len] = 0;
625 return seq;
626 }
627
628
629 /*************************************************
630 * Exit from the program *
631 *************************************************/
632
633 /* If there has been a resource error, give a suitable message.
634
635 Argument: the return code
636 Returns: does not return
637 */
638
639 static void
pcre2grep_exit(int rc)640 pcre2grep_exit(int rc)
641 {
642 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
643 status of 1, which is not helpful. To help with this problem, define a symbol
644 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
645 therein. */
646
647 #ifdef __VMS
648 char val_buf[4];
649 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
650 $DESCRIPTOR(sym_val, val_buf);
651 sprintf(val_buf, "%d", rc);
652 sym_val.dsc$w_length = strlen(val_buf);
653 lib$set_symbol(&sym_nam, &sym_val);
654 #endif
655
656 if (resource_error)
657 {
658 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
659 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
660 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
661 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
662 }
663 exit(rc);
664 }
665
666
667 /*************************************************
668 * Add item to chain of patterns *
669 *************************************************/
670
671 /* Used to add an item onto a chain, or just return an unconnected item if the
672 "after" argument is NULL.
673
674 Arguments:
675 s pattern string to add
676 patlen length of pattern
677 after if not NULL points to item to insert after
678
679 Returns: new pattern block or NULL on error
680 */
681
682 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)683 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
684 {
685 patstr *p = (patstr *)malloc(sizeof(patstr));
686 if (p == NULL)
687 {
688 fprintf(stderr, "pcre2grep: malloc failed\n");
689 pcre2grep_exit(2);
690 }
691 if (patlen > MAXPATLEN)
692 {
693 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
694 MAXPATLEN);
695 free(p);
696 return NULL;
697 }
698 p->next = NULL;
699 p->string = s;
700 p->length = patlen;
701 p->compiled = NULL;
702
703 if (after != NULL)
704 {
705 p->next = after->next;
706 after->next = p;
707 }
708 return p;
709 }
710
711
712 /*************************************************
713 * Free chain of patterns *
714 *************************************************/
715
716 /* Used for several chains of patterns.
717
718 Argument: pointer to start of chain
719 Returns: nothing
720 */
721
722 static void
free_pattern_chain(patstr * pc)723 free_pattern_chain(patstr *pc)
724 {
725 while (pc != NULL)
726 {
727 patstr *p = pc;
728 pc = p->next;
729 if (p->compiled != NULL) pcre2_code_free(p->compiled);
730 free(p);
731 }
732 }
733
734
735 /*************************************************
736 * Free chain of file names *
737 *************************************************/
738
739 /*
740 Argument: pointer to start of chain
741 Returns: nothing
742 */
743
744 static void
free_file_chain(fnstr * fn)745 free_file_chain(fnstr *fn)
746 {
747 while (fn != NULL)
748 {
749 fnstr *f = fn;
750 fn = f->next;
751 free(f);
752 }
753 }
754
755
756 /*************************************************
757 * OS-specific functions *
758 *************************************************/
759
760 /* These definitions are needed in all Windows environments, even those where
761 Unix-style directory scanning can be used (see below). */
762
763 #ifdef WIN32
764
765 #ifndef STRICT
766 # define STRICT
767 #endif
768 #ifndef WIN32_LEAN_AND_MEAN
769 # define WIN32_LEAN_AND_MEAN
770 #endif
771
772 #include <windows.h>
773
774 #define iswild(name) (strpbrk(name, "*?") != NULL)
775
776 /* Convert ANSI BGR format to RGB used by Windows */
777 #define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0))
778
779 static HANDLE hstdout;
780 static CONSOLE_SCREEN_BUFFER_INFO csbi;
781 static WORD match_colour;
782
783 static WORD
decode_ANSI_colour(const char * cs)784 decode_ANSI_colour(const char *cs)
785 {
786 WORD result = csbi.wAttributes;
787 while (*cs)
788 {
789 if (isdigit(*cs))
790 {
791 int code = atoi(cs);
792 if (code == 1) result |= 0x08;
793 else if (code == 4) result |= 0x8000;
794 else if (code == 5) result |= 0x80;
795 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
796 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
797 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
798 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
799 /* aixterm high intensity colour codes */
800 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
801 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
802
803 while (isdigit(*cs)) cs++;
804 }
805 if (*cs) cs++;
806 }
807 return result;
808 }
809
810
811 static void
init_colour_output()812 init_colour_output()
813 {
814 if (do_colour)
815 {
816 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
817 /* This fails when redirected to con; try again if so. */
818 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
819 {
820 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
821 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
822 GetConsoleScreenBufferInfo(hcon, &csbi);
823 CloseHandle(hcon);
824 }
825 match_colour = decode_ANSI_colour(colour_string);
826 /* No valid colour found - turn off colouring */
827 if (!match_colour) do_colour = FALSE;
828 }
829 }
830
831 #endif /* WIN32 */
832
833
834 /* The following sets of functions are defined so that they can be made system
835 specific. At present there are versions for Unix-style environments, Windows,
836 native z/OS, and "no support". */
837
838
839 /************* Directory scanning Unix-style and z/OS ***********/
840
841 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
842 #include <sys/types.h>
843 #include <sys/stat.h>
844 #include <dirent.h>
845
846 #if defined NATIVE_ZOS
847 /************* Directory and PDS/E scanning for z/OS ***********/
848 /************* z/OS looks mostly like Unix with USS ************/
849 /* However, z/OS needs the #include statements in this header */
850 #include "pcrzosfs.h"
851 /* That header is not included in the main PCRE distribution because
852 other apparatus is needed to compile pcre2grep for z/OS. The header
853 can be found in the special z/OS distribution, which is available
854 from www.zaconsultants.net or from www.cbttape.org. */
855 #endif
856
857 typedef DIR directory_type;
858 #define FILESEP '/'
859
860 static int
isdirectory(char * filename)861 isdirectory(char *filename)
862 {
863 struct stat statbuf;
864 if (stat(filename, &statbuf) < 0)
865 return 0; /* In the expectation that opening as a file will fail */
866 return S_ISDIR(statbuf.st_mode);
867 }
868
869 static directory_type *
opendirectory(char * filename)870 opendirectory(char *filename)
871 {
872 return opendir(filename);
873 }
874
875 static char *
readdirectory(directory_type * dir)876 readdirectory(directory_type *dir)
877 {
878 for (;;)
879 {
880 struct dirent *dent = readdir(dir);
881 if (dent == NULL) return NULL;
882 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
883 return dent->d_name;
884 }
885 /* Control never reaches here */
886 }
887
888 static void
closedirectory(directory_type * dir)889 closedirectory(directory_type *dir)
890 {
891 closedir(dir);
892 }
893
894
895 /************* Test for regular file, Unix-style **********/
896
897 static int
isregfile(char * filename)898 isregfile(char *filename)
899 {
900 struct stat statbuf;
901 if (stat(filename, &statbuf) < 0)
902 return 1; /* In the expectation that opening as a file will fail */
903 return S_ISREG(statbuf.st_mode);
904 }
905
906
907 #if defined NATIVE_ZOS
908 /************* Test for a terminal in z/OS **********/
909 /* isatty() does not work in a TSO environment, so always give FALSE.*/
910
911 static BOOL
is_stdout_tty(void)912 is_stdout_tty(void)
913 {
914 return FALSE;
915 }
916
917 static BOOL
is_file_tty(FILE * f)918 is_file_tty(FILE *f)
919 {
920 return FALSE;
921 }
922
923
924 /************* Test for a terminal, Unix-style **********/
925
926 #else
927 static BOOL
is_stdout_tty(void)928 is_stdout_tty(void)
929 {
930 return isatty(fileno(stdout));
931 }
932
933 static BOOL
is_file_tty(FILE * f)934 is_file_tty(FILE *f)
935 {
936 return isatty(fileno(f));
937 }
938 #endif
939
940
941 /************* Print optionally coloured match Unix-style and z/OS **********/
942
943 static void
print_match(const void * buf,int length)944 print_match(const void *buf, int length)
945 {
946 if (length == 0) return;
947 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
948 FWRITE_IGNORE(buf, 1, length, stdout);
949 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
950 }
951
952 /* End of Unix-style or native z/OS environment functions. */
953
954
955 /************* Directory scanning in Windows ***********/
956
957 /* I (Philip Hazel) have no means of testing this code. It was contributed by
958 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
959 when it did not exist. David Byron added a patch that moved the #include of
960 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
961 */
962
963 #elif defined WIN32
964
965 #ifndef INVALID_FILE_ATTRIBUTES
966 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
967 #endif
968
969 typedef struct directory_type
970 {
971 HANDLE handle;
972 BOOL first;
973 WIN32_FIND_DATA data;
974 } directory_type;
975
976 #define FILESEP '/'
977
978 int
isdirectory(char * filename)979 isdirectory(char *filename)
980 {
981 DWORD attr = GetFileAttributes(filename);
982 if (attr == INVALID_FILE_ATTRIBUTES)
983 return 0;
984 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
985 }
986
987 directory_type *
opendirectory(char * filename)988 opendirectory(char *filename)
989 {
990 size_t len;
991 char *pattern;
992 directory_type *dir;
993 DWORD err;
994 len = strlen(filename);
995 pattern = (char *)malloc(len + 3);
996 dir = (directory_type *)malloc(sizeof(*dir));
997 if ((pattern == NULL) || (dir == NULL))
998 {
999 fprintf(stderr, "pcre2grep: malloc failed\n");
1000 pcre2grep_exit(2);
1001 }
1002 memcpy(pattern, filename, len);
1003 if (iswild(filename))
1004 pattern[len] = 0;
1005 else
1006 memcpy(&(pattern[len]), "\\*", 3);
1007 dir->handle = FindFirstFile(pattern, &(dir->data));
1008 if (dir->handle != INVALID_HANDLE_VALUE)
1009 {
1010 free(pattern);
1011 dir->first = TRUE;
1012 return dir;
1013 }
1014 err = GetLastError();
1015 free(pattern);
1016 free(dir);
1017 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1018 return NULL;
1019 }
1020
1021 char *
readdirectory(directory_type * dir)1022 readdirectory(directory_type *dir)
1023 {
1024 for (;;)
1025 {
1026 if (!dir->first)
1027 {
1028 if (!FindNextFile(dir->handle, &(dir->data)))
1029 return NULL;
1030 }
1031 else
1032 {
1033 dir->first = FALSE;
1034 }
1035 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1036 return dir->data.cFileName;
1037 }
1038 #ifndef _MSC_VER
1039 return NULL; /* Keep compiler happy; never executed */
1040 #endif
1041 }
1042
1043 void
closedirectory(directory_type * dir)1044 closedirectory(directory_type *dir)
1045 {
1046 FindClose(dir->handle);
1047 free(dir);
1048 }
1049
1050
1051 /************* Test for regular file in Windows **********/
1052
1053 /* I don't know how to do this, or if it can be done; assume all paths are
1054 regular if they are not directories. */
1055
int
isregfile(char *filename)
{
  /* Anything that isdirectory() does not accept is taken to be regular. */
  int is_dir = isdirectory(filename);
  return !is_dir;
}
1060
1061
1062 /************* Test for a terminal in Windows **********/
1063
1064 static BOOL
is_stdout_tty(void)1065 is_stdout_tty(void)
1066 {
1067 return _isatty(_fileno(stdout));
1068 }
1069
1070 static BOOL
is_file_tty(FILE * f)1071 is_file_tty(FILE *f)
1072 {
1073 return _isatty(_fileno(f));
1074 }
1075
1076
1077 /************* Print optionally coloured match in Windows **********/
1078
1079 static void
print_match(const void * buf,int length)1080 print_match(const void *buf, int length)
1081 {
1082 if (length == 0) return;
1083 if (do_colour)
1084 {
1085 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1086 else SetConsoleTextAttribute(hstdout, match_colour);
1087 }
1088 FWRITE_IGNORE(buf, 1, length, stdout);
1089 if (do_colour)
1090 {
1091 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1092 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1093 }
1094 }
1095
1096 /* End of Windows functions */
1097
1098
1099 /************* Directory scanning when we can't do it ***********/
1100
1101 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1102
1103 #else
1104
1105 #define FILESEP 0
1106 typedef void directory_type;
1107
/* Without directory support nothing is ever reported as a directory. */
int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}
opendirectory(char * filename)1109 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* Stub: there are never any entries, so a null pointer is always returned. */
char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}
/* Stub: there is nothing to close. */
void
closedirectory(directory_type *dir)
{
  (void)dir;
}
1112
1113
1114 /************* Test for regular file when we can't do it **********/
1115
1116 /* Assume all files are regular. */
1117
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
1119
1120
1121 /************* Test for a terminal when we can't do it **********/
1122
1123 static BOOL
is_stdout_tty(void)1124 is_stdout_tty(void)
1125 {
1126 return FALSE;
1127 }
1128
1129 static BOOL
is_file_tty(FILE * f)1130 is_file_tty(FILE *f)
1131 {
1132 return FALSE;
1133 }
1134
1135
1136 /************* Print optionally coloured match when we can't do it **********/
1137
/* Write matched text to stdout; no colouring is attempted in this
environment.

Arguments:
  buf         the bytes to write
  length      how many bytes to write; nothing is output for zero

Returns:      nothing
*/

static void
print_match(const void *buf, int length)
{
  if (length != 0)
  {
    FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1144
1145 #endif /* End of system-specific functions */
1146
1147
1148
1149 #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A simple replacement for use on old-fashioned systems (SunOS4 is one
example) whose libraries lack strerror() but do provide the sys_nerr and
sys_errlist[] variables.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise the fixed text
            "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1167 #endif /* HAVE_STRERROR */
1168
1169
1170
1171 /*************************************************
1172 * Usage function *
1173 *************************************************/
1174
1175 static int
usage(int rc)1176 usage(int rc)
1177 {
1178 option_item *op;
1179 fprintf(stderr, "Usage: pcre2grep [-");
1180 for (op = optionlist; op->one_char != 0; op++)
1181 {
1182 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1183 }
1184 fprintf(stderr, "] [long options] [pattern] [files]\n");
1185 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1186 "options.\n");
1187 return rc;
1188 }
1189
1190
1191
1192 /*************************************************
1193 * Help function *
1194 *************************************************/
1195
1196 static void
help(void)1197 help(void)
1198 {
1199 option_item *op;
1200
1201 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1202 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1203 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1204
1205 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1206 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1207 printf("All callout scripts in patterns are supported." STDOUT_NL);
1208 #else
1209 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1210 #endif
1211 #else
1212 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1213 #endif
1214
1215 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1216
1217 #ifdef SUPPORT_LIBZ
1218 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1219 #endif
1220
1221 #ifdef SUPPORT_LIBBZ2
1222 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1223 #endif
1224
1225 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1226 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1227 #else
1228 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1229 #endif
1230
1231 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1232 printf("Options:" STDOUT_NL);
1233
1234 for (op = optionlist; op->one_char != 0; op++)
1235 {
1236 int n;
1237 char s[4];
1238
1239 if (op->one_char > 0 && (op->long_name)[0] == 0)
1240 n = 31 - printf(" -%c", op->one_char);
1241 else
1242 {
1243 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1244 else strcpy(s, " ");
1245 n = 31 - printf(" %s --%s", s, op->long_name);
1246 }
1247
1248 if (n < 1) n = 1;
1249 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1250 }
1251
1252 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1253 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1254 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1255 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1256 printf("space is removed and blank lines are ignored." STDOUT_NL);
1257 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1258
1259 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1260 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1261 }
1262
1263
1264
1265 /*************************************************
1266 * Test exclude/includes *
1267 *************************************************/
1268
1269 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1270 there are no includes, the path must match an include pattern.
1271
1272 Arguments:
1273 path the path to be matched
1274 ip the chain of include patterns
1275 ep the chain of exclude patterns
1276
1277 Returns: TRUE if the path is not excluded
1278 */
1279
1280 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1281 test_incexc(char *path, patstr *ip, patstr *ep)
1282 {
1283 int plen = strlen((const char *)path);
1284
1285 for (; ep != NULL; ep = ep->next)
1286 {
1287 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1288 return FALSE;
1289 }
1290
1291 if (ip == NULL) return TRUE;
1292
1293 for (; ip != NULL; ip = ip->next)
1294 {
1295 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1296 return TRUE;
1297 }
1298
1299 return FALSE;
1300 }
1301
1302
1303
1304 /*************************************************
1305 * Decode integer argument value *
1306 *************************************************/
1307
1308 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1309 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1310 just keep it simple.
1311
1312 Arguments:
1313 option_data the option data string
1314 op the option item (for error messages)
1315 longop TRUE if option given in long form
1316
1317 Returns: a long integer
1318 */
1319
1320 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1321 decode_number(char *option_data, option_item *op, BOOL longop)
1322 {
1323 unsigned long int n = 0;
1324 char *endptr = option_data;
1325 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1326 while (isdigit((unsigned char)(*endptr)))
1327 n = n * 10 + (int)(*endptr++ - '0');
1328 if (toupper(*endptr) == 'K')
1329 {
1330 n *= 1024;
1331 endptr++;
1332 }
1333 else if (toupper(*endptr) == 'M')
1334 {
1335 n *= 1024*1024;
1336 endptr++;
1337 }
1338
1339 if (*endptr != 0) /* Error */
1340 {
1341 if (longop)
1342 {
1343 char *equals = strchr(op->long_name, '=');
1344 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1345 (int)(equals - op->long_name);
1346 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1347 option_data, nlen, op->long_name);
1348 }
1349 else
1350 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1351 option_data, op->one_char);
1352 pcre2grep_exit(usage(2));
1353 }
1354
1355 return n;
1356 }
1357
1358
1359
1360 /*************************************************
1361 * Add item to a chain of numbers *
1362 *************************************************/
1363
1364 /* Used to add an item onto a chain, or just return an unconnected item if the
1365 "after" argument is NULL.
1366
1367 Arguments:
1368 n the number to add
1369 after if not NULL points to item to insert after
1370
1371 Returns: new number block
1372 */
1373
1374 static omstr *
add_number(int n,omstr * after)1375 add_number(int n, omstr *after)
1376 {
1377 omstr *om = (omstr *)malloc(sizeof(omstr));
1378
1379 if (om == NULL)
1380 {
1381 fprintf(stderr, "pcre2grep: malloc failed\n");
1382 pcre2grep_exit(2);
1383 }
1384 om->next = NULL;
1385 om->groupnum = n;
1386
1387 if (after != NULL)
1388 {
1389 om->next = after->next;
1390 after->next = om;
1391 }
1392 return om;
1393 }
1394
1395
1396
1397 /*************************************************
1398 * Read one line of input *
1399 *************************************************/
1400
1401 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1402 BZ2_read) into a large buffer, so many lines may be read at once. However,
1403 doing this for tty input means that no output appears until a lot of input has
1404 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1405 for this, because it does not stop at a binary zero, and therefore there is no
1406 way of telling how many characters it has read, because there may be binary
1407 zeros embedded in the data. This function is also used for reading patterns
1408 from files (the -f option).
1409
1410 Arguments:
1411 buffer the buffer to read into
1412 length the maximum number of characters to read
1413 f the file
1414
1415 Returns: the number of characters read, zero at end of file
1416 */
1417
1418 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1419 read_one_line(char *buffer, int length, FILE *f)
1420 {
1421 int c;
1422 int yield = 0;
1423 while ((c = fgetc(f)) != EOF)
1424 {
1425 buffer[yield++] = c;
1426 if (c == '\n' || yield >= length) break;
1427 }
1428 return yield;
1429 }
1430
1431
1432
1433 /*************************************************
1434 * Find end of line *
1435 *************************************************/
1436
1437 /* The length of the endline sequence that is found is set via lenptr. This may
1438 be zero at the very end of the file if there is no line-ending sequence there.
1439
1440 Arguments:
1441 p current position in line
1442 endptr end of available data
1443 lenptr where to put the length of the eol sequence
1444
1445 Returns: pointer after the last byte of the line,
1446 including the newline byte(s)
1447 */
1448
1449 static char *
end_of_line(char * p,char * endptr,int * lenptr)1450 end_of_line(char *p, char *endptr, int *lenptr)
1451 {
1452 switch(endlinetype)
1453 {
1454 default: /* Just in case */
1455 case PCRE2_NEWLINE_LF:
1456 while (p < endptr && *p != '\n') p++;
1457 if (p < endptr)
1458 {
1459 *lenptr = 1;
1460 return p + 1;
1461 }
1462 *lenptr = 0;
1463 return endptr;
1464
1465 case PCRE2_NEWLINE_CR:
1466 while (p < endptr && *p != '\r') p++;
1467 if (p < endptr)
1468 {
1469 *lenptr = 1;
1470 return p + 1;
1471 }
1472 *lenptr = 0;
1473 return endptr;
1474
1475 case PCRE2_NEWLINE_NUL:
1476 while (p < endptr && *p != '\0') p++;
1477 if (p < endptr)
1478 {
1479 *lenptr = 1;
1480 return p + 1;
1481 }
1482 *lenptr = 0;
1483 return endptr;
1484
1485 case PCRE2_NEWLINE_CRLF:
1486 for (;;)
1487 {
1488 while (p < endptr && *p != '\r') p++;
1489 if (++p >= endptr)
1490 {
1491 *lenptr = 0;
1492 return endptr;
1493 }
1494 if (*p == '\n')
1495 {
1496 *lenptr = 2;
1497 return p + 1;
1498 }
1499 }
1500 break;
1501
1502 case PCRE2_NEWLINE_ANYCRLF:
1503 while (p < endptr)
1504 {
1505 int extra = 0;
1506 int c = *((unsigned char *)p);
1507
1508 if (utf && c >= 0xc0)
1509 {
1510 int gcii, gcss;
1511 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1512 gcss = 6*extra;
1513 c = (c & utf8_table3[extra]) << gcss;
1514 for (gcii = 1; gcii <= extra; gcii++)
1515 {
1516 gcss -= 6;
1517 c |= (p[gcii] & 0x3f) << gcss;
1518 }
1519 }
1520
1521 p += 1 + extra;
1522
1523 switch (c)
1524 {
1525 case '\n':
1526 *lenptr = 1;
1527 return p;
1528
1529 case '\r':
1530 if (p < endptr && *p == '\n')
1531 {
1532 *lenptr = 2;
1533 p++;
1534 }
1535 else *lenptr = 1;
1536 return p;
1537
1538 default:
1539 break;
1540 }
1541 } /* End of loop for ANYCRLF case */
1542
1543 *lenptr = 0; /* Must have hit the end */
1544 return endptr;
1545
1546 case PCRE2_NEWLINE_ANY:
1547 while (p < endptr)
1548 {
1549 int extra = 0;
1550 int c = *((unsigned char *)p);
1551
1552 if (utf && c >= 0xc0)
1553 {
1554 int gcii, gcss;
1555 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1556 gcss = 6*extra;
1557 c = (c & utf8_table3[extra]) << gcss;
1558 for (gcii = 1; gcii <= extra; gcii++)
1559 {
1560 gcss -= 6;
1561 c |= (p[gcii] & 0x3f) << gcss;
1562 }
1563 }
1564
1565 p += 1 + extra;
1566
1567 switch (c)
1568 {
1569 case '\n': /* LF */
1570 case '\v': /* VT */
1571 case '\f': /* FF */
1572 *lenptr = 1;
1573 return p;
1574
1575 case '\r': /* CR */
1576 if (p < endptr && *p == '\n')
1577 {
1578 *lenptr = 2;
1579 p++;
1580 }
1581 else *lenptr = 1;
1582 return p;
1583
1584 #ifndef EBCDIC
1585 case 0x85: /* Unicode NEL */
1586 *lenptr = utf? 2 : 1;
1587 return p;
1588
1589 case 0x2028: /* Unicode LS */
1590 case 0x2029: /* Unicode PS */
1591 *lenptr = 3;
1592 return p;
1593 #endif /* Not EBCDIC */
1594
1595 default:
1596 break;
1597 }
1598 } /* End of loop for ANY case */
1599
1600 *lenptr = 0; /* Must have hit the end */
1601 return endptr;
1602 } /* End of overall switch */
1603 }
1604
1605
1606
1607 /*************************************************
1608 * Find start of previous line *
1609 *************************************************/
1610
1611 /* This is called when looking back for before lines to print.
1612
1613 Arguments:
1614 p start of the subsequent line
1615 startptr start of available data
1616
1617 Returns: pointer to the start of the previous line
1618 */
1619
1620 static char *
previous_line(char * p,char * startptr)1621 previous_line(char *p, char *startptr)
1622 {
1623 switch(endlinetype)
1624 {
1625 default: /* Just in case */
1626 case PCRE2_NEWLINE_LF:
1627 p--;
1628 while (p > startptr && p[-1] != '\n') p--;
1629 return p;
1630
1631 case PCRE2_NEWLINE_CR:
1632 p--;
1633 while (p > startptr && p[-1] != '\n') p--;
1634 return p;
1635
1636 case PCRE2_NEWLINE_NUL:
1637 p--;
1638 while (p > startptr && p[-1] != '\0') p--;
1639 return p;
1640
1641 case PCRE2_NEWLINE_CRLF:
1642 for (;;)
1643 {
1644 p -= 2;
1645 while (p > startptr && p[-1] != '\n') p--;
1646 if (p <= startptr + 1 || p[-2] == '\r') return p;
1647 }
1648 /* Control can never get here */
1649
1650 case PCRE2_NEWLINE_ANY:
1651 case PCRE2_NEWLINE_ANYCRLF:
1652 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1653 if (utf) while ((*p & 0xc0) == 0x80) p--;
1654
1655 while (p > startptr)
1656 {
1657 unsigned int c;
1658 char *pp = p - 1;
1659
1660 if (utf)
1661 {
1662 int extra = 0;
1663 while ((*pp & 0xc0) == 0x80) pp--;
1664 c = *((unsigned char *)pp);
1665 if (c >= 0xc0)
1666 {
1667 int gcii, gcss;
1668 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1669 gcss = 6*extra;
1670 c = (c & utf8_table3[extra]) << gcss;
1671 for (gcii = 1; gcii <= extra; gcii++)
1672 {
1673 gcss -= 6;
1674 c |= (pp[gcii] & 0x3f) << gcss;
1675 }
1676 }
1677 }
1678 else c = *((unsigned char *)pp);
1679
1680 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1681 {
1682 case '\n': /* LF */
1683 case '\r': /* CR */
1684 return p;
1685
1686 default:
1687 break;
1688 }
1689
1690 else switch (c)
1691 {
1692 case '\n': /* LF */
1693 case '\v': /* VT */
1694 case '\f': /* FF */
1695 case '\r': /* CR */
1696 #ifndef EBCDIC
1697 case 0x85: /* Unicode NEL */
1698 case 0x2028: /* Unicode LS */
1699 case 0x2029: /* Unicode PS */
1700 #endif /* Not EBCDIC */
1701 return p;
1702
1703 default:
1704 break;
1705 }
1706
1707 p = pp; /* Back one character */
1708 } /* End of loop for ANY case */
1709
1710 return startptr; /* Hit start of data */
1711 } /* End of overall switch */
1712 }
1713
1714
1715
1716 /*************************************************
1717 * Output newline at end *
1718 *************************************************/
1719
1720 /* This function is called if the final line of a file has been written to
1721 stdout, but it does not have a terminating newline.
1722
1723 Arguments: none
1724 Returns: nothing
1725 */
1726
1727 static void
write_final_newline(void)1728 write_final_newline(void)
1729 {
1730 switch(endlinetype)
1731 {
1732 default: /* Just in case */
1733 case PCRE2_NEWLINE_LF:
1734 case PCRE2_NEWLINE_ANY:
1735 case PCRE2_NEWLINE_ANYCRLF:
1736 fprintf(stdout, "\n");
1737 break;
1738
1739 case PCRE2_NEWLINE_CR:
1740 fprintf(stdout, "\r");
1741 break;
1742
1743 case PCRE2_NEWLINE_CRLF:
1744 fprintf(stdout, "\r\n");
1745 break;
1746
1747 case PCRE2_NEWLINE_NUL:
1748 fprintf(stdout, "%c", 0);
1749 break;
1750 }
1751 }
1752
1753
1754 /*************************************************
1755 * Print the previous "after" lines *
1756 *************************************************/
1757
1758 /* This is called if we are about to lose said lines because of buffer filling,
1759 and at the end of the file. The data in the line is written using fwrite() so
1760 that a binary zero does not terminate it.
1761
1762 Arguments:
1763 lastmatchnumber the number of the last matching line, plus one
1764 lastmatchrestart where we restarted after the last match
1765 endptr end of available data
1766 printname filename for printing
1767
1768 Returns: nothing
1769 */
1770
1771 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1772 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1773 char *endptr, const char *printname)
1774 {
1775 if (after_context > 0 && lastmatchnumber > 0)
1776 {
1777 int count = 0;
1778 int ellength = 0;
1779 while (lastmatchrestart < endptr && count < after_context)
1780 {
1781 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1782 if (ellength == 0 && pp == main_buffer + bufsize) break;
1783 if (printname != NULL) fprintf(stdout, "%s-", printname);
1784 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1785 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1786 lastmatchrestart = pp;
1787 count++;
1788 }
1789
1790 /* If we have printed any lines, arrange for a hyphen separator if anything
1791 else follows. Also, if the last line is the final line in the file and it had
1792 no newline, add one. */
1793
1794 if (count > 0)
1795 {
1796 hyphenpending = TRUE;
1797 if (ellength == 0 && lastmatchrestart >= endptr)
1798 write_final_newline();
1799 }
1800 }
1801 }
1802
1803
1804
1805 /*************************************************
1806 * Apply patterns to subject till one matches *
1807 *************************************************/
1808
1809 /* This function is called to run through all patterns, looking for a match. It
1810 is used multiple times for the same subject when colouring is enabled, in order
1811 to find all possible matches.
1812
1813 Arguments:
1814 matchptr the start of the subject
1815 length the length of the subject to match
1816 options options for pcre_exec
1817 startoffset where to start matching
1818 mrc address of where to put the result of pcre2_match()
1819
1820 Returns: TRUE if there was a match
1821 FALSE if there was no match
1822 invert if there was a non-fatal error
1823 */
1824
1825 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1826 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1827 PCRE2_SIZE startoffset, int *mrc)
1828 {
1829 int i;
1830 PCRE2_SIZE slen = length;
1831 patstr *p = patterns;
1832 const char *msg = "this text:\n\n";
1833
1834 if (slen > 200)
1835 {
1836 slen = 200;
1837 msg = "text that starts:\n\n";
1838 }
1839
1840 for (i = 1; p != NULL; p = p->next, i++)
1841 {
1842 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1843 startoffset, options, match_data, match_context);
1844 if (*mrc >= 0) return TRUE;
1845 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1846 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1847 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1848 fprintf(stderr, "%s", msg);
1849 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1850 fprintf(stderr, "\n\n");
1851 if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1852 *mrc >= PCRE2_ERROR_UTF8_ERR21)
1853 {
1854 unsigned char mbuffer[256];
1855 PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1856 (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
1857 fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer,
1858 SIZ_CAST startchar);
1859 }
1860 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1861 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1862 resource_error = TRUE;
1863 if (error_count++ > 20)
1864 {
1865 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1866 pcre2grep_exit(2);
1867 }
1868 return invert; /* No more matching; don't show the line again */
1869 }
1870
1871 return FALSE; /* No match, no errors */
1872 }
1873
1874
1875
1876 /*************************************************
1877 * Decode dollar escape sequence *
1878 *************************************************/
1879
1880 /* Called from various places to decode $ escapes in output strings. The escape
1881 sequences are as follows:
1882
1883 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1884 zero is never returned; '0' is substituted.
1885
1886 $a returns bell.
1887 $b returns backspace.
1888 $e returns escape.
1889 $f returns form feed.
1890 $n returns newline.
1891 $r returns carriage return.
1892 $t returns tab.
1893 $v returns vertical tab.
1894 $o<digits> returns the character represented by the given octal
1895 number; up to three digits are processed.
1896 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1897 code points.
1898 $x<digits> returns the character represented by the given hexadecimal
1899 number; up to two digits are processed.
1900 $x{<digits} does the same, up to 6 digits, but gives an error for mode-invalid
1901 code points.
1902 Any other character is substituted by itself. E.g: $$ is replaced by a single
1903 dollar.
1904
1905 Arguments:
1906 begin the start of the whole string
1907 string points to the $
1908 callout TRUE if in a callout (inhibits error messages)
1909 value where to return a value
1910 last where to return pointer to the last used character
1911
1912 Returns: DDE_ERROR after a syntax error
1913 DDE_CAPTURE if *value is a capture number
1914 DDE_CHAR if *value is a character code
1915 */
1916
1917 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1918 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1919 uint32_t *value, PCRE2_SPTR *last)
1920 {
1921 uint32_t c = 0;
1922 int base = 10;
1923 int dcount;
1924 int rc = DDE_CHAR;
1925 BOOL brace = FALSE;
1926
1927 switch (*(++string))
1928 {
1929 case 0: /* Syntax error: a character must be present after $. */
1930 if (!callout)
1931 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1932 (int)(string - begin), "no character after $");
1933 *last = string;
1934 return DDE_ERROR;
1935
1936 case '{':
1937 brace = TRUE;
1938 string++;
1939 if (!isdigit(*string)) /* Syntax error: a decimal number required. */
1940 {
1941 if (!callout)
1942 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1943 (int)(string - begin), "decimal number expected");
1944 rc = DDE_ERROR;
1945 break;
1946 }
1947
1948 /* Fall through */
1949
1950 /* The maximum capture number is 65535, so any number greater than that will
1951 always be an unknown capture number. We just stop incrementing, in order to
1952 avoid overflow. */
1953
1954 case '0': case '1': case '2': case '3': case '4':
1955 case '5': case '6': case '7': case '8': case '9':
1956 do
1957 {
1958 if (c <= 65535) c = c * 10 + (*string - '0');
1959 string++;
1960 }
1961 while (*string >= '0' && *string <= '9');
1962 string--; /* Point to last digit */
1963
1964 /* In a callout, capture number 0 is not available. No error can be given,
1965 so just return the character '0'. */
1966
1967 if (callout && c == 0)
1968 {
1969 *value = '0';
1970 }
1971 else
1972 {
1973 *value = c;
1974 rc = DDE_CAPTURE;
1975 }
1976 break;
1977
1978 /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1979 for valid Unicode code points. */
1980
1981 case 'o':
1982 base = 8;
1983 string++;
1984 if (*string == '{')
1985 {
1986 brace = TRUE;
1987 string++;
1988 dcount = 7;
1989 }
1990 else dcount = 3;
1991 for (; dcount > 0; dcount--)
1992 {
1993 if (*string < '0' || *string > '7') break;
1994 c = c * 8 + (*string++ - '0');
1995 }
1996 *value = c;
1997 string--; /* Point to last digit */
1998 break;
1999
2000 /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2001 for valid Unicode code points. */
2002
2003 case 'x':
2004 base = 16;
2005 string++;
2006 if (*string == '{')
2007 {
2008 brace = TRUE;
2009 string++;
2010 dcount = 6;
2011 }
2012 else dcount = 2;
2013 for (; dcount > 0; dcount--)
2014 {
2015 if (!isxdigit(*string)) break;
2016 if (*string >= '0' && *string <= '9')
2017 c = c *16 + *string++ - '0';
2018 else
2019 c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2020 }
2021 *value = c;
2022 string--; /* Point to last digit */
2023 break;
2024
2025 case 'a': *value = '\a'; break;
2026 case 'b': *value = '\b'; break;
2027 #ifndef EBCDIC
2028 case 'e': *value = '\033'; break;
2029 #else
2030 case 'e': *value = '\047'; break;
2031 #endif
2032 case 'f': *value = '\f'; break;
2033 case 'n': *value = STDOUT_NL_CODE; break;
2034 case 'r': *value = '\r'; break;
2035 case 't': *value = '\t'; break;
2036 case 'v': *value = '\v'; break;
2037
2038 default: *value = *string; break;
2039 }
2040
2041 if (brace)
2042 {
2043 c = string[1];
2044 if (c != '}')
2045 {
2046 rc = DDE_ERROR;
2047 if (!callout)
2048 {
2049 if ((base == 8 && c >= '0' && c <= '7') ||
2050 (base == 16 && isxdigit(c)))
2051 {
2052 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2053 "too many %s digits\n", (int)(string - begin),
2054 (base == 8)? "octal" : "hex");
2055 }
2056 else
2057 {
2058 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2059 (int)(string - begin), "missing closing brace");
2060 }
2061 }
2062 }
2063 else string++;
2064 }
2065
2066 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2067
2068 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2069 {
2070 uint32_t max = utf? 0x0010ffffu : 0xffu;
2071 if (*value > max)
2072 {
2073 if (!callout)
2074 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2075 "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2076 rc = DDE_ERROR;
2077 }
2078 }
2079
2080 *last = string;
2081 return rc;
2082 }
2083
2084
2085
2086 /*************************************************
2087 * Check output text for errors *
2088 *************************************************/
2089
2090 /* Called early, to get errors before doing anything for -O text; also called
2091 from callouts to check before outputting.
2092
2093 Arguments:
2094 string an --output text string
2095 callout TRUE if in a callout (stops printing errors)
2096
2097 Returns: TRUE if OK, FALSE on error
2098 */
2099
2100 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2101 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2102 {
2103 uint32_t value;
2104 PCRE2_SPTR begin = string;
2105
2106 for (; *string != 0; string++)
2107 {
2108 if (*string == '$' &&
2109 decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2110 return FALSE;
2111 }
2112
2113 return TRUE;
2114 }
2115
2116
2117 /*************************************************
2118 * Display output text *
2119 *************************************************/
2120
2121 /* Display the output text, which is assumed to have already been syntax
2122 checked. Output may contain escape sequences started by the dollar sign.
2123
2124 Arguments:
2125 string: the output text
2126 callout: TRUE for the builtin callout, FALSE for --output
2127 subject the start of the subject
2128 ovector: capture offsets
2129 capture_top: number of captures
2130
2131 Returns: TRUE if something was output, other than newline
2132 FALSE if nothing was output, or newline was last output
2133 */
2134
2135 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2136 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2137 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2138 {
2139 uint32_t value;
2140 BOOL printed = FALSE;
2141 PCRE2_SPTR begin = string;
2142
2143 for (; *string != 0; string++)
2144 {
2145 if (*string == '$')
2146 {
2147 switch(decode_dollar_escape(begin, string, callout, &value, &string))
2148 {
2149 case DDE_CHAR:
2150 if (value == STDOUT_NL_CODE)
2151 {
2152 fprintf(stdout, STDOUT_NL);
2153 printed = FALSE;
2154 continue;
2155 }
2156 break; /* Will print value */
2157
2158 case DDE_CAPTURE:
2159 if (value < capture_top)
2160 {
2161 PCRE2_SIZE capturesize;
2162 value *= 2;
2163 capturesize = ovector[value + 1] - ovector[value];
2164 if (capturesize > 0)
2165 {
2166 print_match(subject + ovector[value], capturesize);
2167 printed = TRUE;
2168 }
2169 }
2170 continue;
2171
2172 default: /* Should not occur */
2173 break;
2174 }
2175 }
2176
2177 else value = *string; /* Not a $ escape */
2178
2179 if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2180 {
2181 int i;
2182 int n = ord2utf8(value);
2183 for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2184 }
2185
2186 printed = TRUE;
2187 }
2188
2189 return printed;
2190 }
2191
2192
2193 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2194
2195 /*************************************************
2196 * Parse and execute callout scripts *
2197 *************************************************/
2198
2199 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2200 string block and executes the program specified by the string. The string is a
2201 list of substrings separated by pipe characters. The first substring represents
2202 the executable name, and the following substrings specify the arguments:
2203
2204 program_name|param1|param2|...
2205
2206 Any substring (including the program name) can contain escape sequences
2207 started by the dollar character. The escape sequences are substituted as
2208 follows:
2209
2210 $<digits> or ${<digits>} is replaced by the captured substring of the given
2211 decimal number, which must be greater than zero. If the number is greater
2212 than the number of capturing substrings, or if the capture is unset, the
2213 replacement is empty.
2214
2215 Any other character is substituted by itself. E.g: $$ is replaced by a single
2216 dollar or $| replaced by a pipe character.
2217
Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output. This is the only form that is supported if
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is
used to separate each callout, defaulting to newline.
2222
2223 Example:
2224
2225 echo -e "abcde\n12345" | pcre2grep \
2226 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2227
2228 Output:
2229
2230 Arg1: [a] [bcd] [d] Arg2: |a| ()
2231 abcde
2232 Arg1: [1] [234] [4] Arg2: |1| ()
2233 12345
2234
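Arguments:
  calloutptr   the callout block
  unused       the callout data pointer, which is not used

Returns:       0 if matching is to continue; 1 (the match fails) if an
               external command was run and its result was non-zero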
2239 */
2240
2241 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2242 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2243 {
2244 PCRE2_SIZE length = calloutptr->callout_string_length;
2245 PCRE2_SPTR string = calloutptr->callout_string;
2246 PCRE2_SPTR subject = calloutptr->subject;
2247 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2248 PCRE2_SIZE capture_top = calloutptr->capture_top;
2249
2250 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2251 PCRE2_SIZE argsvectorlen = 2;
2252 PCRE2_SIZE argslen = 1;
2253 char *args;
2254 char *argsptr;
2255 char **argsvector;
2256 char **argsvectorptr;
2257 #ifndef WIN32
2258 pid_t pid;
2259 #endif
2260 int result = 0;
2261 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2262
2263 (void)unused; /* Avoid compiler warning */
2264
2265 /* Only callouts with strings are supported. */
2266
2267 if (string == NULL || length == 0) return 0;
2268
2269 /* If there's no command, output the remainder directly. */
2270
2271 if (*string == '|')
2272 {
2273 string++;
2274 if (!syntax_check_output_text(string, TRUE)) return 0;
2275 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2276 return 0;
2277 }
2278
2279 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2280 return 0;
2281 #else
2282
2283 /* Checking syntax and compute the number of string fragments. Callout strings
2284 are silently ignored in the event of a syntax error. */
2285
2286 while (length > 0)
2287 {
2288 if (*string == '|')
2289 {
2290 argsvectorlen++;
2291 if (argsvectorlen > 10000) return 0; /* Too many args */
2292 }
2293
2294 else if (*string == '$')
2295 {
2296 uint32_t value;
2297 PCRE2_SPTR begin = string;
2298
2299 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2300 {
2301 case DDE_CAPTURE:
2302 if (value < capture_top)
2303 {
2304 value *= 2;
2305 argslen += ovector[value + 1] - ovector[value];
2306 }
2307 argslen--; /* Negate the effect of argslen++ below. */
2308 break;
2309
2310 case DDE_CHAR:
2311 if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2312 else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2313 break;
2314
2315 default: /* Should not occur */
2316 case DDE_ERROR:
2317 return 0;
2318 }
2319
2320 length -= (string - begin);
2321 }
2322
2323 string++;
2324 length--;
2325 argslen++;
2326 }
2327
2328 /* Get memory for the argument vector and its strings. */
2329
2330 args = (char*)malloc(argslen);
2331 if (args == NULL) return 0;
2332
2333 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2334 if (argsvector == NULL)
2335 {
2336 free(args);
2337 return 0;
2338 }
2339
2340 /* Now reprocess the string and set up the arguments. */
2341
2342 argsptr = args;
2343 argsvectorptr = argsvector;
2344 *argsvectorptr++ = argsptr;
2345
2346 length = calloutptr->callout_string_length;
2347 string = calloutptr->callout_string;
2348
2349 while (length > 0)
2350 {
2351 if (*string == '|')
2352 {
2353 *argsptr++ = '\0';
2354 *argsvectorptr++ = argsptr;
2355 }
2356
2357 else if (*string == '$')
2358 {
2359 uint32_t value;
2360 PCRE2_SPTR begin = string;
2361
2362 switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2363 {
2364 case DDE_CAPTURE:
2365 if (value < capture_top)
2366 {
2367 PCRE2_SIZE capturesize;
2368 value *= 2;
2369 capturesize = ovector[value + 1] - ovector[value];
2370 memcpy(argsptr, subject + ovector[value], capturesize);
2371 argsptr += capturesize;
2372 }
2373 break;
2374
2375 case DDE_CHAR:
2376 if (value == STDOUT_NL_CODE)
2377 {
2378 memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2379 argsptr += STDOUT_NL_LEN;
2380 }
2381 else if (utf && value > 127)
2382 {
2383 int n = ord2utf8(value);
2384 memcpy(argsptr, utf8_buffer, n);
2385 argsptr += n;
2386 }
2387 else
2388 {
2389 *argsptr++ = value;
2390 }
2391 break;
2392
2393 default: /* Even though this should not occur, the string having */
2394 case DDE_ERROR: /* been checked above, we need to include the free() */
2395 free(args); /* calls so that source checkers do not complain. */
2396 free(argsvector);
2397 return 0;
2398 }
2399
2400 length -= (string - begin);
2401 }
2402
2403 else *argsptr++ = *string;
2404
2405 /* Advance along the string */
2406
2407 string++;
2408 length--;
2409 }
2410
2411 *argsptr++ = '\0';
2412 *argsvectorptr = NULL;
2413
2414 /* Running an external command is system-dependent. Handle Windows and VMS as
2415 necessary, otherwise assume fork(). */
2416
2417 #ifdef WIN32
2418 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2419
2420 #elif defined __VMS
2421 {
2422 char cmdbuf[500];
2423 short i = 0;
2424 int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2425 $DESCRIPTOR(cmd, cmdbuf);
2426
2427 cmdbuf[0] = 0;
2428 while (argsvector[i])
2429 {
2430 strcat(cmdbuf, argsvector[i]);
2431 strcat(cmdbuf, " ");
2432 i++;
2433 }
2434 cmd.dsc$w_length = strlen(cmdbuf) - 1;
2435 status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2436 if (!(status & 1)) result = 0;
2437 else result = retstat & 1 ? 0 : 1;
2438 }
2439
2440 #else /* Neither Windows nor VMS */
2441 pid = fork();
2442 if (pid == 0)
2443 {
2444 (void)execv(argsvector[0], argsvector);
2445 /* Control gets here if there is an error, e.g. a non-existent program */
2446 exit(1);
2447 }
2448 else if (pid > 0)
2449 (void)waitpid(pid, &result, 0);
2450 #endif /* End Windows/VMS/other handling */
2451
2452 free(args);
2453 free(argsvector);
2454
2455 /* Currently negative return values are not supported, only zero (match
2456 continues) or non-zero (match fails). */
2457
2458 return result != 0;
2459 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2460 }
2461 #endif /* SUPPORT_PCRE2GREP_CALLOUT */
2462
2463
2464
2465 /*************************************************
2466 * Read a portion of the file into buffer *
2467 *************************************************/
2468
2469 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2470 fill_buffer(void *handle, int frtype, char *buffer, int length,
2471 BOOL input_line_buffered)
2472 {
2473 (void)frtype; /* Avoid warning when not used */
2474
2475 #ifdef SUPPORT_LIBZ
2476 if (frtype == FR_LIBZ)
2477 return gzread((gzFile)handle, buffer, length);
2478 else
2479 #endif
2480
2481 #ifdef SUPPORT_LIBBZ2
2482 if (frtype == FR_LIBBZ2)
2483 return BZ2_bzread((BZFILE *)handle, buffer, length);
2484 else
2485 #endif
2486
2487 return (input_line_buffered ?
2488 read_one_line(buffer, length, (FILE *)handle) :
2489 fread(buffer, 1, length, (FILE *)handle));
2490 }
2491
2492
2493
2494 /*************************************************
2495 * Grep an individual file *
2496 *************************************************/
2497
2498 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2499 times the value of bufthird. The matching point is never allowed to stray into
2500 the top third of the buffer, thus keeping more of the file available for
2501 context printing or for multiline scanning. For large files, the pointer will
2502 be in the middle third most of the time, so the bottom third is available for
2503 "before" context printing.
2504
2505 Arguments:
2506 handle the fopened FILE stream for a normal file
2507 the gzFile pointer when reading is via libz
2508 the BZFILE pointer when reading is via libbz2
2509 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2510 filename the file name or NULL (for errors)
2511 printname the file name if it is to be printed for each match
2512 or NULL if the file name is not to be printed
2513 it cannot be NULL if filenames[_nomatch]_only is set
2514
2515 Returns: 0 if there was at least one match
2516 1 otherwise (no matches)
2517 2 if an overlong line is encountered
2518 3 if there is a read error on a .bz2 file
2519 */
2520
2521 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2522 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2523 {
2524 int rc = 1;
2525 int filepos = 0;
2526 unsigned long int linenumber = 1;
2527 unsigned long int lastmatchnumber = 0;
2528 unsigned long int count = 0;
2529 long int count_matched_lines = 0;
2530 char *lastmatchrestart = main_buffer;
2531 char *ptr = main_buffer;
2532 char *endptr;
2533 PCRE2_SIZE bufflength;
2534 BOOL binary = FALSE;
2535 BOOL endhyphenpending = FALSE;
2536 BOOL lines_printed = FALSE;
2537 BOOL input_line_buffered = line_buffered;
2538 FILE *in = NULL; /* Ensure initialized */
2539
2540 /* Do the first read into the start of the buffer and set up the pointer to end
2541 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2542 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2543 fail. */
2544
2545 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2546 {
2547 in = (FILE *)handle;
2548 if (is_file_tty(in)) input_line_buffered = TRUE;
2549 }
2550 else input_line_buffered = FALSE;
2551
2552 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2553 input_line_buffered);
2554
2555 #ifdef SUPPORT_LIBBZ2
2556 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE */
2557 #endif
2558
2559 endptr = main_buffer + bufflength;
2560
2561 /* Unless binary-files=text, see if we have a binary file. This uses the same
2562 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2563 file. However, when the newline convention is binary zero, we can't do this. */
2564
2565 if (binary_files != BIN_TEXT)
2566 {
2567 if (endlinetype != PCRE2_NEWLINE_NUL)
2568 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2569 != NULL;
2570 if (binary && binary_files == BIN_NOMATCH) return 1;
2571 }
2572
2573 /* Loop while the current pointer is not at the end of the file. For large
2574 files, endptr will be at the end of the buffer when we are in the middle of the
2575 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2576 way, the buffer is shifted left and re-filled. */
2577
2578 while (ptr < endptr)
2579 {
2580 int endlinelength;
2581 int mrc = 0;
2582 unsigned int options = 0;
2583 BOOL match;
2584 BOOL line_matched = FALSE;
2585 char *t = ptr;
2586 PCRE2_SIZE length, linelength;
2587 PCRE2_SIZE startoffset = 0;
2588
2589 /* If the -m option set a limit for the number of matched or non-matched
2590 lines, check it here. A limit of zero means that no matching is ever done.
2591 For stdin from a file, set the file position. */
2592
2593 if (count_limit >= 0 && count_matched_lines >= count_limit)
2594 {
2595 if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
2596 (void)fseek(handle, (long int)filepos, SEEK_SET);
2597 rc = (count_limit == 0)? 1 : 0;
2598 break;
2599 }
2600
2601 /* At this point, ptr is at the start of a line. We need to find the length
2602 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2603 length remainder of the data in the buffer. Otherwise, it is the length of
2604 the next line, excluding the terminating newline. After matching, we always
2605 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2606 option is used for compiling, so that any match is constrained to be in the
2607 first line. */
2608
2609 t = end_of_line(t, endptr, &endlinelength);
2610 linelength = t - ptr - endlinelength;
2611 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2612
2613 /* Check to see if the line we are looking at extends right to the very end
2614 of the buffer without a line terminator. This means the line is too long to
2615 handle at the current buffer size. Until the buffer reaches its maximum size,
2616 try doubling it and reading more data. */
2617
2618 if (endlinelength == 0 && t == main_buffer + bufsize)
2619 {
2620 if (bufthird < max_bufthird)
2621 {
2622 char *new_buffer;
2623 int new_bufthird = 2*bufthird;
2624
2625 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2626 new_buffer = (char *)malloc(3*new_bufthird);
2627
2628 if (new_buffer == NULL)
2629 {
2630 fprintf(stderr,
2631 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2632 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2633 linenumber,
2634 (filename == NULL)? "" : " of file ",
2635 (filename == NULL)? "" : filename,
2636 new_bufthird);
2637 return 2;
2638 }
2639
2640 /* Copy the data and adjust pointers to the new buffer location. */
2641
2642 memcpy(new_buffer, main_buffer, bufsize);
2643 bufthird = new_bufthird;
2644 bufsize = 3*bufthird;
2645 ptr = new_buffer + (ptr - main_buffer);
2646 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2647 free(main_buffer);
2648 main_buffer = new_buffer;
2649
2650 /* Read more data into the buffer and then try to find the line ending
2651 again. */
2652
2653 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2654 bufsize - bufflength, input_line_buffered);
2655 endptr = main_buffer + bufflength;
2656 continue;
2657 }
2658 else
2659 {
2660 fprintf(stderr,
2661 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2662 "pcre2grep: the maximum buffer size is %d\n"
2663 "pcre2grep: use the --max-buffer-size option to change it\n",
2664 linenumber,
2665 (filename == NULL)? "" : " of file ",
2666 (filename == NULL)? "" : filename,
2667 bufthird);
2668 return 2;
2669 }
2670 }
2671
2672 /* Extra processing for Jeffrey Friedl's debugging. */
2673
2674 #ifdef JFRIEDL_DEBUG
2675 if (jfriedl_XT || jfriedl_XR)
2676 {
2677 # include <sys/time.h>
2678 # include <time.h>
2679 struct timeval start_time, end_time;
2680 struct timezone dummy;
2681 int i;
2682
2683 if (jfriedl_XT)
2684 {
2685 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2686 const char *orig = ptr;
2687 ptr = malloc(newlen + 1);
2688 if (!ptr) {
2689 printf("out of memory");
2690 pcre2grep_exit(2);
2691 }
2692 endptr = ptr;
2693 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2694 for (i = 0; i < jfriedl_XT; i++) {
2695 strncpy(endptr, orig, length);
2696 endptr += length;
2697 }
2698 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2699 length = newlen;
2700 }
2701
2702 if (gettimeofday(&start_time, &dummy) != 0)
2703 perror("bad gettimeofday");
2704
2705
2706 for (i = 0; i < jfriedl_XR; i++)
2707 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2708 PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
2709
2710 if (gettimeofday(&end_time, &dummy) != 0)
2711 perror("bad gettimeofday");
2712
2713 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2714 -
2715 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2716
2717 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2718 return 0;
2719 }
2720 #endif
2721
2722 /* We come back here after a match when only_matching_count is non-zero, in
2723 order to find any further matches in the same line. This applies to
2724 --only-matching, --file-offsets, and --line-offsets. */
2725
2726 ONLY_MATCHING_RESTART:
2727
2728 /* Run through all the patterns until one matches or there is an error other
2729 than NOMATCH. This code is in a subroutine so that it can be re-used for
2730 finding subsequent matches when colouring matched lines. After finding one
2731 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2732 this line. */
2733
2734 match = match_patterns(ptr, length, options, startoffset, &mrc);
2735 options = PCRE2_NOTEMPTY;
2736
2737 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2738 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2739 return code - to output data lines, so that binary zeroes are treated as just
2740 another data character. */
2741
2742 if (match != invert)
2743 {
2744 BOOL hyphenprinted = FALSE;
2745
2746 /* We've failed if we want a file that doesn't have any matches. */
2747
2748 if (filenames == FN_NOMATCH_ONLY) return 1;
2749
2750 /* Remember that this line matched (for counting matched lines) */
2751
2752 line_matched = TRUE;
2753
2754 /* If all we want is a yes/no answer, we can return immediately. */
2755
2756 if (quiet) return 0;
2757
2758 /* Just count if just counting is wanted. */
2759
2760 else if (count_only || show_total_count) count++;
2761
2762 /* When handling a binary file and binary-files==binary, the "binary"
2763 variable will be set true (it's false in all other cases). In this
2764 situation we just want to output the file name. No need to scan further. */
2765
2766 else if (binary)
2767 {
2768 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2769 return 0;
2770 }
2771
2772 /* Likewise, if all we want is a file name, there is no need to scan any
2773 more lines in the file. */
2774
2775 else if (filenames == FN_MATCH_ONLY)
2776 {
2777 fprintf(stdout, "%s" STDOUT_NL, printname);
2778 return 0;
2779 }
2780
2781 /* The --only-matching option prints just the substring that matched,
2782 and/or one or more captured portions of it, as long as these strings are
2783 not empty. The --file-offsets and --line-offsets options output offsets for
2784 the matching substring (all three set only_matching_count non-zero). None
2785 of these mutually exclusive options prints any context. Afterwards, adjust
2786 the start and then jump back to look for further matches in the same line.
2787 If we are in invert mode, however, nothing is printed and we do not restart
2788 - this could still be useful because the return code is set. */
2789
2790 else if (only_matching_count != 0)
2791 {
2792 if (!invert)
2793 {
2794 PCRE2_SIZE oldstartoffset;
2795
2796 if (printname != NULL) fprintf(stdout, "%s:", printname);
2797 if (number) fprintf(stdout, "%lu:", linenumber);
2798
2799 /* Handle --line-offsets */
2800
2801 if (line_offsets)
2802 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2803 (int)(offsets[1] - offsets[0]));
2804
2805 /* Handle --file-offsets */
2806
2807 else if (file_offsets)
2808 fprintf(stdout, "%d,%d" STDOUT_NL,
2809 (int)(filepos + ptr + offsets[0] - ptr),
2810 (int)(offsets[1] - offsets[0]));
2811
2812 /* Handle --output (which has already been syntax checked) */
2813
2814 else if (output_text != NULL)
2815 {
2816 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2817 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2818 number)
2819 fprintf(stdout, STDOUT_NL);
2820 }
2821
2822 /* Handle --only-matching, which may occur many times */
2823
2824 else
2825 {
2826 BOOL printed = FALSE;
2827 omstr *om;
2828
2829 for (om = only_matching; om != NULL; om = om->next)
2830 {
2831 int n = om->groupnum;
2832 if (n == 0 || n < mrc)
2833 {
2834 int plen = offsets[2*n + 1] - offsets[2*n];
2835 if (plen > 0)
2836 {
2837 if (printed && om_separator != NULL)
2838 fprintf(stdout, "%s", om_separator);
2839 print_match(ptr + offsets[n*2], plen);
2840 printed = TRUE;
2841 }
2842 }
2843 }
2844
2845 if (printed || printname != NULL || number)
2846 fprintf(stdout, STDOUT_NL);
2847 }
2848
2849 /* Prepare to repeat to find the next match in the line. */
2850
2851 match = FALSE;
2852 if (line_buffered) fflush(stdout);
2853 rc = 0; /* Had some success */
2854
2855 /* If the pattern contained a lookbehind that included \K, it is
2856 possible that the end of the match might be at or before the actual
2857 starting offset we have just used. In this case, start one character
2858 further on. */
2859
2860 startoffset = offsets[1]; /* Restart after the match */
2861 oldstartoffset = pcre2_get_startchar(match_data);
2862 if (startoffset <= oldstartoffset)
2863 {
2864 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2865 startoffset = oldstartoffset + 1;
2866 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2867 }
2868
2869 /* If the current match ended past the end of the line (only possible
2870 in multiline mode), we must move on to the line in which it did end
2871 before searching for more matches. */
2872
2873 while (startoffset > linelength)
2874 {
2875 ptr += linelength + endlinelength;
2876 filepos += (int)(linelength + endlinelength);
2877 linenumber++;
2878 startoffset -= (int)(linelength + endlinelength);
2879 t = end_of_line(ptr, endptr, &endlinelength);
2880 linelength = t - ptr - endlinelength;
2881 length = (PCRE2_SIZE)(endptr - ptr);
2882 }
2883
2884 goto ONLY_MATCHING_RESTART;
2885 }
2886 }
2887
2888 /* This is the default case when none of the above options is set. We print
2889 the matching lines(s), possibly preceded and/or followed by other lines of
2890 context. */
2891
2892 else
2893 {
2894 lines_printed = TRUE;
2895
2896 /* See if there is a requirement to print some "after" lines from a
2897 previous match. We never print any overlaps. */
2898
2899 if (after_context > 0 && lastmatchnumber > 0)
2900 {
2901 int ellength;
2902 int linecount = 0;
2903 char *p = lastmatchrestart;
2904
2905 while (p < ptr && linecount < after_context)
2906 {
2907 p = end_of_line(p, ptr, &ellength);
2908 linecount++;
2909 }
2910
2911 /* It is important to advance lastmatchrestart during this printing so
2912 that it interacts correctly with any "before" printing below. Print
2913 each line's data using fwrite() in case there are binary zeroes. */
2914
2915 while (lastmatchrestart < p)
2916 {
2917 char *pp = lastmatchrestart;
2918 if (printname != NULL) fprintf(stdout, "%s-", printname);
2919 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2920 pp = end_of_line(pp, endptr, &ellength);
2921 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2922 lastmatchrestart = pp;
2923 }
2924 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2925 }
2926
2927 /* If there were non-contiguous lines printed above, insert hyphens. */
2928
2929 if (hyphenpending)
2930 {
2931 fprintf(stdout, "--" STDOUT_NL);
2932 hyphenpending = FALSE;
2933 hyphenprinted = TRUE;
2934 }
2935
2936 /* See if there is a requirement to print some "before" lines for this
2937 match. Again, don't print overlaps. */
2938
2939 if (before_context > 0)
2940 {
2941 int linecount = 0;
2942 char *p = ptr;
2943
2944 while (p > main_buffer &&
2945 (lastmatchnumber == 0 || p > lastmatchrestart) &&
2946 linecount < before_context)
2947 {
2948 linecount++;
2949 p = previous_line(p, main_buffer);
2950 }
2951
2952 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2953 fprintf(stdout, "--" STDOUT_NL);
2954
2955 while (p < ptr)
2956 {
2957 int ellength;
2958 char *pp = p;
2959 if (printname != NULL) fprintf(stdout, "%s-", printname);
2960 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2961 pp = end_of_line(pp, endptr, &ellength);
2962 FWRITE_IGNORE(p, 1, pp - p, stdout);
2963 p = pp;
2964 }
2965 }
2966
2967 /* Now print the matching line(s); ensure we set hyphenpending at the end
2968 of the file if any context lines are being output. */
2969
2970 if (after_context > 0 || before_context > 0)
2971 endhyphenpending = TRUE;
2972
2973 if (printname != NULL) fprintf(stdout, "%s:", printname);
2974 if (number) fprintf(stdout, "%lu:", linenumber);
2975
2976 /* This extra option, for Jeffrey Friedl's debugging requirements,
2977 replaces the matched string, or a specific captured string if it exists,
2978 with X. When this happens, colouring is ignored. */
2979
2980 #ifdef JFRIEDL_DEBUG
2981 if (S_arg >= 0 && S_arg < mrc)
2982 {
2983 int first = S_arg * 2;
2984 int last = first + 1;
2985 FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2986 fprintf(stdout, "X");
2987 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2988 }
2989 else
2990 #endif
2991
2992 /* In multiline mode, or if colouring, we have to split the line(s) up
2993 and search for further matches, but not of course if the line is a
2994 non-match. In multiline mode this is necessary in case there is another
2995 match that spans the end of the current line. When colouring we want to
2996 colour all matches. */
2997
2998 if ((multiline || do_colour) && !invert)
2999 {
3000 int plength;
3001 PCRE2_SIZE endprevious;
3002
3003 /* The use of \K may make the end offset earlier than the start. In
3004 this situation, swap them round. */
3005
3006 if (offsets[0] > offsets[1])
3007 {
3008 PCRE2_SIZE temp = offsets[0];
3009 offsets[0] = offsets[1];
3010 offsets[1] = temp;
3011 }
3012
3013 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3014 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3015
3016 for (;;)
3017 {
3018 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3019
3020 endprevious = offsets[1];
3021 startoffset = endprevious; /* Advance after previous match. */
3022
3023 /* If the pattern contained a lookbehind that included \K, it is
3024 possible that the end of the match might be at or before the actual
3025 starting offset we have just used. In this case, start one character
3026 further on. */
3027
3028 if (startoffset <= oldstartoffset)
3029 {
3030 startoffset = oldstartoffset + 1;
3031 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3032 }
3033
3034 /* If the current match ended past the end of the line (only possible
3035 in multiline mode), we must move on to the line in which it did end
3036 before searching for more matches. Because the PCRE2_FIRSTLINE option
3037 is set, the start of the match will always be before the first
3038 newline sequence. */
3039
3040 while (startoffset > linelength + endlinelength)
3041 {
3042 ptr += linelength + endlinelength;
3043 filepos += (int)(linelength + endlinelength);
3044 linenumber++;
3045 startoffset -= (int)(linelength + endlinelength);
3046 endprevious -= (int)(linelength + endlinelength);
3047 t = end_of_line(ptr, endptr, &endlinelength);
3048 linelength = t - ptr - endlinelength;
3049 length = (PCRE2_SIZE)(endptr - ptr);
3050 }
3051
3052 /* If startoffset is at the exact end of the line it means this
3053 complete line was the final part of the match, so there is nothing
3054 more to do. */
3055
3056 if (startoffset == linelength + endlinelength) break;
3057
3058 /* Otherwise, run a match from within the final line, and if found,
3059 loop for any that may follow. */
3060
3061 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3062
3063 /* The use of \K may make the end offset earlier than the start. In
3064 this situation, swap them round. */
3065
3066 if (offsets[0] > offsets[1])
3067 {
3068 PCRE2_SIZE temp = offsets[0];
3069 offsets[0] = offsets[1];
3070 offsets[1] = temp;
3071 }
3072
3073 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3074 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3075 }
3076
3077 /* In multiline mode, we may have already printed the complete line
3078 and its line-ending characters (if they matched the pattern), so there
3079 may be no more to print. */
3080
3081 plength = (int)((linelength + endlinelength) - endprevious);
3082 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3083 }
3084
3085 /* Not colouring or multiline; no need to search for further matches. */
3086
3087 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3088 }
3089
3090 /* End of doing what has to be done for a match. If --line-buffered was
3091 given, flush the output. */
3092
3093 if (line_buffered) fflush(stdout);
3094 rc = 0; /* Had some success */
3095
3096 /* Remember where the last match happened for after_context. We remember
3097 where we are about to restart, and that line's number. */
3098
3099 lastmatchrestart = ptr + linelength + endlinelength;
3100 lastmatchnumber = linenumber + 1;
3101
3102 /* If a line was printed and we are now at the end of the file and the last
3103 line had no newline, output one. */
3104
3105 if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3106 write_final_newline();
3107 }
3108
3109 /* For a match in multiline inverted mode (which of course did not cause
3110 anything to be printed), we have to move on to the end of the match before
3111 proceeding. */
3112
3113 if (multiline && invert && match)
3114 {
3115 int ellength;
3116 char *endmatch = ptr + offsets[1];
3117 t = ptr;
3118 while (t < endmatch)
3119 {
3120 t = end_of_line(t, endptr, &ellength);
3121 if (t <= endmatch) linenumber++; else break;
3122 }
3123 endmatch = end_of_line(endmatch, endptr, &ellength);
3124 linelength = endmatch - ptr - ellength;
3125 }
3126
3127 /* Advance to after the newline and increment the line number. The file
3128 offset to the current line is maintained in filepos. */
3129
3130 END_ONE_MATCH:
3131 ptr += linelength + endlinelength;
3132 filepos += (int)(linelength + endlinelength);
3133 linenumber++;
3134
3135 /* If there was at least one match (or a non-match, as required) in the line,
3136 increment the count for the -m option. */
3137
3138 if (line_matched) count_matched_lines++;
3139
3140 /* If input is line buffered, and the buffer is not yet full, read another
3141 line and add it into the buffer. */
3142
3143 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3144 {
3145 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3146 bufflength += add;
3147 endptr += add;
3148 }
3149
3150 /* If we haven't yet reached the end of the file (the buffer is full), and
3151 the current point is in the top 1/3 of the buffer, slide the buffer down by
3152 1/3 and refill it. Before we do this, if some unprinted "after" lines are
3153 about to be lost, print them. */
3154
3155 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3156 {
3157 if (after_context > 0 &&
3158 lastmatchnumber > 0 &&
3159 lastmatchrestart < main_buffer + bufthird)
3160 {
3161 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3162 lastmatchnumber = 0; /* Indicates no after lines pending */
3163 }
3164
3165 /* Now do the shuffle */
3166
3167 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3168 ptr -= bufthird;
3169
3170 bufflength = 2*bufthird + fill_buffer(handle, frtype,
3171 main_buffer + 2*bufthird, bufthird, input_line_buffered);
3172 endptr = main_buffer + bufflength;
3173
3174 /* Adjust any last match point */
3175
3176 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3177 }
3178 } /* Loop through the whole file */
3179
3180 /* End of file; print final "after" lines if wanted; do_after_lines sets
3181 hyphenpending if it prints something. */
3182
3183 if (only_matching_count == 0 && !(count_only|show_total_count))
3184 {
3185 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3186 hyphenpending |= endhyphenpending;
3187 }
3188
3189 /* Print the file name if we are looking for those without matches and there
3190 were none. If we found a match, we won't have got this far. */
3191
3192 if (filenames == FN_NOMATCH_ONLY)
3193 {
3194 fprintf(stdout, "%s" STDOUT_NL, printname);
3195 return 0;
3196 }
3197
3198 /* Print the match count if wanted */
3199
3200 if (count_only && !quiet)
3201 {
3202 if (count > 0 || !omit_zero_count)
3203 {
3204 if (printname != NULL && filenames != FN_NONE)
3205 fprintf(stdout, "%s:", printname);
3206 fprintf(stdout, "%lu" STDOUT_NL, count);
3207 counts_printed++;
3208 }
3209 }
3210
3211 total_count += count; /* Can be set without count_only */
3212 return rc;
3213 }
3214
3215
3216
3217 /*************************************************
3218 * Grep a file or recurse into a directory *
3219 *************************************************/
3220
3221 /* Given a path name, if it's a directory, scan all the files if we are
3222 recursing; if it's a file, grep it.
3223
3224 Arguments:
3225 pathname the path to investigate
3226 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3227 only_one_at_top TRUE if the path is the only one at toplevel
3228
3229 Returns: -1 the file/directory was skipped
3230 0 if there was at least one match
3231 1 if there were no matches
3232 2 there was some kind of error
3233
3234 However, file opening failures are suppressed if "silent" is set.
3235 */
3236
3237 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3238 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3239 {
3240 int rc = 1;
3241 int frtype;
3242 void *handle;
3243 char *lastcomp;
3244 FILE *in = NULL; /* Ensure initialized */
3245
3246 #ifdef SUPPORT_LIBZ
3247 gzFile ingz = NULL;
3248 #endif
3249
3250 #ifdef SUPPORT_LIBBZ2
3251 BZFILE *inbz2 = NULL;
3252 #endif
3253
3254 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3255 int pathlen;
3256 #endif
3257
3258 #if defined NATIVE_ZOS
3259 int zos_type;
3260 FILE *zos_test_file;
3261 #endif
3262
3263 /* If the file name is "-" we scan stdin */
3264
3265 if (strcmp(pathname, "-") == 0)
3266 {
3267 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3268 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3269 stdin_name : NULL);
3270 }
3271
3272 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3273 directories, whereas --include and --exclude apply to everything else. The test
3274 is against the final component of the path. */
3275
3276 lastcomp = strrchr(pathname, FILESEP);
3277 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3278
3279 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3280 Otherwise, scan the directory and recurse for each path within it. The scanning
3281 code is localized so it can be made system-specific. */
3282
3283
3284 /* For z/OS, determine the file type. */
3285
3286 #if defined NATIVE_ZOS
3287 zos_test_file = fopen(pathname,"rb");
3288
3289 if (zos_test_file == NULL)
3290 {
3291 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3292 pathname, strerror(errno));
3293 return -1;
3294 }
3295 zos_type = identifyzosfiletype (zos_test_file);
3296 fclose (zos_test_file);
3297
3298 /* Handle a PDS in separate code */
3299
3300 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3301 {
3302 return travelonpdsdir (pathname, only_one_at_top);
3303 }
3304
3305 /* Deal with regular files in the normal way below. These types are:
3306 zos_type == __ZOS_PDS_MEMBER
3307 zos_type == __ZOS_PS
3308 zos_type == __ZOS_VSAM_KSDS
3309 zos_type == __ZOS_VSAM_ESDS
3310 zos_type == __ZOS_VSAM_RRDS
3311 */
3312
3313 /* Handle a z/OS directory using common code. */
3314
3315 else if (zos_type == __ZOS_HFS)
3316 {
3317 #endif /* NATIVE_ZOS */
3318
3319
3320 /* Handle directories: common code for all OS */
3321
3322 if (isdirectory(pathname))
3323 {
3324 if (dee_action == dee_SKIP ||
3325 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3326 return -1;
3327
3328 if (dee_action == dee_RECURSE)
3329 {
3330 char buffer[FNBUFSIZ];
3331 char *nextfile;
3332 directory_type *dir = opendirectory(pathname);
3333
3334 if (dir == NULL)
3335 {
3336 if (!silent)
3337 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3338 strerror(errno));
3339 return 2;
3340 }
3341
3342 while ((nextfile = readdirectory(dir)) != NULL)
3343 {
3344 int frc;
3345 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3346 if (fnlength > FNBUFSIZ)
3347 {
3348 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3349 rc = 2;
3350 break;
3351 }
3352 sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3353 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3354 if (frc > 1) rc = frc;
3355 else if (frc == 0 && rc == 1) rc = 0;
3356 }
3357
3358 closedirectory(dir);
3359 return rc;
3360 }
3361 }
3362
3363 #ifdef WIN32
3364 if (iswild(pathname))
3365 {
3366 char buffer[1024];
3367 char *nextfile;
3368 char *name;
3369 directory_type *dir = opendirectory(pathname);
3370
3371 if (dir == NULL)
3372 return 0;
3373
3374 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3375 if (*nextfile == '/' || *nextfile == '\\')
3376 name = nextfile + 1;
3377 *name = 0;
3378
3379 while ((nextfile = readdirectory(dir)) != NULL)
3380 {
3381 int frc;
3382 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3383 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3384 if (frc > 1) rc = frc;
3385 else if (frc == 0 && rc == 1) rc = 0;
3386 }
3387
3388 closedirectory(dir);
3389 return rc;
3390 }
3391 #endif
3392
3393 #if defined NATIVE_ZOS
3394 }
3395 #endif
3396
3397 /* If the file is not a directory, check for a regular file, and if it is not,
3398 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3399 exclusion. */
3400
3401 else if (
3402 #if defined NATIVE_ZOS
3403 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3404 #else /* all other OS */
3405 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3406 #endif
3407 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3408 return -1; /* File skipped */
3409
3410 /* Control reaches here if we have a regular file, or if we have a directory
3411 and recursion or skipping was not requested, or if we have anything else and
3412 skipping was not requested. The scan proceeds. If this is the first and only
3413 argument at top level, we don't show the file name, unless we are only showing
3414 the file name, or the filename was forced (-H). */
3415
3416 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3417 pathlen = (int)(strlen(pathname));
3418 #endif
3419
3420 /* Open using zlib if it is supported and the file name ends with .gz. */
3421
3422 #ifdef SUPPORT_LIBZ
3423 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3424 {
3425 ingz = gzopen(pathname, "rb");
3426 if (ingz == NULL)
3427 {
3428 if (!silent)
3429 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3430 strerror(errno));
3431 return 2;
3432 }
3433 handle = (void *)ingz;
3434 frtype = FR_LIBZ;
3435 }
3436 else
3437 #endif
3438
3439 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3440
3441 #ifdef SUPPORT_LIBBZ2
3442 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3443 {
3444 inbz2 = BZ2_bzopen(pathname, "rb");
3445 handle = (void *)inbz2;
3446 frtype = FR_LIBBZ2;
3447 }
3448 else
3449 #endif
3450
3451 /* Otherwise use plain fopen(). The label is so that we can come back here if
3452 an attempt to read a .bz2 file indicates that it really is a plain file. */
3453
3454 #ifdef SUPPORT_LIBBZ2
3455 PLAIN_FILE:
3456 #endif
3457 {
3458 in = fopen(pathname, "rb");
3459 handle = (void *)in;
3460 frtype = FR_PLAIN;
3461 }
3462
3463 /* All the opening methods return errno when they fail. */
3464
3465 if (handle == NULL)
3466 {
3467 if (!silent)
3468 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3469 strerror(errno));
3470 return 2;
3471 }
3472
3473 /* Now grep the file */
3474
3475 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3476 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3477
3478 /* Close in an appropriate manner. */
3479
3480 #ifdef SUPPORT_LIBZ
3481 if (frtype == FR_LIBZ)
3482 gzclose(ingz);
3483 else
3484 #endif
3485
3486 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3487 read failed. If the error indicates that the file isn't in fact bzipped, try
3488 again as a normal file. */
3489
3490 #ifdef SUPPORT_LIBBZ2
3491 if (frtype == FR_LIBBZ2)
3492 {
3493 if (rc == 3)
3494 {
3495 int errnum;
3496 const char *err = BZ2_bzerror(inbz2, &errnum);
3497 if (errnum == BZ_DATA_ERROR_MAGIC)
3498 {
3499 BZ2_bzclose(inbz2);
3500 goto PLAIN_FILE;
3501 }
3502 else if (!silent)
3503 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3504 pathname, err);
3505 rc = 2; /* The normal "something went wrong" code */
3506 }
3507 BZ2_bzclose(inbz2);
3508 }
3509 else
3510 #endif
3511
3512 /* Normal file close */
3513
3514 fclose(in);
3515
3516 /* Pass back the yield from pcre2grep(). */
3517
3518 return rc;
3519 }
3520
3521
3522
3523 /*************************************************
3524 * Handle a single-letter, no data option *
3525 *************************************************/
3526
3527 static int
handle_option(int letter,int options)3528 handle_option(int letter, int options)
3529 {
3530 switch(letter)
3531 {
3532 case N_FOFFSETS: file_offsets = TRUE; break;
3533 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3534 case N_LBUFFER: line_buffered = TRUE; break;
3535 case N_LOFFSETS: line_offsets = number = TRUE; break;
3536 case N_NOJIT: use_jit = FALSE; break;
3537 case 'a': binary_files = BIN_TEXT; break;
3538 case 'c': count_only = TRUE; break;
3539 case 'F': options |= PCRE2_LITERAL; break;
3540 case 'H': filenames = FN_FORCE; break;
3541 case 'I': binary_files = BIN_NOMATCH; break;
3542 case 'h': filenames = FN_NONE; break;
3543 case 'i': options |= PCRE2_CASELESS; break;
3544 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3545 case 'L': filenames = FN_NOMATCH_ONLY; break;
3546 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3547 case 'n': number = TRUE; break;
3548
3549 case 'o':
3550 only_matching_last = add_number(0, only_matching_last);
3551 if (only_matching == NULL) only_matching = only_matching_last;
3552 break;
3553
3554 case 'q': quiet = TRUE; break;
3555 case 'r': dee_action = dee_RECURSE; break;
3556 case 's': silent = TRUE; break;
3557 case 't': show_total_count = TRUE; break;
3558 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3559 case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3560 case 'v': invert = TRUE; break;
3561 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3562 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3563
3564 case 'V':
3565 {
3566 unsigned char buffer[128];
3567 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3568 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3569 }
3570 pcre2grep_exit(0);
3571 break;
3572
3573 default:
3574 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3575 pcre2grep_exit(usage(2));
3576 }
3577
3578 return options;
3579 }
3580
3581
3582
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The text is built in a static
buffer, so each call overwrites the result of the previous one.

Argument:   n    the number
Returns:         pointer to the static buffer holding the text
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Length of the number's text */
int lasttwo = n % 100;

/* Numbers that end in 11, 12, or 13 take "th"; otherwise the suffix depends on
the final digit. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    }
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3607
3608
3609
3610 /*************************************************
3611 * Compile a single pattern *
3612 *************************************************/
3613
3614 /* Do nothing if the pattern has already been compiled. This is the case for
3615 include/exclude patterns read from a file.
3616
3617 When the -F option has been used, each "pattern" may be a list of strings,
3618 separated by line breaks. They will be matched literally. We split such a
3619 string and compile the first substring, inserting an additional block into the
3620 pattern chain.
3621
3622 Arguments:
3623 p points to the pattern block
3624 options the PCRE options
3625 fromfile TRUE if the pattern was read from a file
3626 fromtext file name or identifying text (e.g. "include")
3627 count 0 if this is the only command line pattern, or
3628 number of the command line pattern, or
3629 linenumber for a pattern from a file
3630
3631 Returns: TRUE on success, FALSE after an error
3632 */
3633
3634 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3635 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3636 int count)
3637 {
3638 char *ps;
3639 int errcode;
3640 PCRE2_SIZE patlen, erroffset;
3641 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3642
3643 if (p->compiled != NULL) return TRUE;
3644 ps = p->string;
3645 patlen = p->length;
3646
3647 if ((options & PCRE2_LITERAL) != 0)
3648 {
3649 int ellength;
3650 char *eop = ps + patlen;
3651 char *pe = end_of_line(ps, eop, &ellength);
3652
3653 if (ellength != 0)
3654 {
3655 patlen = pe - ps - ellength;
3656 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3657 }
3658 }
3659
3660 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3661 &erroffset, compile_context);
3662
3663 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3664 ignore any JIT compiler errors, relying falling back to interpreting if
3665 anything goes wrong with JIT. */
3666
3667 if (p->compiled != NULL)
3668 {
3669 #ifdef SUPPORT_PCRE2GREP_JIT
3670 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3671 #endif
3672 return TRUE;
3673 }
3674
3675 /* Handle compile errors */
3676
3677 if (erroffset > patlen) erroffset = patlen;
3678 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3679
3680 if (fromfile)
3681 {
3682 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3683 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3684 }
3685 else
3686 {
3687 if (count == 0)
3688 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3689 fromtext, (int)erroffset, errmessbuffer);
3690 else
3691 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3692 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3693 }
3694
3695 return FALSE;
3696 }
3697
3698
3699
3700 /*************************************************
3701 * Read and compile a file of patterns *
3702 *************************************************/
3703
3704 /* This is used for --filelist, --include-from, and --exclude-from.
3705
3706 Arguments:
3707 name the name of the file; "-" is stdin
3708 patptr pointer to the pattern chain anchor
3709 patlastptr pointer to the last pattern pointer
3710
3711 Returns: TRUE if all went well
3712 */
3713
3714 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3715 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3716 {
3717 int linenumber = 0;
3718 PCRE2_SIZE patlen;
3719 FILE *f;
3720 const char *filename;
3721 char buffer[MAXPATLEN+20];
3722
3723 if (strcmp(name, "-") == 0)
3724 {
3725 f = stdin;
3726 filename = stdin_name;
3727 }
3728 else
3729 {
3730 f = fopen(name, "r");
3731 if (f == NULL)
3732 {
3733 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3734 return FALSE;
3735 }
3736 filename = name;
3737 }
3738
3739 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3740 {
3741 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3742 linenumber++;
3743 if (patlen == 0) continue; /* Skip blank lines */
3744
3745 /* Note: this call to add_pattern() puts a pointer to the local variable
3746 "buffer" into the pattern chain. However, that pointer is used only when
3747 compiling the pattern, which happens immediately below, so we flatten it
3748 afterwards, as a precaution against any later code trying to use it. */
3749
3750 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3751 if (*patlastptr == NULL)
3752 {
3753 if (f != stdin) fclose(f);
3754 return FALSE;
3755 }
3756 if (*patptr == NULL) *patptr = *patlastptr;
3757
3758 /* This loop is needed because compiling a "pattern" when -F is set may add
3759 on additional literal patterns if the original contains a newline. In the
3760 common case, it never will, because read_one_line() stops at a newline.
3761 However, the -N option can be used to give pcre2grep a different newline
3762 setting. */
3763
3764 for(;;)
3765 {
3766 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3767 linenumber))
3768 {
3769 if (f != stdin) fclose(f);
3770 return FALSE;
3771 }
3772 (*patlastptr)->string = NULL; /* Insurance */
3773 if ((*patlastptr)->next == NULL) break;
3774 *patlastptr = (*patlastptr)->next;
3775 }
3776 }
3777
3778 if (f != stdin) fclose(f);
3779 return TRUE;
3780 }
3781
3782
3783
3784 /*************************************************
3785 * Main program *
3786 *************************************************/
3787
3788 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3789
3790 int
main(int argc,char ** argv)3791 main(int argc, char **argv)
3792 {
3793 int i, j;
3794 int rc = 1;
3795 BOOL only_one_at_top;
3796 patstr *cp;
3797 fnstr *fn;
3798 omstr *om;
3799 const char *locale_from = "--locale";
3800
3801 #ifdef SUPPORT_PCRE2GREP_JIT
3802 pcre2_jit_stack *jit_stack = NULL;
3803 #endif
3804
3805 /* In Windows, stdout is set up as a text stream, which means that \n is
3806 converted to \r\n. This causes output lines that are copied from the input to
3807 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3808 that stdout is a binary stream. Note that this means all other output to stdout
3809 must use STDOUT_NL to terminate lines. */
3810
3811 #ifdef WIN32
3812 _setmode(_fileno(stdout), _O_BINARY);
3813 #endif
3814
3815 /* Process the options */
3816
3817 for (i = 1; i < argc; i++)
3818 {
3819 option_item *op = NULL;
3820 char *option_data = (char *)""; /* default to keep compiler happy */
3821 BOOL longop;
3822 BOOL longopwasequals = FALSE;
3823
3824 if (argv[i][0] != '-') break;
3825
3826 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3827 but only if we have previously had -e or -f to define the patterns. */
3828
3829 if (argv[i][1] == 0)
3830 {
3831 if (pattern_files != NULL || patterns != NULL) break;
3832 else pcre2grep_exit(usage(2));
3833 }
3834
3835 /* Handle a long name option, or -- to terminate the options */
3836
3837 if (argv[i][1] == '-')
3838 {
3839 char *arg = argv[i] + 2;
3840 char *argequals = strchr(arg, '=');
3841
3842 if (*arg == 0) /* -- terminates options */
3843 {
3844 i++;
3845 break; /* out of the options-handling loop */
3846 }
3847
3848 longop = TRUE;
3849
3850 /* Some long options have data that follows after =, for example file=name.
3851 Some options have variations in the long name spelling: specifically, we
3852 allow "regexp" because GNU grep allows it, though I personally go along
3853 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3854 These options are entered in the table as "regex(p)". Options can be in
3855 both these categories. */
3856
3857 for (op = optionlist; op->one_char != 0; op++)
3858 {
3859 char *opbra = strchr(op->long_name, '(');
3860 char *equals = strchr(op->long_name, '=');
3861
3862 /* Handle options with only one spelling of the name */
3863
3864 if (opbra == NULL) /* Does not contain '(' */
3865 {
3866 if (equals == NULL) /* Not thing=data case */
3867 {
3868 if (strcmp(arg, op->long_name) == 0) break;
3869 }
3870 else /* Special case xxx=data */
3871 {
3872 int oplen = (int)(equals - op->long_name);
3873 int arglen = (argequals == NULL)?
3874 (int)strlen(arg) : (int)(argequals - arg);
3875 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3876 {
3877 option_data = arg + arglen;
3878 if (*option_data == '=')
3879 {
3880 option_data++;
3881 longopwasequals = TRUE;
3882 }
3883 break;
3884 }
3885 }
3886 }
3887
3888 /* Handle options with an alternate spelling of the name */
3889
3890 else
3891 {
3892 char buff1[24];
3893 char buff2[24];
3894 int ret;
3895
3896 int baselen = (int)(opbra - op->long_name);
3897 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3898 int arglen = (argequals == NULL || equals == NULL)?
3899 (int)strlen(arg) : (int)(argequals - arg);
3900
3901 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3902 ret < 0 || ret > (int)sizeof(buff1)) ||
3903 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3904 fulllen - baselen - 2, opbra + 1),
3905 ret < 0 || ret > (int)sizeof(buff2)))
3906 {
3907 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3908 op->long_name);
3909 pcre2grep_exit(2);
3910 }
3911
3912 if (strncmp(arg, buff1, arglen) == 0 ||
3913 strncmp(arg, buff2, arglen) == 0)
3914 {
3915 if (equals != NULL && argequals != NULL)
3916 {
3917 option_data = argequals;
3918 if (*option_data == '=')
3919 {
3920 option_data++;
3921 longopwasequals = TRUE;
3922 }
3923 }
3924 break;
3925 }
3926 }
3927 }
3928
3929 if (op->one_char == 0)
3930 {
3931 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3932 pcre2grep_exit(usage(2));
3933 }
3934 }
3935
3936 /* Jeffrey Friedl's debugging harness uses these additional options which
3937 are not in the right form for putting in the option table because they use
3938 only one hyphen, yet are more than one character long. By putting them
3939 separately here, they will not get displayed as part of the help() output,
3940 but I don't think Jeffrey will care about that. */
3941
3942 #ifdef JFRIEDL_DEBUG
3943 else if (strcmp(argv[i], "-pre") == 0) {
3944 jfriedl_prefix = argv[++i];
3945 continue;
3946 } else if (strcmp(argv[i], "-post") == 0) {
3947 jfriedl_postfix = argv[++i];
3948 continue;
3949 } else if (strcmp(argv[i], "-XT") == 0) {
3950 sscanf(argv[++i], "%d", &jfriedl_XT);
3951 continue;
3952 } else if (strcmp(argv[i], "-XR") == 0) {
3953 sscanf(argv[++i], "%d", &jfriedl_XR);
3954 continue;
3955 }
3956 #endif
3957
3958
3959 /* One-char options; many that have no data may be in a single argument; we
3960 continue till we hit the last one or one that needs data. */
3961
3962 else
3963 {
3964 char *s = argv[i] + 1;
3965 longop = FALSE;
3966
3967 while (*s != 0)
3968 {
3969 for (op = optionlist; op->one_char != 0; op++)
3970 {
3971 if (*s == op->one_char) break;
3972 }
3973 if (op->one_char == 0)
3974 {
3975 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3976 *s, argv[i]);
3977 pcre2grep_exit(usage(2));
3978 }
3979
3980 option_data = s+1;
3981
3982 /* Break out if this is the last character in the string; it's handled
3983 below like a single multi-char option. */
3984
3985 if (*option_data == 0) break;
3986
3987 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3988 are used for ones that either have a numerical number or defaults, i.e.
3989 the data is optional. If a digit follows, there is data; if not, carry on
3990 with other single-character options in the same string. */
3991
3992 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3993 {
3994 if (isdigit((unsigned char)s[1])) break;
3995 }
3996 else /* Check for an option with data */
3997 {
3998 if (op->type != OP_NODATA) break;
3999 }
4000
4001 /* Handle a single-character option with no data, then loop for the
4002 next character in the string. */
4003
4004 pcre2_options = handle_option(*s++, pcre2_options);
4005 }
4006 }
4007
  /* At this point we should have op pointing to a matched option. If the type
  is OP_NODATA, it means that there is no data, and the option might set
  something in the PCRE options. */
4011
4012 if (op->type == OP_NODATA)
4013 {
4014 pcre2_options = handle_option(op->one_char, pcre2_options);
4015 continue;
4016 }
4017
4018 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4019 either has a value or defaults to something. It cannot have data in a
4020 separate item. At the moment, the only such options are "colo(u)r",
4021 "only-matching", and Jeffrey Friedl's special -S debugging option. */
4022
4023 if (*option_data == 0 &&
4024 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4025 op->type == OP_OP_NUMBERS))
4026 {
4027 switch (op->one_char)
4028 {
4029 case N_COLOUR:
4030 colour_option = "auto";
4031 break;
4032
4033 case 'o':
4034 only_matching_last = add_number(0, only_matching_last);
4035 if (only_matching == NULL) only_matching = only_matching_last;
4036 break;
4037
4038 #ifdef JFRIEDL_DEBUG
4039 case 'S':
4040 S_arg = 0;
4041 break;
4042 #endif
4043 }
4044 continue;
4045 }
4046
4047 /* Otherwise, find the data string for the option. */
4048
4049 if (*option_data == 0)
4050 {
4051 if (i >= argc - 1 || longopwasequals)
4052 {
4053 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4054 pcre2grep_exit(usage(2));
4055 }
4056 option_data = argv[++i];
4057 }
4058
4059 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4060 added to a chain of numbers. */
4061
4062 if (op->type == OP_OP_NUMBERS)
4063 {
4064 unsigned long int n = decode_number(option_data, op, longop);
4065 omdatastr *omd = (omdatastr *)op->dataptr;
4066 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4067 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4068 }
4069
4070 /* If the option type is OP_PATLIST, it's the -e option, or one of the
4071 include/exclude options, which can be called multiple times to create lists
4072 of patterns. */
4073
4074 else if (op->type == OP_PATLIST)
4075 {
4076 patdatastr *pd = (patdatastr *)op->dataptr;
4077 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4078 *(pd->lastptr));
4079 if (*(pd->lastptr) == NULL) goto EXIT2;
4080 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4081 }
4082
4083 /* If the option type is OP_FILELIST, it's one of the options that names a
4084 file. */
4085
4086 else if (op->type == OP_FILELIST)
4087 {
4088 fndatastr *fd = (fndatastr *)op->dataptr;
4089 fn = (fnstr *)malloc(sizeof(fnstr));
4090 if (fn == NULL)
4091 {
4092 fprintf(stderr, "pcre2grep: malloc failed\n");
4093 goto EXIT2;
4094 }
4095 fn->next = NULL;
4096 fn->name = option_data;
4097 if (*(fd->anchor) == NULL)
4098 *(fd->anchor) = fn;
4099 else
4100 (*(fd->lastptr))->next = fn;
4101 *(fd->lastptr) = fn;
4102 }
4103
  /* Handle OP_BINFILES */
4105
4106 else if (op->type == OP_BINFILES)
4107 {
4108 if (strcmp(option_data, "binary") == 0)
4109 binary_files = BIN_BINARY;
4110 else if (strcmp(option_data, "without-match") == 0)
4111 binary_files = BIN_NOMATCH;
4112 else if (strcmp(option_data, "text") == 0)
4113 binary_files = BIN_TEXT;
4114 else
4115 {
4116 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4117 option_data);
4118 pcre2grep_exit(usage(2));
4119 }
4120 }
4121
4122 /* Otherwise, deal with a single string or numeric data value. */
4123
4124 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4125 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4126 {
4127 *((char **)op->dataptr) = option_data;
4128 }
4129 else
4130 {
4131 unsigned long int n = decode_number(option_data, op, longop);
4132 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4133 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4134 else *((int *)op->dataptr) = n;
4135 }
4136 }
4137
4138 /* Options have been decoded. If -C was used, its value is used as a default
4139 for -A and -B. */
4140
4141 if (both_context > 0)
4142 {
4143 if (after_context == 0) after_context = both_context;
4144 if (before_context == 0) before_context = both_context;
4145 }
4146
4147 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4148 permitted. They display, each in their own way, only the data that has matched.
4149 */
4150
4151 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4152 file_offsets + line_offsets;
4153
4154 if (only_matching_count > 1)
4155 {
4156 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4157 "--file-offsets and/or --line-offsets\n");
4158 pcre2grep_exit(usage(2));
4159 }
4160
4161
4162 /* Check that there is a big enough ovector for all -o settings. */
4163
4164 for (om = only_matching; om != NULL; om = om->next)
4165 {
4166 int n = om->groupnum;
4167 if (n > (int)capture_max)
4168 {
4169 fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4170 fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4171 goto EXIT2;
4172 }
4173 }
4174
4175 /* Check the text supplied to --output for errors. */
4176
4177 if (output_text != NULL &&
4178 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4179 goto EXIT2;
4180
4181 /* Set up default compile and match contexts and a match data block. */
4182
4183 offset_size = capture_max + 1;
4184 compile_context = pcre2_compile_context_create(NULL);
4185 match_context = pcre2_match_context_create(NULL);
4186 match_data = pcre2_match_data_create(offset_size, NULL);
4187 offsets = pcre2_get_ovector_pointer(match_data);
4188
4189 /* If string (script) callouts are supported, set up the callout processing
4190 function. */
4191
4192 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4193 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4194 #endif
4195
4196 /* Put limits into the match context. */
4197
4198 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4199 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4200 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4201
4202 /* If a locale has not been provided as an option, see if the LC_ALL or, failing
4203 that, the LC_CTYPE environment variable is set, and if so, use it. */
4204
4205 if (locale == NULL)
4206 {
4207 locale = getenv("LC_ALL");
4208 locale_from = "LC_ALL";
4209 }
4210
4211 if (locale == NULL)
4212 {
4213 locale = getenv("LC_CTYPE");
4214 locale_from = "LC_CTYPE";
4215 }
4216
4217 /* If a locale is set, use it to generate the tables that PCRE2 needs. Passing
4218 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4219
4220 if (locale != NULL)
4221 {
4222 if (setlocale(LC_CTYPE, locale) == NULL)
4223 {
4224 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4225 locale, locale_from);
4226 goto EXIT2;
4227 }
4228 character_tables = pcre2_maketables(NULL);
4229 pcre2_set_character_tables(compile_context, character_tables);
4230 }
4231
4232 /* Sort out colouring */
4233
4234 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4235 {
4236 if (strcmp(colour_option, "always") == 0)
4237 #ifdef WIN32
4238 do_ansi = !is_stdout_tty(),
4239 #endif
4240 do_colour = TRUE;
4241 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4242 else
4243 {
4244 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4245 colour_option);
4246 goto EXIT2;
4247 }
4248 if (do_colour)
4249 {
4250 char *cs = getenv("PCRE2GREP_COLOUR");
4251 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4252 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4253 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4254 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4255 if (cs == NULL) cs = getenv("GREP_COLOR");
4256 if (cs != NULL)
4257 {
4258 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4259 }
4260 #ifdef WIN32
4261 init_colour_output();
4262 #endif
4263 }
4264 }
4265
4266 /* Sort out a newline setting. */
4267
4268 if (newline_arg != NULL)
4269 {
4270 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4271 endlinetype++)
4272 {
4273 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4274 }
4275 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4276 pcre2_set_newline(compile_context, endlinetype);
4277 else
4278 {
4279 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4280 newline_arg);
4281 goto EXIT2;
4282 }
4283 }
4284
4285 /* Find default newline convention */
4286
4287 else
4288 {
4289 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4290 }
4291
4292 /* Interpret the text values for -d and -D */
4293
4294 if (dee_option != NULL)
4295 {
4296 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4297 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4298 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4299 else
4300 {
4301 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4302 goto EXIT2;
4303 }
4304 }
4305
4306 if (DEE_option != NULL)
4307 {
4308 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4309 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4310 else
4311 {
4312 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4313 goto EXIT2;
4314 }
4315 }
4316
4317 /* Set the extra options */
4318
4319 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4320
4321 /* Check the values for Jeffrey Friedl's debugging options. */
4322
4323 #ifdef JFRIEDL_DEBUG
4324 if (S_arg > 9)
4325 {
4326 fprintf(stderr, "pcre2grep: bad value for -S option\n");
4327 return 2;
4328 }
4329 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4330 {
4331 if (jfriedl_XT == 0) jfriedl_XT = 1;
4332 if (jfriedl_XR == 0) jfriedl_XR = 1;
4333 }
4334 #endif
4335
4336 /* If use_jit is set, check whether JIT is available. If not, do not try
4337 to use JIT. */
4338
4339 if (use_jit)
4340 {
4341 uint32_t answer;
4342 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4343 if (!answer) use_jit = FALSE;
4344 }
4345
4346 /* Get memory for the main buffer. */
4347
4348 if (bufthird <= 0)
4349 {
4350 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4351 goto EXIT2;
4352 }
4353
4354 bufsize = 3*bufthird;
4355 main_buffer = (char *)malloc(bufsize);
4356
4357 if (main_buffer == NULL)
4358 {
4359 fprintf(stderr, "pcre2grep: malloc failed\n");
4360 goto EXIT2;
4361 }
4362
4363 /* If no patterns were provided by -e, and there are no files provided by -f,
4364 the first argument is the one and only pattern, and it must exist. */
4365
4366 if (patterns == NULL && pattern_files == NULL)
4367 {
4368 if (i >= argc) return usage(2);
4369 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4370 NULL);
4371 i++;
4372 if (patterns == NULL) goto EXIT2;
4373 }
4374
4375 /* Compile the patterns that were provided on the command line, either by
4376 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4377 after all the command-line options are read so that we know which PCRE options
4378 to use. When -F is used, compile_pattern() may add another block into the
4379 chain, so we must not access the next pointer till after the compile. */
4380
4381 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4382 {
4383 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4384 (j == 1 && patterns->next == NULL)? 0 : j))
4385 goto EXIT2;
4386 }
4387
4388 /* Read and compile the regular expressions that are provided in files. */
4389
4390 for (fn = pattern_files; fn != NULL; fn = fn->next)
4391 {
4392 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4393 }
4394
4395 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4396
4397 #ifdef SUPPORT_PCRE2GREP_JIT
4398 if (use_jit)
4399 {
4400 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4401 if (jit_stack != NULL )
4402 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4403 }
4404 #endif
4405
4406 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4407 adjust the options. */
4408
4409 pcre2_options &= ~PCRE2_LITERAL;
4410 (void)pcre2_set_compile_extra_options(compile_context, 0);
4411
4412 /* If there are include or exclude patterns read from the command line, compile
4413 them. */
4414
4415 for (j = 0; j < 4; j++)
4416 {
4417 int k;
4418 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4419 {
4420 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4421 (k == 1 && cp->next == NULL)? 0 : k))
4422 goto EXIT2;
4423 }
4424 }
4425
4426 /* Read and compile include/exclude patterns from files. */
4427
4428 for (fn = include_from; fn != NULL; fn = fn->next)
4429 {
4430 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4431 goto EXIT2;
4432 }
4433
4434 for (fn = exclude_from; fn != NULL; fn = fn->next)
4435 {
4436 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4437 goto EXIT2;
4438 }
4439
4440 /* If there are no files that contain lists of files to search, and there are
4441 no file arguments, search stdin, and then exit. */
4442
4443 if (file_lists == NULL && i >= argc)
4444 {
4445 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4446 (filenames > FN_DEFAULT)? stdin_name : NULL);
4447 goto EXIT;
4448 }
4449
4450 /* If any files that contain lists of files to search have been specified,
4451 read them line by line and search the given files. */
4452
4453 for (fn = file_lists; fn != NULL; fn = fn->next)
4454 {
4455 char buffer[FNBUFSIZ];
4456 FILE *fl;
4457 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4458 {
4459 fl = fopen(fn->name, "rb");
4460 if (fl == NULL)
4461 {
4462 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4463 strerror(errno));
4464 goto EXIT2;
4465 }
4466 }
4467 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4468 {
4469 int frc;
4470 char *end = buffer + (int)strlen(buffer);
4471 while (end > buffer && isspace(end[-1])) end--;
4472 *end = 0;
4473 if (*buffer != 0)
4474 {
4475 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4476 if (frc > 1) rc = frc;
4477 else if (frc == 0 && rc == 1) rc = 0;
4478 }
4479 }
4480 if (fl != stdin) fclose(fl);
4481 }
4482
4483 /* After handling file-list, work through remaining arguments. Pass in the fact
4484 that there is only one argument at top level - this suppresses the file name if
4485 the argument is not a directory and filenames are not otherwise forced. */
4486
4487 only_one_at_top = i == argc - 1 && file_lists == NULL;
4488
4489 for (; i < argc; i++)
4490 {
4491 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4492 only_one_at_top);
4493 if (frc > 1) rc = frc;
4494 else if (frc == 0 && rc == 1) rc = 0;
4495 }
4496
4497 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4498 /* If separating builtin echo callouts by implicit newline, add one more for
4499 the final item. */
4500
4501 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4502 fprintf(stdout, STDOUT_NL);
4503 #endif
4504
4505 /* Show the total number of matches if requested, but not if only one file's
4506 count was printed. */
4507
4508 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4509 {
4510 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4511 fprintf(stdout, "TOTAL:");
4512 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4513 }
4514
4515 EXIT:
4516 #ifdef SUPPORT_PCRE2GREP_JIT
4517 pcre2_jit_free_unused_memory(NULL);
4518 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4519 #endif
4520
4521 free(main_buffer);
4522 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4523
4524 pcre2_compile_context_free(compile_context);
4525 pcre2_match_context_free(match_context);
4526 pcre2_match_data_free(match_data);
4527
4528 free_pattern_chain(patterns);
4529 free_pattern_chain(include_patterns);
4530 free_pattern_chain(include_dir_patterns);
4531 free_pattern_chain(exclude_patterns);
4532 free_pattern_chain(exclude_dir_patterns);
4533
4534 free_file_chain(exclude_from);
4535 free_file_chain(include_from);
4536 free_file_chain(pattern_files);
4537 free_file_chain(file_lists);
4538
4539 while (only_matching != NULL)
4540 {
4541 omstr *this = only_matching;
4542 only_matching = this->next;
4543 free(this);
4544 }
4545
4546 pcre2grep_exit(rc);
4547
4548 EXIT2:
4549 rc = 2;
4550 goto EXIT;
4551 }
4552
4553 /* End of pcre2grep */
4554