1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2018 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65
/* Some CMake setups still define WIN32 under Cygwin; remove the definition. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70
71 #ifdef WIN32
72 #include <io.h> /* For _setmode() */
73 #include <fcntl.h> /* For _O_BINARY */
74 #endif
75
76 #ifdef SUPPORT_PCRE2GREP_CALLOUT
77 #ifdef WIN32
78 #include <process.h>
79 #else
80 #include <sys/wait.h>
81 #endif
82 #endif
83
84 #ifdef HAVE_UNISTD_H
85 #include <unistd.h>
86 #endif
87
88 #ifdef SUPPORT_LIBZ
89 #include <zlib.h>
90 #endif
91
92 #ifdef SUPPORT_LIBBZ2
93 #include <bzlib.h>
94 #endif
95
96 #define PCRE2_CODE_UNIT_WIDTH 8
97 #include "pcre2.h"
98
99 /* Older versions of MSVC lack snprintf(). This define allows for
100 warning/error-free compilation and testing with MSVC compilers back to at least
101 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
102
103 #if defined(_MSC_VER) && (_MSC_VER < 1900)
104 #define snprintf _snprintf
105 #endif
106
/* Boolean values and the integer type used to hold them. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): presumably the number of offset pairs requested for match
data; its use is outside this part of the file - confirm. */

#define OFFSET_SIZE 33

/* Maximum pattern length in bytes, as checked by add_pattern(): BUFSIZ when
that is larger than 8192, otherwise 8192. */

#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
#else
#define MAXPATLEN 8192
#endif

/* NOTE(review): judging by the names, sizes of buffers for file names and for
error messages; the buffers are outside this part of the file - confirm. */

#define FNBUFSIZ 2048
#define ERRBUFSIZ 256
122
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles. NOTE(review): FR_LIBZ and FR_LIBBZ2 presumably
correspond to the optional <zlib.h> and <bzlib.h> support - confirm. */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options. The lower-case dee_ values are the ones
stored in dee_action and the upper-case DEE_ values the ones stored in
DEE_action; in the option table -d is "directories" and -D is "devices". */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). Each value is a
distinct bit, so they can be combined with bitwise OR. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Binary file options; BIN_BINARY is the initial value of "binary_files". */

enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
147
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { ... } while (0) so that the macro expands to
exactly one statement. It must be followed by a semicolon, and it can then be
used safely as the body of an unbraced "if" that has an "else": a bare
"if (...) {}" expansion would either fail to compile there or capture the
"else". */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
155
/* Under Windows, we have to set stdout to be binary, so that it does not
convert \r\n at the ends of output lines to \r\r\n. However, that means that
any messages written to stdout must have \r\n as their line terminator. This is
handled by using STDOUT_NL as the newline string. We also use a normal double
quote for the example, as single quotes aren't usually available.

STDOUT_NL is the line terminator for messages written to stdout; QUOT is the
quote character used in example text. */

#ifdef WIN32
#define STDOUT_NL "\r\n"
#define QUOT "\""
#else
#define STDOUT_NL "\n"
#define QUOT "'"
#endif
169
170
171
172 /*************************************************
173 * Global variables *
174 *************************************************/
175
/* Jeffrey Friedl has some debugging requirements that are not part of the
regular code. */

#ifdef JFRIEDL_DEBUG
static int S_arg = -1;               /* data for the -S (jeffS) option */
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif

/* String settings. Where a variable is the data target of an entry in
optionlist[], the option is noted. */

static const char *colour_string = "1;31";  /* sequence emitted by print_match() */
static const char *colour_option = NULL;    /* --color, --colour */
static const char *dee_option = NULL;       /* -d, --directories */
static const char *DEE_option = NULL;       /* -D, --devices */
static const char *locale = NULL;           /* --locale */
static const char *newline_arg = NULL;      /* -N, --newline */
static const char *om_separator = NULL;     /* --om-separator */
static const char *stdin_name = "(standard input)";  /* --label */
static const char *output_text = NULL;      /* -O, --output */

/* NOTE(review): presumably the input buffer whose size is "bufsize"; it is
allocated outside this part of the file - confirm. */

static char *main_buffer = NULL;

/* Integer settings. The initial value of bufsize is three times the initial
value of bufthird. */

static int after_context = 0;                    /* -A, --after-context */
static int before_context = 0;                   /* -B, --before-context */
static int binary_files = BIN_BINARY;            /* one of the BIN_xxx values */
static int both_context = 0;                     /* -C, --context */
static int bufthird = PCRE2GREP_BUFSIZE;         /* --buffer-size */
static int max_bufthird = PCRE2GREP_MAX_BUFSIZE; /* --max-buffer-size */
static int bufsize = 3*PCRE2GREP_BUFSIZE;
static int endlinetype;

static unsigned long int total_count = 0;
static unsigned long int counts_printed = 0;

/* The initial action for directories is to skip them under Windows and to
read them elsewhere. */

#ifdef WIN32
static int dee_action = dee_SKIP;
#else
static int dee_action = dee_READ;
#endif

static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;   /* one of the FN_xxx values */

/* JIT use is on initially only when this program was built with
SUPPORT_PCRE2GREP_JIT. */

#ifdef SUPPORT_PCRE2GREP_JIT
static BOOL use_jit = TRUE;
#else
static BOOL use_jit = FALSE;
#endif

static const uint8_t *character_tables = NULL;

/* PCRE2 option bits and resource limits. The three limits are the data
targets of --heap-limit, --match-limit and --depth-limit (with its obsolete
synonym --recursion-limit). */

static uint32_t pcre2_options = 0;
static uint32_t extra_options = 0;
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
static uint32_t match_limit = 0;
static uint32_t depth_limit = 0;

/* PCRE2 contexts and match data. NOTE(review): these have no initializers
here; presumably they are created during start-up - confirm. */

static pcre2_compile_context *compile_context;
static pcre2_match_context *match_context;
static pcre2_match_data *match_data;
static PCRE2_SIZE *offsets;

/* Boolean switches, all initially FALSE. do_ansi exists only under Windows,
where it selects between ANSI escape sequences and console attributes in the
colour-handling functions. */

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
#ifdef WIN32
static BOOL do_ansi = FALSE;
#endif
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
static BOOL line_buffered = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE;  /* makes pcre2grep_exit() add a message */
static BOOL quiet = FALSE;
static BOOL show_total_count = FALSE;
static BOOL silent = FALSE;
static BOOL utf = FALSE;
258
/* Structure for list of --only-matching capturing numbers. Each item holds
one group number and a link to the next item. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;
static int only_matching_count;

/* Structure for holding the two variables that describe a number chain:
the address of the pointer to the first item and the address of the pointer to
the last item. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

/* The chain description that is the data target of the -o option. */

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
278
/* Structure for list of file names (for -f and --{in,ex}clude-from). Each
item holds a name and a link to the next item. */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

/* First and last items of each file name chain; all chains start empty. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Structure for holding the two variables that describe a file name chain. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

/* Chain descriptions used as data targets in optionlist[]: --exclude-from,
--include-from, --file-list and -f respectively. */

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
307
/* Structure for pattern and its compiled form; used for matching patterns and
also for include/exclude patterns. The "compiled" field is NULL until a
compiled pattern is attached; free_pattern_chain() frees it when not NULL. */

typedef struct patstr {
  struct patstr *next;
  char *string;
  PCRE2_SIZE length;
  pcre2_code *compiled;
} patstr;

/* First and last items of each pattern chain; all chains start empty. */

static patstr *patterns = NULL;
static patstr *patterns_last = NULL;
static patstr *include_patterns = NULL;
static patstr *include_patterns_last = NULL;
static patstr *exclude_patterns = NULL;
static patstr *exclude_patterns_last = NULL;
static patstr *include_dir_patterns = NULL;
static patstr *include_dir_patterns_last = NULL;
static patstr *exclude_dir_patterns = NULL;
static patstr *exclude_dir_patterns_last = NULL;

/* Structure holding the two variables that describe a pattern chain. A pointer
to such structures is used for each appropriate option. */

typedef struct patdatastr {
  patstr **anchor;
  patstr **lastptr;
} patdatastr;

/* Chain descriptions used as data targets in optionlist[]: -e, --include,
--exclude, --include-dir and --exclude-dir respectively. */

static patdatastr match_patdata = { &patterns, &patterns_last };
static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };

/* Parallel tables: the four include/exclude chains and, in the same order,
the names of the options they belong to. */

static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
  &include_dir_patterns, &exclude_dir_patterns };

static const char *incexname[4] = { "--include", "--exclude",
  "--include-dir", "--exclude-dir" };
348
/* Structure for options and list of them. The OP_xxx values are the possible
settings of the "type" field of an option_item. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
  OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };

typedef struct option_item {
  int type;               /* one of the OP_xxx values */
  int one_char;           /* option letter, or a negative N_xxx identifier */
  void *dataptr;          /* variable or chain description to set, or NULL */
  const char *long_name;  /* long option name, as listed in optionlist[] */
  const char *help_text;  /* one-line description of the option */
} option_item;
361
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. In optionlist[], N_COLOUR is shared by the "color" and
"colour" spellings, and N_M_LIMIT_DEP by "depth-limit" and its obsolete synonym
"recursion-limit". */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_EXCLUDE_DIR (-3)
#define N_HELP (-4)
#define N_INCLUDE (-5)
#define N_INCLUDE_DIR (-6)
#define N_LABEL (-7)
#define N_LOCALE (-8)
#define N_NULL (-9)
#define N_LOFFSETS (-10)
#define N_FOFFSETS (-11)
#define N_LBUFFER (-12)
#define N_H_LIMIT (-13)
#define N_M_LIMIT (-14)
#define N_M_LIMIT_DEP (-15)
#define N_BUFSIZE (-16)
#define N_NOJIT (-17)
#define N_FILE_LIST (-18)
#define N_BINARY_FILES (-19)
#define N_EXCLUDE_FROM (-20)
#define N_INCLUDE_FROM (-21)
#define N_OM_SEPARATOR (-22)
#define N_MAX_BUFSIZE (-23)
388
/* The table of options. Each entry gives, in order: the data type (OP_xxx),
the single-character form (or a negative N_xxx identifier when there is none),
a pointer to the variable or chain description that receives the data (NULL
when there is none), the long name, and the help text. The table ends with an
entry whose one_char is 0 and whose names are NULL.

NOTE(review): the long names carry decorations such as "=number" and "(p)";
how these are interpreted is decided by code outside this part of the file. */

static option_item optionlist[] = {
  { OP_NODATA, N_NULL, NULL, "", "terminate options" },
  { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
  { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
  { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
  { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
  { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
  { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
  { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
  { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
  { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
  { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
  { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
  { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
  { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
  { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
  { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
  { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
  { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
  { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
  { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
  { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
  { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
  { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
  { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
  { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
  { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
  { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
  { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
  { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
  { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
  { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
  { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
#ifdef SUPPORT_PCRE2GREP_JIT
  { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
#else
  { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
#endif
  { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
  { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
  { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
  { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
  { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
  { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
  { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
  { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
  { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
  { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
  { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
#endif
  { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
  { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
  { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
  { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
  { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
  { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
  { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA, 0, NULL, NULL, NULL }
};
454
/* Table of names for newline types. Must be kept in step with the definitions
of PCRE2_NEWLINE_xx in pcre2.h. The first entry, "DEFAULT", occupies index 0. */

static const char *newlines[] = {
  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };

/* UTF-8 tables - used only when the newline setting is "any".

NOTE(review): utf8_table3 looks like a set of masks for the data bits of a
leading byte, and utf8_table4 (64 entries) like a count of additional bytes
indexed by the low six bits of a leading byte; the code that reads them is
outside this part of the file - confirm. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
470
471
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* Replacement for memmove(), compiled only when neither VPCOMPAT nor
HAVE_MEMMOVE is defined. When bcopy() is available it does the work; otherwise
the bytes are copied one at a time, starting from the end when the destination
lies above the source so that overlapping regions are handled.

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
  bcopy(s, d, n);
  return d;
#else
  unsigned char *to = (unsigned char *)d;
  const unsigned char *from = (const unsigned char *)s;
  size_t remaining = n;

  if (to > from) {
    /* Destination is above the source: copy the last byte first. */
    to += n;
    from += n;
    while (remaining-- > 0) *(--to) = *(--from);
  } else {
    while (remaining-- > 0) *to++ = *from++;
  }
  return d;
#endif /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
508
509
510 /*************************************************
511 * Case-independent string compare *
512 *************************************************/
513
/* Compares two NUL-terminated strings, ignoring case as defined by tolower().

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal when case is ignored
            -1 if the first differing character of str1 is the smaller
            +1 if the first differing character of str1 is the larger
*/

static int
strcmpic(const char *str1, const char *str2)
{
  while (*str1 != '\0' || *str2 != '\0') {
    /* tolower() is defined only for values representable as unsigned char
    (and EOF). Plain char may be signed, so convert before the call. A string
    that ends first supplies 0 here, which differs from any remaining
    character, so neither pointer is advanced beyond its terminator. */
    int c1 = tolower((unsigned char)*str1++);
    int c2 = tolower((unsigned char)*str2++);
    if (c1 != c2) return (c1 > c2) ? 1 : -1;
  }
  return 0;
}
526
527
528 /*************************************************
529 * Parse GREP_COLORS *
530 *************************************************/
531
/* Pick out the value of "ms=" from a GREP_COLORS string, or of "mt=" when
there is no "ms=". The value runs up to the next colon or the end of the
string and is copied into a static buffer, so at most 15 characters are kept
and each call overwrites the result of the previous one.

Argument:   the string, possibly NULL
Returns:    the value of ms or mt, or NULL if neither present
*/

static char *
parse_grep_colors(const char *gc)
{
  static char seq[16];
  const char *value;
  uint32_t used = 0;

  if (gc == NULL) return NULL;

  value = strstr(gc, "ms=");
  if (value == NULL) {
    value = strstr(gc, "mt=");
    if (value == NULL) return NULL;
  }

  /* Skip the three-character key, then copy up to ':' or the terminator. */
  for (value += 3;
       *value != ':' && *value != 0 && used < sizeof(seq)-1;
       value++)
    seq[used++] = *value;

  seq[used] = 0;
  return seq;
}
555
556
557 /*************************************************
558 * Exit from the program *
559 *************************************************/
560
561 /* If there has been a resource error, give a suitable message.
562
563 Argument: the return code
564 Returns: does not return
565 */
566
567 static void
pcre2grep_exit(int rc)568 pcre2grep_exit(int rc)
569 {
570 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
571 status of 1, which is not helpful. To help with this problem, define a symbol
572 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
573 therein. */
574
575 #ifdef __VMS
576 #include descrip
577 #include lib$routines
578 char val_buf[4];
579 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
580 $DESCRIPTOR(sym_val, val_buf);
581 sprintf(val_buf, "%d", rc);
582 sym_val.dsc$w_length = strlen(val_buf);
583 lib$set_symbol(&sym_nam, &sym_val);
584 #endif
585
586 if (resource_error)
587 {
588 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
589 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
590 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
591 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
592 }
593 exit(rc);
594 }
595
596
597 /*************************************************
598 * Add item to chain of patterns *
599 *************************************************/
600
601 /* Used to add an item onto a chain, or just return an unconnected item if the
602 "after" argument is NULL.
603
604 Arguments:
605 s pattern string to add
606 patlen length of pattern
607 after if not NULL points to item to insert after
608
609 Returns: new pattern block or NULL on error
610 */
611
612 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)613 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
614 {
615 patstr *p = (patstr *)malloc(sizeof(patstr));
616 if (p == NULL)
617 {
618 fprintf(stderr, "pcre2grep: malloc failed\n");
619 pcre2grep_exit(2);
620 }
621 if (patlen > MAXPATLEN)
622 {
623 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
624 MAXPATLEN);
625 free(p);
626 return NULL;
627 }
628 p->next = NULL;
629 p->string = s;
630 p->length = patlen;
631 p->compiled = NULL;
632
633 if (after != NULL)
634 {
635 p->next = after->next;
636 after->next = p;
637 }
638 return p;
639 }
640
641
642 /*************************************************
643 * Free chain of patterns *
644 *************************************************/
645
646 /* Used for several chains of patterns.
647
648 Argument: pointer to start of chain
649 Returns: nothing
650 */
651
652 static void
free_pattern_chain(patstr * pc)653 free_pattern_chain(patstr *pc)
654 {
655 while (pc != NULL)
656 {
657 patstr *p = pc;
658 pc = p->next;
659 if (p->compiled != NULL) pcre2_code_free(p->compiled);
660 free(p);
661 }
662 }
663
664
665 /*************************************************
666 * Free chain of file names *
667 *************************************************/
668
669 /*
670 Argument: pointer to start of chain
671 Returns: nothing
672 */
673
674 static void
free_file_chain(fnstr * fn)675 free_file_chain(fnstr *fn)
676 {
677 while (fn != NULL)
678 {
679 fnstr *f = fn;
680 fn = f->next;
681 free(f);
682 }
683 }
684
685
686 /*************************************************
687 * OS-specific functions *
688 *************************************************/
689
690 /* These definitions are needed in all Windows environments, even those where
691 Unix-style directory scanning can be used (see below). */
692
693 #ifdef WIN32
694
695 #ifndef STRICT
696 # define STRICT
697 #endif
698 #ifndef WIN32_LEAN_AND_MEAN
699 # define WIN32_LEAN_AND_MEAN
700 #endif
701
702 #include <windows.h>
703
/* True if the name contains a wildcard character ('*' or '?'). */
#define iswild(name) (strpbrk(name, "*?") != NULL)
705
/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 of a three-bit
colour number change places and bit 1 stays where it is. The argument is
parenthesized at every use so that any expression can be passed, including one
built with operators of lower precedence than "&". Note that the argument is
evaluated three times. */
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
708
/* State for coloured output under Windows, all set by init_colour_output():
the standard output handle, the console screen buffer information, and the
console attribute decoded from colour_string. */

static HANDLE hstdout;
static CONSOLE_SCREEN_BUFFER_INFO csbi;
static WORD match_colour;
712
713 static WORD
decode_ANSI_colour(const char * cs)714 decode_ANSI_colour(const char *cs)
715 {
716 WORD result = csbi.wAttributes;
717 while (*cs)
718 {
719 if (isdigit(*cs))
720 {
721 int code = atoi(cs);
722 if (code == 1) result |= 0x08;
723 else if (code == 4) result |= 0x8000;
724 else if (code == 5) result |= 0x80;
725 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
726 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
727 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
728 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
729 /* aixterm high intensity colour codes */
730 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
731 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
732
733 while (isdigit(*cs)) cs++;
734 }
735 if (*cs) cs++;
736 }
737 return result;
738 }
739
740
741 static void
init_colour_output()742 init_colour_output()
743 {
744 if (do_colour)
745 {
746 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
747 /* This fails when redirected to con; try again if so. */
748 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
749 {
750 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
751 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
752 GetConsoleScreenBufferInfo(hcon, &csbi);
753 CloseHandle(hcon);
754 }
755 match_colour = decode_ANSI_colour(colour_string);
756 /* No valid colour found - turn off colouring */
757 if (!match_colour) do_colour = FALSE;
758 }
759 }
760
761 #endif /* WIN32 */
762
763
764 /* The following sets of functions are defined so that they can be made system
765 specific. At present there are versions for Unix-style environments, Windows,
766 native z/OS, and "no support". */
767
768
769 /************* Directory scanning Unix-style and z/OS ***********/
770
771 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
772 #include <sys/types.h>
773 #include <sys/stat.h>
774 #include <dirent.h>
775
776 #if defined NATIVE_ZOS
777 /************* Directory and PDS/E scanning for z/OS ***********/
778 /************* z/OS looks mostly like Unix with USS ************/
779 /* However, z/OS needs the #include statements in this header */
780 #include "pcrzosfs.h"
781 /* That header is not included in the main PCRE distribution because
782 other apparatus is needed to compile pcre2grep for z/OS. The header
783 can be found in the special z/OS distribution, which is available
784 from www.zaconsultants.net or from www.cbttape.org. */
785 #endif
786
/* In this environment a directory handle is the DIR type from <dirent.h>. */
typedef DIR directory_type;

/* NOTE(review): presumably the character placed between a directory name and
a file name when paths are built; it is used outside this part of the file. */
#define FILESEP '/'
789
/* Test whether a path names a directory, using stat().

Argument:   the path
Returns:    non-zero for a directory, otherwise zero; zero is also returned
              when stat() fails, in the expectation that opening the path as
              a file will fail
*/

static int
isdirectory(char *filename)
{
  struct stat info;
  int rc = stat(filename, &info);

  return (rc < 0) ? 0 : S_ISDIR(info.st_mode);
}
798
799 static directory_type *
opendirectory(char * filename)800 opendirectory(char *filename)
801 {
802 return opendir(filename);
803 }
804
805 static char *
readdirectory(directory_type * dir)806 readdirectory(directory_type *dir)
807 {
808 for (;;)
809 {
810 struct dirent *dent = readdir(dir);
811 if (dent == NULL) return NULL;
812 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
813 return dent->d_name;
814 }
815 /* Control never reaches here */
816 }
817
818 static void
closedirectory(directory_type * dir)819 closedirectory(directory_type *dir)
820 {
821 closedir(dir);
822 }
823
824
825 /************* Test for regular file, Unix-style **********/
826
/* Test whether a path names a regular file, using stat().

Argument:   the path
Returns:    non-zero for a regular file, otherwise zero; 1 is returned when
              stat() fails, in the expectation that opening the path as a
              file will fail
*/

static int
isregfile(char *filename)
{
  struct stat info;
  int rc = stat(filename, &info);

  return (rc < 0) ? 1 : S_ISREG(info.st_mode);
}
835
836
837 #if defined NATIVE_ZOS
838 /************* Test for a terminal in z/OS **********/
839 /* isatty() does not work in a TSO environment, so always give FALSE.*/
840
841 static BOOL
is_stdout_tty(void)842 is_stdout_tty(void)
843 {
844 return FALSE;
845 }
846
847 static BOOL
is_file_tty(FILE * f)848 is_file_tty(FILE *f)
849 {
850 return FALSE;
851 }
852
853
854 /************* Test for a terminal, Unix-style **********/
855
856 #else
857 static BOOL
is_stdout_tty(void)858 is_stdout_tty(void)
859 {
860 return isatty(fileno(stdout));
861 }
862
863 static BOOL
is_file_tty(FILE * f)864 is_file_tty(FILE *f)
865 {
866 return isatty(fileno(f));
867 }
868 #endif
869
870
871 /************* Print optionally coloured match Unix-style and z/OS **********/
872
873 static void
print_match(const void * buf,int length)874 print_match(const void *buf, int length)
875 {
876 if (length == 0) return;
877 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
878 FWRITE_IGNORE(buf, 1, length, stdout);
879 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
880 }
881
882 /* End of Unix-style or native z/OS environment functions. */
883
884
885 /************* Directory scanning in Windows ***********/
886
887 /* I (Philip Hazel) have no means of testing this code. It was contributed by
888 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
889 when it did not exist. David Byron added a patch that moved the #include of
890 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
891 */
892
893 #elif defined WIN32
894
895 #ifndef INVALID_FILE_ATTRIBUTES
896 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
897 #endif
898
/* Directory handle for the Windows directory-scanning functions. */

typedef struct directory_type
{
  HANDLE handle;          /* search handle from FindFirstFile() */
  BOOL first;             /* TRUE until the first entry has been handed out */
  WIN32_FIND_DATA data;   /* the most recently found entry */
} directory_type;

/* NOTE(review): presumably the character placed between a directory name and
a file name when paths are built; it is used outside this part of the file. */

#define FILESEP '/'
907
908 int
isdirectory(char * filename)909 isdirectory(char *filename)
910 {
911 DWORD attr = GetFileAttributes(filename);
912 if (attr == INVALID_FILE_ATTRIBUTES)
913 return 0;
914 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
915 }
916
917 directory_type *
opendirectory(char * filename)918 opendirectory(char *filename)
919 {
920 size_t len;
921 char *pattern;
922 directory_type *dir;
923 DWORD err;
924 len = strlen(filename);
925 pattern = (char *)malloc(len + 3);
926 dir = (directory_type *)malloc(sizeof(*dir));
927 if ((pattern == NULL) || (dir == NULL))
928 {
929 fprintf(stderr, "pcre2grep: malloc failed\n");
930 pcre2grep_exit(2);
931 }
932 memcpy(pattern, filename, len);
933 if (iswild(filename))
934 pattern[len] = 0;
935 else
936 memcpy(&(pattern[len]), "\\*", 3);
937 dir->handle = FindFirstFile(pattern, &(dir->data));
938 if (dir->handle != INVALID_HANDLE_VALUE)
939 {
940 free(pattern);
941 dir->first = TRUE;
942 return dir;
943 }
944 err = GetLastError();
945 free(pattern);
946 free(dir);
947 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
948 return NULL;
949 }
950
951 char *
readdirectory(directory_type * dir)952 readdirectory(directory_type *dir)
953 {
954 for (;;)
955 {
956 if (!dir->first)
957 {
958 if (!FindNextFile(dir->handle, &(dir->data)))
959 return NULL;
960 }
961 else
962 {
963 dir->first = FALSE;
964 }
965 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
966 return dir->data.cFileName;
967 }
968 #ifndef _MSC_VER
969 return NULL; /* Keep compiler happy; never executed */
970 #endif
971 }
972
973 void
closedirectory(directory_type * dir)974 closedirectory(directory_type *dir)
975 {
976 FindClose(dir->handle);
977 free(dir);
978 }
979
980
981 /************* Test for regular file in Windows **********/
982
983 /* I don't know how to do this, or if it can be done; assume all paths are
984 regular if they are not directories. */
985
/* Anything that isdirectory() does not recognize as a directory is treated as
a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is not a directory, 0 if it is
*/
int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
990
991
992 /************* Test for a terminal in Windows **********/
993
994 static BOOL
is_stdout_tty(void)995 is_stdout_tty(void)
996 {
997 return _isatty(_fileno(stdout));
998 }
999
1000 static BOOL
is_file_tty(FILE * f)1001 is_file_tty(FILE *f)
1002 {
1003 return _isatty(_fileno(f));
1004 }
1005
1006
1007 /************* Print optionally coloured match in Windows **********/
1008
1009 static void
print_match(const void * buf,int length)1010 print_match(const void *buf, int length)
1011 {
1012 if (length == 0) return;
1013 if (do_colour)
1014 {
1015 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1016 else SetConsoleTextAttribute(hstdout, match_colour);
1017 }
1018 FWRITE_IGNORE(buf, 1, length, stdout);
1019 if (do_colour)
1020 {
1021 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1022 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1023 }
1024 }
1025
1026 /* End of Windows functions */
1027
1028
1029 /************* Directory scanning when we can't do it ***********/
1030
1031 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1032
1033 #else
1034
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported to be a directory. */
int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory can never be opened; always yields a null pointer. */
directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries to read; always yields a null pointer. */
char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to close. */
void
closedirectory(directory_type *dir)
{
  (void)dir;
}
1042
1043
1044 /************* Test for regular file when we can't do it **********/
1045
1046 /* Assume all files are regular. */
1047
int
isregfile(char *filename)
{
  (void)filename;   /* Every path is taken to be a regular file */
  return 1;
}
1049
1050
1051 /************* Test for a terminal when we can't do it **********/
1052
/* Terminals cannot be detected in this configuration, so stdout is always
reported as not being one. */
static BOOL
is_stdout_tty(void)
{
return FALSE;
}
1058
1059 static BOOL
is_file_tty(FILE * f)1060 is_file_tty(FILE *f)
1061 {
1062 return FALSE;
1063 }
1064
1065
1066 /************* Print optionally coloured match when we can't do it **********/
1067
/* Write "length" bytes from "buf" to stdout without any colouring. Nothing is
written for an empty fragment.

Arguments:
  buf       start of the bytes to write
  length    number of bytes to write

Returns:    nothing
*/
static void
print_match(const void *buf, int length)
{
  if (length != 0)
  {
    FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1074
1075 #endif /* End of system-specific functions */
1076
1077
1078
1079 #ifndef HAVE_STRERROR
1080 /*************************************************
1081 * Provide strerror() for non-ANSI libraries *
1082 *************************************************/
1083
1084 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1085 in their libraries, but can provide the same facility by this simple
1086 alternative function. */
1087
extern int sys_nerr;
extern char *sys_errlist[];

/* Look an error number up in the system's message table.

Argument:   n   the error number
Returns:    the table entry for n, or a fixed "unknown" text when n lies
            outside the range 0 .. sys_nerr-1
*/
char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
1097 #endif /* HAVE_STRERROR */
1098
1099
1100
1101 /*************************************************
1102 * Usage function *
1103 *************************************************/
1104
1105 static int
usage(int rc)1106 usage(int rc)
1107 {
1108 option_item *op;
1109 fprintf(stderr, "Usage: pcre2grep [-");
1110 for (op = optionlist; op->one_char != 0; op++)
1111 {
1112 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1113 }
1114 fprintf(stderr, "] [long options] [pattern] [files]\n");
1115 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1116 "options.\n");
1117 return rc;
1118 }
1119
1120
1121
1122 /*************************************************
1123 * Help function *
1124 *************************************************/
1125
1126 static void
help(void)1127 help(void)
1128 {
1129 option_item *op;
1130
1131 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1132 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1133 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1134
1135 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1136 printf("Callout scripts in patterns are supported." STDOUT_NL);
1137 #else
1138 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1139 #endif
1140
1141 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1142
1143 #ifdef SUPPORT_LIBZ
1144 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1145 #endif
1146
1147 #ifdef SUPPORT_LIBBZ2
1148 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1149 #endif
1150
1151 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1152 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1153 #else
1154 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1155 #endif
1156
1157 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1158 printf("Options:" STDOUT_NL);
1159
1160 for (op = optionlist; op->one_char != 0; op++)
1161 {
1162 int n;
1163 char s[4];
1164
1165 if (op->one_char > 0 && (op->long_name)[0] == 0)
1166 n = 31 - printf(" -%c", op->one_char);
1167 else
1168 {
1169 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1170 else strcpy(s, " ");
1171 n = 31 - printf(" %s --%s", s, op->long_name);
1172 }
1173
1174 if (n < 1) n = 1;
1175 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1176 }
1177
1178 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1179 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1180 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1181 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1182 printf("space is removed and blank lines are ignored." STDOUT_NL);
1183 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1184
1185 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1186 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1187 }
1188
1189
1190
1191 /*************************************************
1192 * Test exclude/includes *
1193 *************************************************/
1194
1195 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1196 there are no includes, the path must match an include pattern.
1197
1198 Arguments:
1199 path the path to be matched
1200 ip the chain of include patterns
1201 ep the chain of exclude patterns
1202
1203 Returns: TRUE if the path is not excluded
1204 */
1205
1206 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1207 test_incexc(char *path, patstr *ip, patstr *ep)
1208 {
1209 int plen = strlen((const char *)path);
1210
1211 for (; ep != NULL; ep = ep->next)
1212 {
1213 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1214 return FALSE;
1215 }
1216
1217 if (ip == NULL) return TRUE;
1218
1219 for (; ip != NULL; ip = ip->next)
1220 {
1221 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1222 return TRUE;
1223 }
1224
1225 return FALSE;
1226 }
1227
1228
1229
1230 /*************************************************
1231 * Decode integer argument value *
1232 *************************************************/
1233
1234 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1235 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1236 just keep it simple.
1237
1238 Arguments:
1239 option_data the option data string
1240 op the option item (for error messages)
1241 longop TRUE if option given in long form
1242
1243 Returns: a long integer
1244 */
1245
1246 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1247 decode_number(char *option_data, option_item *op, BOOL longop)
1248 {
1249 unsigned long int n = 0;
1250 char *endptr = option_data;
1251 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1252 while (isdigit((unsigned char)(*endptr)))
1253 n = n * 10 + (int)(*endptr++ - '0');
1254 if (toupper(*endptr) == 'K')
1255 {
1256 n *= 1024;
1257 endptr++;
1258 }
1259 else if (toupper(*endptr) == 'M')
1260 {
1261 n *= 1024*1024;
1262 endptr++;
1263 }
1264
1265 if (*endptr != 0) /* Error */
1266 {
1267 if (longop)
1268 {
1269 char *equals = strchr(op->long_name, '=');
1270 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1271 (int)(equals - op->long_name);
1272 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1273 option_data, nlen, op->long_name);
1274 }
1275 else
1276 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1277 option_data, op->one_char);
1278 pcre2grep_exit(usage(2));
1279 }
1280
1281 return n;
1282 }
1283
1284
1285
1286 /*************************************************
1287 * Add item to a chain of numbers *
1288 *************************************************/
1289
1290 /* Used to add an item onto a chain, or just return an unconnected item if the
1291 "after" argument is NULL.
1292
1293 Arguments:
1294 n the number to add
1295 after if not NULL points to item to insert after
1296
1297 Returns: new number block
1298 */
1299
1300 static omstr *
add_number(int n,omstr * after)1301 add_number(int n, omstr *after)
1302 {
1303 omstr *om = (omstr *)malloc(sizeof(omstr));
1304
1305 if (om == NULL)
1306 {
1307 fprintf(stderr, "pcre2grep: malloc failed\n");
1308 pcre2grep_exit(2);
1309 }
1310 om->next = NULL;
1311 om->groupnum = n;
1312
1313 if (after != NULL)
1314 {
1315 om->next = after->next;
1316 after->next = om;
1317 }
1318 return om;
1319 }
1320
1321
1322
1323 /*************************************************
1324 * Read one line of input *
1325 *************************************************/
1326
1327 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1328 BZ2_read) into a large buffer, so many lines may be read at once. However,
1329 doing this for tty input means that no output appears until a lot of input has
1330 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1331 for this, because it does not stop at a binary zero, and therefore there is no
1332 way of telling how many characters it has read, because there may be binary
1333 zeros embedded in the data. This function is also used for reading patterns
1334 from files (the -f option).
1335
1336 Arguments:
1337 buffer the buffer to read into
1338 length the maximum number of characters to read
1339 f the file
1340
1341 Returns: the number of characters read, zero at end of file
1342 */
1343
1344 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1345 read_one_line(char *buffer, int length, FILE *f)
1346 {
1347 int c;
1348 int yield = 0;
1349 while ((c = fgetc(f)) != EOF)
1350 {
1351 buffer[yield++] = c;
1352 if (c == '\n' || yield >= length) break;
1353 }
1354 return yield;
1355 }
1356
1357
1358
1359 /*************************************************
1360 * Find end of line *
1361 *************************************************/
1362
1363 /* The length of the endline sequence that is found is set via lenptr. This may
1364 be zero at the very end of the file if there is no line-ending sequence there.
1365
1366 Arguments:
1367 p current position in line
1368 endptr end of available data
1369 lenptr where to put the length of the eol sequence
1370
1371 Returns: pointer after the last byte of the line,
1372 including the newline byte(s)
1373 */
1374
1375 static char *
end_of_line(char * p,char * endptr,int * lenptr)1376 end_of_line(char *p, char *endptr, int *lenptr)
1377 {
1378 switch(endlinetype)
1379 {
1380 default: /* Just in case */
1381 case PCRE2_NEWLINE_LF:
1382 while (p < endptr && *p != '\n') p++;
1383 if (p < endptr)
1384 {
1385 *lenptr = 1;
1386 return p + 1;
1387 }
1388 *lenptr = 0;
1389 return endptr;
1390
1391 case PCRE2_NEWLINE_CR:
1392 while (p < endptr && *p != '\r') p++;
1393 if (p < endptr)
1394 {
1395 *lenptr = 1;
1396 return p + 1;
1397 }
1398 *lenptr = 0;
1399 return endptr;
1400
1401 case PCRE2_NEWLINE_NUL:
1402 while (p < endptr && *p != '\0') p++;
1403 if (p < endptr)
1404 {
1405 *lenptr = 1;
1406 return p + 1;
1407 }
1408 *lenptr = 0;
1409 return endptr;
1410
1411 case PCRE2_NEWLINE_CRLF:
1412 for (;;)
1413 {
1414 while (p < endptr && *p != '\r') p++;
1415 if (++p >= endptr)
1416 {
1417 *lenptr = 0;
1418 return endptr;
1419 }
1420 if (*p == '\n')
1421 {
1422 *lenptr = 2;
1423 return p + 1;
1424 }
1425 }
1426 break;
1427
1428 case PCRE2_NEWLINE_ANYCRLF:
1429 while (p < endptr)
1430 {
1431 int extra = 0;
1432 int c = *((unsigned char *)p);
1433
1434 if (utf && c >= 0xc0)
1435 {
1436 int gcii, gcss;
1437 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1438 gcss = 6*extra;
1439 c = (c & utf8_table3[extra]) << gcss;
1440 for (gcii = 1; gcii <= extra; gcii++)
1441 {
1442 gcss -= 6;
1443 c |= (p[gcii] & 0x3f) << gcss;
1444 }
1445 }
1446
1447 p += 1 + extra;
1448
1449 switch (c)
1450 {
1451 case '\n':
1452 *lenptr = 1;
1453 return p;
1454
1455 case '\r':
1456 if (p < endptr && *p == '\n')
1457 {
1458 *lenptr = 2;
1459 p++;
1460 }
1461 else *lenptr = 1;
1462 return p;
1463
1464 default:
1465 break;
1466 }
1467 } /* End of loop for ANYCRLF case */
1468
1469 *lenptr = 0; /* Must have hit the end */
1470 return endptr;
1471
1472 case PCRE2_NEWLINE_ANY:
1473 while (p < endptr)
1474 {
1475 int extra = 0;
1476 int c = *((unsigned char *)p);
1477
1478 if (utf && c >= 0xc0)
1479 {
1480 int gcii, gcss;
1481 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1482 gcss = 6*extra;
1483 c = (c & utf8_table3[extra]) << gcss;
1484 for (gcii = 1; gcii <= extra; gcii++)
1485 {
1486 gcss -= 6;
1487 c |= (p[gcii] & 0x3f) << gcss;
1488 }
1489 }
1490
1491 p += 1 + extra;
1492
1493 switch (c)
1494 {
1495 case '\n': /* LF */
1496 case '\v': /* VT */
1497 case '\f': /* FF */
1498 *lenptr = 1;
1499 return p;
1500
1501 case '\r': /* CR */
1502 if (p < endptr && *p == '\n')
1503 {
1504 *lenptr = 2;
1505 p++;
1506 }
1507 else *lenptr = 1;
1508 return p;
1509
1510 #ifndef EBCDIC
1511 case 0x85: /* Unicode NEL */
1512 *lenptr = utf? 2 : 1;
1513 return p;
1514
1515 case 0x2028: /* Unicode LS */
1516 case 0x2029: /* Unicode PS */
1517 *lenptr = 3;
1518 return p;
1519 #endif /* Not EBCDIC */
1520
1521 default:
1522 break;
1523 }
1524 } /* End of loop for ANY case */
1525
1526 *lenptr = 0; /* Must have hit the end */
1527 return endptr;
1528 } /* End of overall switch */
1529 }
1530
1531
1532
1533 /*************************************************
1534 * Find start of previous line *
1535 *************************************************/
1536
1537 /* This is called when looking back for before lines to print.
1538
1539 Arguments:
1540 p start of the subsequent line
1541 startptr start of available data
1542
1543 Returns: pointer to the start of the previous line
1544 */
1545
1546 static char *
previous_line(char * p,char * startptr)1547 previous_line(char *p, char *startptr)
1548 {
1549 switch(endlinetype)
1550 {
1551 default: /* Just in case */
1552 case PCRE2_NEWLINE_LF:
1553 p--;
1554 while (p > startptr && p[-1] != '\n') p--;
1555 return p;
1556
1557 case PCRE2_NEWLINE_CR:
1558 p--;
1559 while (p > startptr && p[-1] != '\n') p--;
1560 return p;
1561
1562 case PCRE2_NEWLINE_NUL:
1563 p--;
1564 while (p > startptr && p[-1] != '\0') p--;
1565 return p;
1566
1567 case PCRE2_NEWLINE_CRLF:
1568 for (;;)
1569 {
1570 p -= 2;
1571 while (p > startptr && p[-1] != '\n') p--;
1572 if (p <= startptr + 1 || p[-2] == '\r') return p;
1573 }
1574 /* Control can never get here */
1575
1576 case PCRE2_NEWLINE_ANY:
1577 case PCRE2_NEWLINE_ANYCRLF:
1578 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1579 if (utf) while ((*p & 0xc0) == 0x80) p--;
1580
1581 while (p > startptr)
1582 {
1583 unsigned int c;
1584 char *pp = p - 1;
1585
1586 if (utf)
1587 {
1588 int extra = 0;
1589 while ((*pp & 0xc0) == 0x80) pp--;
1590 c = *((unsigned char *)pp);
1591 if (c >= 0xc0)
1592 {
1593 int gcii, gcss;
1594 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1595 gcss = 6*extra;
1596 c = (c & utf8_table3[extra]) << gcss;
1597 for (gcii = 1; gcii <= extra; gcii++)
1598 {
1599 gcss -= 6;
1600 c |= (pp[gcii] & 0x3f) << gcss;
1601 }
1602 }
1603 }
1604 else c = *((unsigned char *)pp);
1605
1606 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1607 {
1608 case '\n': /* LF */
1609 case '\r': /* CR */
1610 return p;
1611
1612 default:
1613 break;
1614 }
1615
1616 else switch (c)
1617 {
1618 case '\n': /* LF */
1619 case '\v': /* VT */
1620 case '\f': /* FF */
1621 case '\r': /* CR */
1622 #ifndef EBCDIC
1623 case 0x85: /* Unicode NEL */
1624 case 0x2028: /* Unicode LS */
1625 case 0x2029: /* Unicode PS */
1626 #endif /* Not EBCDIC */
1627 return p;
1628
1629 default:
1630 break;
1631 }
1632
1633 p = pp; /* Back one character */
1634 } /* End of loop for ANY case */
1635
1636 return startptr; /* Hit start of data */
1637 } /* End of overall switch */
1638 }
1639
1640
1641
1642 /*************************************************
1643 * Print the previous "after" lines *
1644 *************************************************/
1645
1646 /* This is called if we are about to lose said lines because of buffer filling,
1647 and at the end of the file. The data in the line is written using fwrite() so
1648 that a binary zero does not terminate it.
1649
1650 Arguments:
1651 lastmatchnumber the number of the last matching line, plus one
1652 lastmatchrestart where we restarted after the last match
1653 endptr end of available data
1654 printname filename for printing
1655
1656 Returns: nothing
1657 */
1658
1659 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1660 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1661 char *endptr, const char *printname)
1662 {
1663 if (after_context > 0 && lastmatchnumber > 0)
1664 {
1665 int count = 0;
1666 while (lastmatchrestart < endptr && count < after_context)
1667 {
1668 int ellength;
1669 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1670 if (ellength == 0 && pp == main_buffer + bufsize) break;
1671 if (printname != NULL) fprintf(stdout, "%s-", printname);
1672 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1673 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1674 lastmatchrestart = pp;
1675 count++;
1676 }
1677 if (count > 0) hyphenpending = TRUE;
1678 }
1679 }
1680
1681
1682
1683 /*************************************************
1684 * Apply patterns to subject till one matches *
1685 *************************************************/
1686
1687 /* This function is called to run through all patterns, looking for a match. It
1688 is used multiple times for the same subject when colouring is enabled, in order
1689 to find all possible matches.
1690
1691 Arguments:
1692 matchptr the start of the subject
1693 length the length of the subject to match
  options      options for pcre2_match()
1695 startoffset where to start matching
1696 mrc address of where to put the result of pcre2_match()
1697
1698 Returns: TRUE if there was a match
1699 FALSE if there was no match
1700 invert if there was a non-fatal error
1701 */
1702
1703 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1704 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1705 PCRE2_SIZE startoffset, int *mrc)
1706 {
1707 int i;
1708 PCRE2_SIZE slen = length;
1709 patstr *p = patterns;
1710 const char *msg = "this text:\n\n";
1711
1712 if (slen > 200)
1713 {
1714 slen = 200;
1715 msg = "text that starts:\n\n";
1716 }
1717 for (i = 1; p != NULL; p = p->next, i++)
1718 {
1719 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1720 startoffset, options, match_data, match_context);
1721 if (*mrc >= 0) return TRUE;
1722 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1723 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1724 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1725 fprintf(stderr, "%s", msg);
1726 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1727 fprintf(stderr, "\n\n");
1728 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1729 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1730 resource_error = TRUE;
1731 if (error_count++ > 20)
1732 {
1733 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1734 pcre2grep_exit(2);
1735 }
1736 return invert; /* No more matching; don't show the line again */
1737 }
1738
1739 return FALSE; /* No match, no errors */
1740 }
1741
1742
1743 /*************************************************
1744 * Check output text for errors *
1745 *************************************************/
1746
1747 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)1748 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
1749 {
1750 PCRE2_SPTR begin = string;
1751 for (; *string != 0; string++)
1752 {
1753 if (*string == '$')
1754 {
1755 PCRE2_SIZE capture_id = 0;
1756 BOOL brace = FALSE;
1757
1758 string++;
1759
1760 /* Syntax error: a character must be present after $. */
1761 if (*string == 0)
1762 {
1763 if (!callout)
1764 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1765 (int)(string - begin), "no character after $");
1766 return FALSE;
1767 }
1768
1769 if (*string == '{')
1770 {
1771 /* Must be a decimal number in braces, e.g: {5} or {38} */
1772 string++;
1773
1774 brace = TRUE;
1775 }
1776
1777 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1778 {
1779 do
1780 {
1781 /* Maximum capture id is 65535. */
1782 if (capture_id <= 65535)
1783 capture_id = capture_id * 10 + (*string - '0');
1784
1785 string++;
1786 }
1787 while (*string >= '0' && *string <= '9');
1788
1789 if (brace)
1790 {
1791 /* Syntax error: closing brace is missing. */
1792 if (*string != '}')
1793 {
1794 if (!callout)
1795 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1796 (int)(string - begin), "missing closing brace");
1797 return FALSE;
1798 }
1799 }
1800 else
1801 {
1802 /* To negate the effect of the for. */
1803 string--;
1804 }
1805 }
1806 else if (brace)
1807 {
1808 /* Syntax error: a decimal number required. */
1809 if (!callout)
1810 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1811 (int)(string - begin), "decimal number expected");
1812 return FALSE;
1813 }
1814 else if (*string == 'o')
1815 {
1816 string++;
1817
1818 if (*string < '0' || *string > '7')
1819 {
1820 /* Syntax error: an octal number required. */
1821 if (!callout)
1822 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1823 (int)(string - begin), "octal number expected");
1824 return FALSE;
1825 }
1826 }
1827 else if (*string == 'x')
1828 {
1829 string++;
1830
1831 if (!isxdigit((unsigned char)*string))
1832 {
1833 /* Syntax error: a hexdecimal number required. */
1834 if (!callout)
1835 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1836 (int)(string - begin), "hexadecimal number expected");
1837 return FALSE;
1838 }
1839 }
1840 }
1841 }
1842
1843 return TRUE;
1844 }
1845
1846
1847 /*************************************************
1848 * Display output text *
1849 *************************************************/
1850
1851 /* Display the output text, which is assumed to have already been syntax
1852 checked. Output may contain escape sequences started by the dollar sign. The
1853 escape sequences are substituted as follows:
1854
1855 $<digits> or ${<digits>} is replaced by the captured substring of the given
1856 decimal number; zero will substitute the whole match. If the number is
1857 greater than the number of capturing substrings, or if the capture is unset,
1858 the replacement is empty.
1859
1860 $a is replaced by bell.
1861 $b is replaced by backspace.
1862 $e is replaced by escape.
1863 $f is replaced by form feed.
1864 $n is replaced by newline.
1865 $r is replaced by carriage return.
1866 $t is replaced by tab.
1867 $v is replaced by vertical tab.
1868
1869 $o<digits> is replaced by the character represented by the given octal
1870 number; up to three digits are processed.
1871
1872 $x<digits> is replaced by the character represented by the given hexadecimal
1873 number; up to two digits are processed.
1874
1875 Any other character is substituted by itself. E.g: $$ is replaced by a single
1876 dollar.
1877
1878 Arguments:
1879 string: the output text
1880 callout: TRUE for the builtin callout, FALSE for --output
1881 subject the start of the subject
1882 ovector: capture offsets
1883 capture_top: number of captures
1884
1885 Returns: TRUE if something was output, other than newline
1886 FALSE if nothing was output, or newline was last output
1887 */
1888
1889 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)1890 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
1891 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
1892 {
1893 BOOL printed = FALSE;
1894
1895 for (; *string != 0; string++)
1896 {
1897 int ch = EOF;
1898 if (*string == '$')
1899 {
1900 PCRE2_SIZE capture_id = 0;
1901 BOOL brace = FALSE;
1902
1903 string++;
1904
1905 if (*string == '{')
1906 {
1907 /* Must be a decimal number in braces, e.g: {5} or {38} */
1908 string++;
1909
1910 brace = TRUE;
1911 }
1912
1913 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1914 {
1915 do
1916 {
1917 /* Maximum capture id is 65535. */
1918 if (capture_id <= 65535)
1919 capture_id = capture_id * 10 + (*string - '0');
1920
1921 string++;
1922 }
1923 while (*string >= '0' && *string <= '9');
1924
1925 if (!brace)
1926 {
1927 /* To negate the effect of the for. */
1928 string--;
1929 }
1930
1931 if (capture_id < capture_top)
1932 {
1933 PCRE2_SIZE capturesize;
1934 capture_id *= 2;
1935
1936 capturesize = ovector[capture_id + 1] - ovector[capture_id];
1937 if (capturesize > 0)
1938 {
1939 print_match(subject + ovector[capture_id], capturesize);
1940 printed = TRUE;
1941 }
1942 }
1943 }
1944 else if (*string == 'a') ch = '\a';
1945 else if (*string == 'b') ch = '\b';
1946 #ifndef EBCDIC
1947 else if (*string == 'e') ch = '\033';
1948 #else
1949 else if (*string == 'e') ch = '\047';
1950 #endif
1951 else if (*string == 'f') ch = '\f';
1952 else if (*string == 'r') ch = '\r';
1953 else if (*string == 't') ch = '\t';
1954 else if (*string == 'v') ch = '\v';
1955 else if (*string == 'n')
1956 {
1957 fprintf(stdout, STDOUT_NL);
1958 printed = FALSE;
1959 }
1960 else if (*string == 'o')
1961 {
1962 string++;
1963
1964 ch = *string - '0';
1965 if (string[1] >= '0' && string[1] <= '7')
1966 {
1967 string++;
1968 ch = ch * 8 + (*string - '0');
1969 }
1970 if (string[1] >= '0' && string[1] <= '7')
1971 {
1972 string++;
1973 ch = ch * 8 + (*string - '0');
1974 }
1975 }
1976 else if (*string == 'x')
1977 {
1978 string++;
1979
1980 if (*string >= '0' && *string <= '9')
1981 ch = *string - '0';
1982 else
1983 ch = (*string | 0x20) - 'a' + 10;
1984 if (isxdigit((unsigned char)string[1]))
1985 {
1986 string++;
1987 ch *= 16;
1988 if (*string >= '0' && *string <= '9')
1989 ch += *string - '0';
1990 else
1991 ch += (*string | 0x20) - 'a' + 10;
1992 }
1993 }
1994 else
1995 {
1996 ch = *string;
1997 }
1998 }
1999 else
2000 {
2001 ch = *string;
2002 }
2003 if (ch != EOF)
2004 {
2005 fprintf(stdout, "%c", ch);
2006 printed = TRUE;
2007 }
2008 }
2009
2010 return printed;
2011 }
2012
2013
2014 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2015
2016 /*************************************************
2017 * Parse and execute callout scripts *
2018 *************************************************/
2019
2020 /* This function parses a callout string block and executes the
2021 program specified by the string. The string is a list of substrings
2022 separated by pipe characters. The first substring represents the
2023 executable name, and the following substrings specify the arguments:
2024
2025 program_name|param1|param2|...
2026
2027 Any substring (including the program name) can contain escape sequences
2028 started by the dollar character. The escape sequences are substituted as
2029 follows:
2030
2031 $<digits> or ${<digits>} is replaced by the captured substring of the given
2032 decimal number, which must be greater than zero. If the number is greater
2033 than the number of capturing substrings, or if the capture is unset, the
2034 replacement is empty.
2035
2036 Any other character is substituted by itself. E.g: $$ is replaced by a single
2037 dollar or $| replaced by a pipe character.
2038
2039 Alternatively, if string starts with pipe, the remainder is taken as an output
2040 string, same as --output. In this case, --om-separator is used to separate each
2041 callout, defaulting to newline.
2042
2043 Example:
2044
2045 echo -e "abcde\n12345" | pcre2grep \
2046 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2047
2048 Output:
2049
2050 Arg1: [a] [bcd] [d] Arg2: |a| ()
2051 abcde
2052 Arg1: [1] [234] [4] Arg2: |1| ()
2053 12345
2054
2055 Arguments:
2056 blockptr the callout block
2057
2058 Returns: currently it always returns with 0
2059 */
2060
2061 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2062 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2063 {
2064 PCRE2_SIZE length = calloutptr->callout_string_length;
2065 PCRE2_SPTR string = calloutptr->callout_string;
2066 PCRE2_SPTR subject = calloutptr->subject;
2067 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2068 PCRE2_SIZE capture_top = calloutptr->capture_top;
2069 PCRE2_SIZE argsvectorlen = 2;
2070 PCRE2_SIZE argslen = 1;
2071 char *args;
2072 char *argsptr;
2073 char **argsvector;
2074 char **argsvectorptr;
2075 #ifndef WIN32
2076 pid_t pid;
2077 #endif
2078 int result = 0;
2079
2080 (void)unused; /* Avoid compiler warning */
2081
2082 /* Only callout with strings are supported. */
2083 if (string == NULL || length == 0) return 0;
2084
2085 /* If there's no command, output the remainder directly. */
2086
2087 if (*string == '|')
2088 {
2089 string++;
2090 if (!syntax_check_output_text(string, TRUE)) return 0;
2091 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2092 return 0;
2093 }
2094
2095 /* Checking syntax and compute the number of string fragments. Callout strings
2096 are ignored in case of a syntax error. */
2097
2098 while (length > 0)
2099 {
2100 if (*string == '|')
2101 {
2102 argsvectorlen++;
2103
2104 /* Maximum 10000 arguments allowed. */
2105 if (argsvectorlen > 10000) return 0;
2106 }
2107 else if (*string == '$')
2108 {
2109 PCRE2_SIZE capture_id = 0;
2110
2111 string++;
2112 length--;
2113
2114 /* Syntax error: a character must be present after $. */
2115 if (length == 0) return 0;
2116
2117 if (*string >= '1' && *string <= '9')
2118 {
2119 do
2120 {
2121 /* Maximum capture id is 65535. */
2122 if (capture_id <= 65535)
2123 capture_id = capture_id * 10 + (*string - '0');
2124
2125 string++;
2126 length--;
2127 }
2128 while (length > 0 && *string >= '0' && *string <= '9');
2129
2130 /* To negate the effect of string++ below. */
2131 string--;
2132 length++;
2133 }
2134 else if (*string == '{')
2135 {
2136 /* Must be a decimal number in braces, e.g: {5} or {38} */
2137 string++;
2138 length--;
2139
2140 /* Syntax error: a decimal number required. */
2141 if (length == 0) return 0;
2142 if (*string < '1' || *string > '9') return 0;
2143
2144 do
2145 {
2146 /* Maximum capture id is 65535. */
2147 if (capture_id <= 65535)
2148 capture_id = capture_id * 10 + (*string - '0');
2149
2150 string++;
2151 length--;
2152
2153 /* Syntax error: no more characters */
2154 if (length == 0) return 0;
2155 }
2156 while (*string >= '0' && *string <= '9');
2157
2158 /* Syntax error: closing brace is missing. */
2159 if (*string != '}') return 0;
2160 }
2161
2162 if (capture_id > 0)
2163 {
2164 if (capture_id < capture_top)
2165 {
2166 capture_id *= 2;
2167 argslen += ovector[capture_id + 1] - ovector[capture_id];
2168 }
2169
2170 /* To negate the effect of argslen++ below. */
2171 argslen--;
2172 }
2173 }
2174
2175 string++;
2176 length--;
2177 argslen++;
2178 }
2179
2180 args = (char*)malloc(argslen);
2181 if (args == NULL) return 0;
2182
2183 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2184 if (argsvector == NULL)
2185 {
2186 free(args);
2187 return 0;
2188 }
2189
2190 argsptr = args;
2191 argsvectorptr = argsvector;
2192
2193 *argsvectorptr++ = argsptr;
2194
2195 length = calloutptr->callout_string_length;
2196 string = calloutptr->callout_string;
2197
2198 while (length > 0)
2199 {
2200 if (*string == '|')
2201 {
2202 *argsptr++ = '\0';
2203 *argsvectorptr++ = argsptr;
2204 }
2205 else if (*string == '$')
2206 {
2207 string++;
2208 length--;
2209
2210 if ((*string >= '1' && *string <= '9') || *string == '{')
2211 {
2212 PCRE2_SIZE capture_id = 0;
2213
2214 if (*string != '{')
2215 {
2216 do
2217 {
2218 /* Maximum capture id is 65535. */
2219 if (capture_id <= 65535)
2220 capture_id = capture_id * 10 + (*string - '0');
2221
2222 string++;
2223 length--;
2224 }
2225 while (length > 0 && *string >= '0' && *string <= '9');
2226
2227 /* To negate the effect of string++ below. */
2228 string--;
2229 length++;
2230 }
2231 else
2232 {
2233 string++;
2234 length--;
2235
2236 do
2237 {
2238 /* Maximum capture id is 65535. */
2239 if (capture_id <= 65535)
2240 capture_id = capture_id * 10 + (*string - '0');
2241
2242 string++;
2243 length--;
2244 }
2245 while (*string != '}');
2246 }
2247
2248 if (capture_id < capture_top)
2249 {
2250 PCRE2_SIZE capturesize;
2251 capture_id *= 2;
2252
2253 capturesize = ovector[capture_id + 1] - ovector[capture_id];
2254 memcpy(argsptr, subject + ovector[capture_id], capturesize);
2255 argsptr += capturesize;
2256 }
2257 }
2258 else
2259 {
2260 *argsptr++ = *string;
2261 }
2262 }
2263 else
2264 {
2265 *argsptr++ = *string;
2266 }
2267
2268 string++;
2269 length--;
2270 }
2271
2272 *argsptr++ = '\0';
2273 *argsvectorptr = NULL;
2274
2275 #ifdef WIN32
2276 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2277 #else
2278 pid = fork();
2279
2280 if (pid == 0)
2281 {
2282 (void)execv(argsvector[0], argsvector);
2283 /* Control gets here if there is an error, e.g. a non-existent program */
2284 exit(1);
2285 }
2286 else if (pid > 0)
2287 (void)waitpid(pid, &result, 0);
2288 #endif
2289
2290 free(args);
2291 free(argsvector);
2292
2293 /* Currently negative return values are not supported, only zero (match
2294 continues) or non-zero (match fails). */
2295
2296 return result != 0;
2297 }
2298
2299 #endif
2300
2301
2302
2303 /*************************************************
2304 * Read a portion of the file into buffer *
2305 *************************************************/
2306
2307 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2308 fill_buffer(void *handle, int frtype, char *buffer, int length,
2309 BOOL input_line_buffered)
2310 {
2311 (void)frtype; /* Avoid warning when not used */
2312
2313 #ifdef SUPPORT_LIBZ
2314 if (frtype == FR_LIBZ)
2315 return gzread((gzFile)handle, buffer, length);
2316 else
2317 #endif
2318
2319 #ifdef SUPPORT_LIBBZ2
2320 if (frtype == FR_LIBBZ2)
2321 return BZ2_bzread((BZFILE *)handle, buffer, length);
2322 else
2323 #endif
2324
2325 return (input_line_buffered ?
2326 read_one_line(buffer, length, (FILE *)handle) :
2327 fread(buffer, 1, length, (FILE *)handle));
2328 }
2329
2330
2331
2332 /*************************************************
2333 * Grep an individual file *
2334 *************************************************/
2335
2336 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2337 times the value of bufthird. The matching point is never allowed to stray into
2338 the top third of the buffer, thus keeping more of the file available for
2339 context printing or for multiline scanning. For large files, the pointer will
2340 be in the middle third most of the time, so the bottom third is available for
2341 "before" context printing.
2342
2343 Arguments:
2344 handle the fopened FILE stream for a normal file
2345 the gzFile pointer when reading is via libz
2346 the BZFILE pointer when reading is via libbz2
2347 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2348 filename the file name or NULL (for errors)
2349 printname the file name if it is to be printed for each match
2350 or NULL if the file name is not to be printed
2351 it cannot be NULL if filenames[_nomatch]_only is set
2352
Returns:       0 if there was at least one match
               1 otherwise (no matches)
               2 if an overlong line is encountered, if the buffer cannot
                 be enlarged to hold it, or if the first read from a .bz2
                 file fails
2357 */
2358
2359 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2360 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2361 {
2362 int rc = 1;
2363 int filepos = 0;
2364 unsigned long int linenumber = 1;
2365 unsigned long int lastmatchnumber = 0;
2366 unsigned long int count = 0;
2367 char *lastmatchrestart = main_buffer;
2368 char *ptr = main_buffer;
2369 char *endptr;
2370 PCRE2_SIZE bufflength;
2371 BOOL binary = FALSE;
2372 BOOL endhyphenpending = FALSE;
2373 BOOL input_line_buffered = line_buffered;
2374 FILE *in = NULL; /* Ensure initialized */
2375
2376 /* Do the first read into the start of the buffer and set up the pointer to end
2377 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2378 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2379 fail. */
2380
2381 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2382 {
2383 in = (FILE *)handle;
2384 if (is_file_tty(in)) input_line_buffered = TRUE;
2385 }
2386 else input_line_buffered = FALSE;
2387
2388 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2389 input_line_buffered);
2390
2391 #ifdef SUPPORT_LIBBZ2
2392 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */
2393 #endif
2394
2395 endptr = main_buffer + bufflength;
2396
2397 /* Unless binary-files=text, see if we have a binary file. This uses the same
2398 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2399 file. However, when the newline convention is binary zero, we can't do this. */
2400
2401 if (binary_files != BIN_TEXT)
2402 {
2403 if (endlinetype != PCRE2_NEWLINE_NUL)
2404 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2405 != NULL;
2406 if (binary && binary_files == BIN_NOMATCH) return 1;
2407 }
2408
2409 /* Loop while the current pointer is not at the end of the file. For large
2410 files, endptr will be at the end of the buffer when we are in the middle of the
2411 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2412 way, the buffer is shifted left and re-filled. */
2413
2414 while (ptr < endptr)
2415 {
2416 int endlinelength;
2417 int mrc = 0;
2418 unsigned int options = 0;
2419 BOOL match;
2420 char *t = ptr;
2421 PCRE2_SIZE length, linelength;
2422 PCRE2_SIZE startoffset = 0;
2423
2424 /* At this point, ptr is at the start of a line. We need to find the length
2425 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2426 length remainder of the data in the buffer. Otherwise, it is the length of
2427 the next line, excluding the terminating newline. After matching, we always
2428 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2429 option is used for compiling, so that any match is constrained to be in the
2430 first line. */
2431
2432 t = end_of_line(t, endptr, &endlinelength);
2433 linelength = t - ptr - endlinelength;
2434 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2435
2436 /* Check to see if the line we are looking at extends right to the very end
2437 of the buffer without a line terminator. This means the line is too long to
2438 handle at the current buffer size. Until the buffer reaches its maximum size,
2439 try doubling it and reading more data. */
2440
2441 if (endlinelength == 0 && t == main_buffer + bufsize)
2442 {
2443 if (bufthird < max_bufthird)
2444 {
2445 char *new_buffer;
2446 int new_bufthird = 2*bufthird;
2447
2448 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2449 new_buffer = (char *)malloc(3*new_bufthird);
2450
2451 if (new_buffer == NULL)
2452 {
2453 fprintf(stderr,
2454 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2455 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2456 linenumber,
2457 (filename == NULL)? "" : " of file ",
2458 (filename == NULL)? "" : filename,
2459 new_bufthird);
2460 return 2;
2461 }
2462
2463 /* Copy the data and adjust pointers to the new buffer location. */
2464
2465 memcpy(new_buffer, main_buffer, bufsize);
2466 bufthird = new_bufthird;
2467 bufsize = 3*bufthird;
2468 ptr = new_buffer + (ptr - main_buffer);
2469 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2470 free(main_buffer);
2471 main_buffer = new_buffer;
2472
2473 /* Read more data into the buffer and then try to find the line ending
2474 again. */
2475
2476 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2477 bufsize - bufflength, input_line_buffered);
2478 endptr = main_buffer + bufflength;
2479 continue;
2480 }
2481 else
2482 {
2483 fprintf(stderr,
2484 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2485 "pcre2grep: the maximum buffer size is %d\n"
2486 "pcre2grep: use the --max-buffer-size option to change it\n",
2487 linenumber,
2488 (filename == NULL)? "" : " of file ",
2489 (filename == NULL)? "" : filename,
2490 bufthird);
2491 return 2;
2492 }
2493 }
2494
2495 /* Extra processing for Jeffrey Friedl's debugging. */
2496
2497 #ifdef JFRIEDL_DEBUG
2498 if (jfriedl_XT || jfriedl_XR)
2499 {
2500 # include <sys/time.h>
2501 # include <time.h>
2502 struct timeval start_time, end_time;
2503 struct timezone dummy;
2504 int i;
2505
2506 if (jfriedl_XT)
2507 {
2508 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2509 const char *orig = ptr;
2510 ptr = malloc(newlen + 1);
2511 if (!ptr) {
2512 printf("out of memory");
2513 pcre2grep_exit(2);
2514 }
2515 endptr = ptr;
2516 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2517 for (i = 0; i < jfriedl_XT; i++) {
2518 strncpy(endptr, orig, length);
2519 endptr += length;
2520 }
2521 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2522 length = newlen;
2523 }
2524
2525 if (gettimeofday(&start_time, &dummy) != 0)
2526 perror("bad gettimeofday");
2527
2528
2529 for (i = 0; i < jfriedl_XR; i++)
2530 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2531 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
2532
2533 if (gettimeofday(&end_time, &dummy) != 0)
2534 perror("bad gettimeofday");
2535
2536 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2537 -
2538 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2539
2540 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2541 return 0;
2542 }
2543 #endif
2544
2545 /* We come back here after a match when only_matching_count is non-zero, in
2546 order to find any further matches in the same line. This applies to
2547 --only-matching, --file-offsets, and --line-offsets. */
2548
2549 ONLY_MATCHING_RESTART:
2550
2551 /* Run through all the patterns until one matches or there is an error other
2552 than NOMATCH. This code is in a subroutine so that it can be re-used for
2553 finding subsequent matches when colouring matched lines. After finding one
2554 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2555 this line. */
2556
2557 match = match_patterns(ptr, length, options, startoffset, &mrc);
2558 options = PCRE2_NOTEMPTY;
2559
2560 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2561 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2562 return code - to output data lines, so that binary zeroes are treated as just
2563 another data character. */
2564
2565 if (match != invert)
2566 {
2567 BOOL hyphenprinted = FALSE;
2568
2569 /* We've failed if we want a file that doesn't have any matches. */
2570
2571 if (filenames == FN_NOMATCH_ONLY) return 1;
2572
2573 /* If all we want is a yes/no answer, we can return immediately. */
2574
2575 if (quiet) return 0;
2576
2577 /* Just count if just counting is wanted. */
2578
2579 else if (count_only || show_total_count) count++;
2580
2581 /* When handling a binary file and binary-files==binary, the "binary"
2582 variable will be set true (it's false in all other cases). In this
2583 situation we just want to output the file name. No need to scan further. */
2584
2585 else if (binary)
2586 {
2587 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2588 return 0;
2589 }
2590
2591 /* Likewise, if all we want is a file name, there is no need to scan any
2592 more lines in the file. */
2593
2594 else if (filenames == FN_MATCH_ONLY)
2595 {
2596 fprintf(stdout, "%s" STDOUT_NL, printname);
2597 return 0;
2598 }
2599
2600 /* The --only-matching option prints just the substring that matched,
2601 and/or one or more captured portions of it, as long as these strings are
2602 not empty. The --file-offsets and --line-offsets options output offsets for
2603 the matching substring (all three set only_matching_count non-zero). None
2604 of these mutually exclusive options prints any context. Afterwards, adjust
2605 the start and then jump back to look for further matches in the same line.
2606 If we are in invert mode, however, nothing is printed and we do not restart
2607 - this could still be useful because the return code is set. */
2608
2609 else if (only_matching_count != 0)
2610 {
2611 if (!invert)
2612 {
2613 PCRE2_SIZE oldstartoffset;
2614
2615 if (printname != NULL) fprintf(stdout, "%s:", printname);
2616 if (number) fprintf(stdout, "%lu:", linenumber);
2617
2618 /* Handle --line-offsets */
2619
2620 if (line_offsets)
2621 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2622 (int)(offsets[1] - offsets[0]));
2623
2624 /* Handle --file-offsets */
2625
2626 else if (file_offsets)
2627 fprintf(stdout, "%d,%d" STDOUT_NL,
2628 (int)(filepos + ptr + offsets[0] - ptr),
2629 (int)(offsets[1] - offsets[0]));
2630
2631 /* Handle --output (which has already been syntax checked) */
2632
2633 else if (output_text != NULL)
2634 {
2635 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2636 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2637 number)
2638 fprintf(stdout, STDOUT_NL);
2639 }
2640
2641 /* Handle --only-matching, which may occur many times */
2642
2643 else
2644 {
2645 BOOL printed = FALSE;
2646 omstr *om;
2647
2648 for (om = only_matching; om != NULL; om = om->next)
2649 {
2650 int n = om->groupnum;
2651 if (n < mrc)
2652 {
2653 int plen = offsets[2*n + 1] - offsets[2*n];
2654 if (plen > 0)
2655 {
2656 if (printed && om_separator != NULL)
2657 fprintf(stdout, "%s", om_separator);
2658 print_match(ptr + offsets[n*2], plen);
2659 printed = TRUE;
2660 }
2661 }
2662 }
2663
2664 if (printed || printname != NULL || number)
2665 fprintf(stdout, STDOUT_NL);
2666 }
2667
2668 /* Prepare to repeat to find the next match in the line. */
2669
2670 match = FALSE;
2671 if (line_buffered) fflush(stdout);
2672 rc = 0; /* Had some success */
2673
2674 /* If the pattern contained a lookbehind that included \K, it is
2675 possible that the end of the match might be at or before the actual
2676 starting offset we have just used. In this case, start one character
2677 further on. */
2678
2679 startoffset = offsets[1]; /* Restart after the match */
2680 oldstartoffset = pcre2_get_startchar(match_data);
2681 if (startoffset <= oldstartoffset)
2682 {
2683 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2684 startoffset = oldstartoffset + 1;
2685 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2686 }
2687
2688 /* If the current match ended past the end of the line (only possible
2689 in multiline mode), we must move on to the line in which it did end
2690 before searching for more matches. */
2691
2692 while (startoffset > linelength)
2693 {
2694 ptr += linelength + endlinelength;
2695 filepos += (int)(linelength + endlinelength);
2696 linenumber++;
2697 startoffset -= (int)(linelength + endlinelength);
2698 t = end_of_line(ptr, endptr, &endlinelength);
2699 linelength = t - ptr - endlinelength;
2700 length = (PCRE2_SIZE)(endptr - ptr);
2701 }
2702
2703 goto ONLY_MATCHING_RESTART;
2704 }
2705 }
2706
2707 /* This is the default case when none of the above options is set. We print
2708 the matching lines(s), possibly preceded and/or followed by other lines of
2709 context. */
2710
2711 else
2712 {
2713 /* See if there is a requirement to print some "after" lines from a
2714 previous match. We never print any overlaps. */
2715
2716 if (after_context > 0 && lastmatchnumber > 0)
2717 {
2718 int ellength;
2719 int linecount = 0;
2720 char *p = lastmatchrestart;
2721
2722 while (p < ptr && linecount < after_context)
2723 {
2724 p = end_of_line(p, ptr, &ellength);
2725 linecount++;
2726 }
2727
2728 /* It is important to advance lastmatchrestart during this printing so
2729 that it interacts correctly with any "before" printing below. Print
2730 each line's data using fwrite() in case there are binary zeroes. */
2731
2732 while (lastmatchrestart < p)
2733 {
2734 char *pp = lastmatchrestart;
2735 if (printname != NULL) fprintf(stdout, "%s-", printname);
2736 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2737 pp = end_of_line(pp, endptr, &ellength);
2738 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2739 lastmatchrestart = pp;
2740 }
2741 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2742 }
2743
2744 /* If there were non-contiguous lines printed above, insert hyphens. */
2745
2746 if (hyphenpending)
2747 {
2748 fprintf(stdout, "--" STDOUT_NL);
2749 hyphenpending = FALSE;
2750 hyphenprinted = TRUE;
2751 }
2752
2753 /* See if there is a requirement to print some "before" lines for this
2754 match. Again, don't print overlaps. */
2755
2756 if (before_context > 0)
2757 {
2758 int linecount = 0;
2759 char *p = ptr;
2760
2761 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2762 linecount < before_context)
2763 {
2764 linecount++;
2765 p = previous_line(p, main_buffer);
2766 }
2767
2768 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2769 fprintf(stdout, "--" STDOUT_NL);
2770
2771 while (p < ptr)
2772 {
2773 int ellength;
2774 char *pp = p;
2775 if (printname != NULL) fprintf(stdout, "%s-", printname);
2776 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2777 pp = end_of_line(pp, endptr, &ellength);
2778 FWRITE_IGNORE(p, 1, pp - p, stdout);
2779 p = pp;
2780 }
2781 }
2782
2783 /* Now print the matching line(s); ensure we set hyphenpending at the end
2784 of the file if any context lines are being output. */
2785
2786 if (after_context > 0 || before_context > 0)
2787 endhyphenpending = TRUE;
2788
2789 if (printname != NULL) fprintf(stdout, "%s:", printname);
2790 if (number) fprintf(stdout, "%lu:", linenumber);
2791
2792 /* This extra option, for Jeffrey Friedl's debugging requirements,
2793 replaces the matched string, or a specific captured string if it exists,
2794 with X. When this happens, colouring is ignored. */
2795
2796 #ifdef JFRIEDL_DEBUG
2797 if (S_arg >= 0 && S_arg < mrc)
2798 {
2799 int first = S_arg * 2;
2800 int last = first + 1;
2801 FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2802 fprintf(stdout, "X");
2803 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2804 }
2805 else
2806 #endif
2807
2808 /* In multiline mode, or if colouring, we have to split the line(s) up
2809 and search for further matches, but not of course if the line is a
2810 non-match. In multiline mode this is necessary in case there is another
2811 match that spans the end of the current line. When colouring we want to
2812 colour all matches. */
2813
2814 if ((multiline || do_colour) && !invert)
2815 {
2816 int plength;
2817 PCRE2_SIZE endprevious;
2818
2819 /* The use of \K may make the end offset earlier than the start. In
2820 this situation, swap them round. */
2821
2822 if (offsets[0] > offsets[1])
2823 {
2824 PCRE2_SIZE temp = offsets[0];
2825 offsets[0] = offsets[1];
2826 offsets[1] = temp;
2827 }
2828
2829 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2830 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2831
2832 for (;;)
2833 {
2834 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2835
2836 endprevious = offsets[1];
2837 startoffset = endprevious; /* Advance after previous match. */
2838
2839 /* If the pattern contained a lookbehind that included \K, it is
2840 possible that the end of the match might be at or before the actual
2841 starting offset we have just used. In this case, start one character
2842 further on. */
2843
2844 if (startoffset <= oldstartoffset)
2845 {
2846 startoffset = oldstartoffset + 1;
2847 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2848 }
2849
2850 /* If the current match ended past the end of the line (only possible
2851 in multiline mode), we must move on to the line in which it did end
2852 before searching for more matches. Because the PCRE2_FIRSTLINE option
2853 is set, the start of the match will always be before the first
2854 newline sequence. */
2855
2856 while (startoffset > linelength + endlinelength)
2857 {
2858 ptr += linelength + endlinelength;
2859 filepos += (int)(linelength + endlinelength);
2860 linenumber++;
2861 startoffset -= (int)(linelength + endlinelength);
2862 endprevious -= (int)(linelength + endlinelength);
2863 t = end_of_line(ptr, endptr, &endlinelength);
2864 linelength = t - ptr - endlinelength;
2865 length = (PCRE2_SIZE)(endptr - ptr);
2866 }
2867
2868 /* If startoffset is at the exact end of the line it means this
2869 complete line was the final part of the match, so there is nothing
2870 more to do. */
2871
2872 if (startoffset == linelength + endlinelength) break;
2873
2874 /* Otherwise, run a match from within the final line, and if found,
2875 loop for any that may follow. */
2876
2877 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2878
2879 /* The use of \K may make the end offset earlier than the start. In
2880 this situation, swap them round. */
2881
2882 if (offsets[0] > offsets[1])
2883 {
2884 PCRE2_SIZE temp = offsets[0];
2885 offsets[0] = offsets[1];
2886 offsets[1] = temp;
2887 }
2888
2889 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
2890 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2891 }
2892
2893 /* In multiline mode, we may have already printed the complete line
2894 and its line-ending characters (if they matched the pattern), so there
2895 may be no more to print. */
2896
2897 plength = (int)((linelength + endlinelength) - endprevious);
2898 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
2899 }
2900
2901 /* Not colouring or multiline; no need to search for further matches. */
2902
2903 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
2904 }
2905
2906 /* End of doing what has to be done for a match. If --line-buffered was
2907 given, flush the output. */
2908
2909 if (line_buffered) fflush(stdout);
2910 rc = 0; /* Had some success */
2911
2912 /* Remember where the last match happened for after_context. We remember
2913 where we are about to restart, and that line's number. */
2914
2915 lastmatchrestart = ptr + linelength + endlinelength;
2916 lastmatchnumber = linenumber + 1;
2917 }
2918
2919 /* For a match in multiline inverted mode (which of course did not cause
2920 anything to be printed), we have to move on to the end of the match before
2921 proceeding. */
2922
2923 if (multiline && invert && match)
2924 {
2925 int ellength;
2926 char *endmatch = ptr + offsets[1];
2927 t = ptr;
2928 while (t < endmatch)
2929 {
2930 t = end_of_line(t, endptr, &ellength);
2931 if (t <= endmatch) linenumber++; else break;
2932 }
2933 endmatch = end_of_line(endmatch, endptr, &ellength);
2934 linelength = endmatch - ptr - ellength;
2935 }
2936
2937 /* Advance to after the newline and increment the line number. The file
2938 offset to the current line is maintained in filepos. */
2939
2940 END_ONE_MATCH:
2941 ptr += linelength + endlinelength;
2942 filepos += (int)(linelength + endlinelength);
2943 linenumber++;
2944
2945 /* If input is line buffered, and the buffer is not yet full, read another
2946 line and add it into the buffer. */
2947
2948 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
2949 {
2950 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2951 bufflength += add;
2952 endptr += add;
2953 }
2954
2955 /* If we haven't yet reached the end of the file (the buffer is full), and
2956 the current point is in the top 1/3 of the buffer, slide the buffer down by
2957 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2958 about to be lost, print them. */
2959
2960 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
2961 {
2962 if (after_context > 0 &&
2963 lastmatchnumber > 0 &&
2964 lastmatchrestart < main_buffer + bufthird)
2965 {
2966 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2967 lastmatchnumber = 0; /* Indicates no after lines pending */
2968 }
2969
2970 /* Now do the shuffle */
2971
2972 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2973 ptr -= bufthird;
2974
2975 bufflength = 2*bufthird + fill_buffer(handle, frtype,
2976 main_buffer + 2*bufthird, bufthird, input_line_buffered);
2977 endptr = main_buffer + bufflength;
2978
2979 /* Adjust any last match point */
2980
2981 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2982 }
2983 } /* Loop through the whole file */
2984
2985 /* End of file; print final "after" lines if wanted; do_after_lines sets
2986 hyphenpending if it prints something. */
2987
2988 if (only_matching_count == 0 && !(count_only|show_total_count))
2989 {
2990 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2991 hyphenpending |= endhyphenpending;
2992 }
2993
2994 /* Print the file name if we are looking for those without matches and there
2995 were none. If we found a match, we won't have got this far. */
2996
2997 if (filenames == FN_NOMATCH_ONLY)
2998 {
2999 fprintf(stdout, "%s" STDOUT_NL, printname);
3000 return 0;
3001 }
3002
3003 /* Print the match count if wanted */
3004
3005 if (count_only && !quiet)
3006 {
3007 if (count > 0 || !omit_zero_count)
3008 {
3009 if (printname != NULL && filenames != FN_NONE)
3010 fprintf(stdout, "%s:", printname);
3011 fprintf(stdout, "%lu" STDOUT_NL, count);
3012 counts_printed++;
3013 }
3014 }
3015
3016 total_count += count; /* Can be set without count_only */
3017 return rc;
3018 }
3019
3020
3021
3022 /*************************************************
3023 * Grep a file or recurse into a directory *
3024 *************************************************/
3025
3026 /* Given a path name, if it's a directory, scan all the files if we are
3027 recursing; if it's a file, grep it.
3028
3029 Arguments:
3030 pathname the path to investigate
3031 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3032 only_one_at_top TRUE if the path is the only one at toplevel
3033
3034 Returns: -1 the file/directory was skipped
3035 0 if there was at least one match
3036 1 if there were no matches
3037 2 there was some kind of error
3038
3039 However, file opening failures are suppressed if "silent" is set.
3040 */
3041
3042 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3043 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3044 {
3045 int rc = 1;
3046 int frtype;
3047 void *handle;
3048 char *lastcomp;
3049 FILE *in = NULL; /* Ensure initialized */
3050
3051 #ifdef SUPPORT_LIBZ
3052 gzFile ingz = NULL;
3053 #endif
3054
3055 #ifdef SUPPORT_LIBBZ2
3056 BZFILE *inbz2 = NULL;
3057 #endif
3058
3059 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3060 int pathlen;
3061 #endif
3062
3063 #if defined NATIVE_ZOS
3064 int zos_type;
3065 FILE *zos_test_file;
3066 #endif
3067
3068 /* If the file name is "-" we scan stdin */
3069
3070 if (strcmp(pathname, "-") == 0)
3071 {
3072 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3073 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3074 stdin_name : NULL);
3075 }
3076
3077 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3078 directories, whereas --include and --exclude apply to everything else. The test
3079 is against the final component of the path. */
3080
3081 lastcomp = strrchr(pathname, FILESEP);
3082 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3083
3084 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3085 Otherwise, scan the directory and recurse for each path within it. The scanning
3086 code is localized so it can be made system-specific. */
3087
3088
3089 /* For z/OS, determine the file type. */
3090
3091 #if defined NATIVE_ZOS
3092 zos_test_file = fopen(pathname,"rb");
3093
3094 if (zos_test_file == NULL)
3095 {
3096 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3097 pathname, strerror(errno));
3098 return -1;
3099 }
3100 zos_type = identifyzosfiletype (zos_test_file);
3101 fclose (zos_test_file);
3102
3103 /* Handle a PDS in separate code */
3104
3105 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3106 {
3107 return travelonpdsdir (pathname, only_one_at_top);
3108 }
3109
3110 /* Deal with regular files in the normal way below. These types are:
3111 zos_type == __ZOS_PDS_MEMBER
3112 zos_type == __ZOS_PS
3113 zos_type == __ZOS_VSAM_KSDS
3114 zos_type == __ZOS_VSAM_ESDS
3115 zos_type == __ZOS_VSAM_RRDS
3116 */
3117
3118 /* Handle a z/OS directory using common code. */
3119
3120 else if (zos_type == __ZOS_HFS)
3121 {
3122 #endif /* NATIVE_ZOS */
3123
3124
3125 /* Handle directories: common code for all OS */
3126
3127 if (isdirectory(pathname))
3128 {
3129 if (dee_action == dee_SKIP ||
3130 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3131 return -1;
3132
3133 if (dee_action == dee_RECURSE)
3134 {
3135 char buffer[FNBUFSIZ];
3136 char *nextfile;
3137 directory_type *dir = opendirectory(pathname);
3138
3139 if (dir == NULL)
3140 {
3141 if (!silent)
3142 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3143 strerror(errno));
3144 return 2;
3145 }
3146
3147 while ((nextfile = readdirectory(dir)) != NULL)
3148 {
3149 int frc;
3150 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3151 if (fnlength > FNBUFSIZ)
3152 {
3153 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3154 rc = 2;
3155 break;
3156 }
3157 sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3158 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3159 if (frc > 1) rc = frc;
3160 else if (frc == 0 && rc == 1) rc = 0;
3161 }
3162
3163 closedirectory(dir);
3164 return rc;
3165 }
3166 }
3167
3168 #ifdef WIN32
3169 if (iswild(pathname))
3170 {
3171 char buffer[1024];
3172 char *nextfile;
3173 char *name;
3174 directory_type *dir = opendirectory(pathname);
3175
3176 if (dir == NULL)
3177 return 0;
3178
3179 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3180 if (*nextfile == '/' || *nextfile == '\\')
3181 name = nextfile + 1;
3182 *name = 0;
3183
3184 while ((nextfile = readdirectory(dir)) != NULL)
3185 {
3186 int frc;
3187 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3188 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3189 if (frc > 1) rc = frc;
3190 else if (frc == 0 && rc == 1) rc = 0;
3191 }
3192
3193 closedirectory(dir);
3194 return rc;
3195 }
3196 #endif
3197
3198 #if defined NATIVE_ZOS
3199 }
3200 #endif
3201
3202 /* If the file is not a directory, check for a regular file, and if it is not,
3203 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3204 exclusion. */
3205
3206 else if (
3207 #if defined NATIVE_ZOS
3208 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3209 #else /* all other OS */
3210 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3211 #endif
3212 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3213 return -1; /* File skipped */
3214
3215 /* Control reaches here if we have a regular file, or if we have a directory
3216 and recursion or skipping was not requested, or if we have anything else and
3217 skipping was not requested. The scan proceeds. If this is the first and only
3218 argument at top level, we don't show the file name, unless we are only showing
3219 the file name, or the filename was forced (-H). */
3220
3221 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3222 pathlen = (int)(strlen(pathname));
3223 #endif
3224
3225 /* Open using zlib if it is supported and the file name ends with .gz. */
3226
3227 #ifdef SUPPORT_LIBZ
3228 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3229 {
3230 ingz = gzopen(pathname, "rb");
3231 if (ingz == NULL)
3232 {
3233 if (!silent)
3234 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3235 strerror(errno));
3236 return 2;
3237 }
3238 handle = (void *)ingz;
3239 frtype = FR_LIBZ;
3240 }
3241 else
3242 #endif
3243
3244 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3245
3246 #ifdef SUPPORT_LIBBZ2
3247 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3248 {
3249 inbz2 = BZ2_bzopen(pathname, "rb");
3250 handle = (void *)inbz2;
3251 frtype = FR_LIBBZ2;
3252 }
3253 else
3254 #endif
3255
3256 /* Otherwise use plain fopen(). The label is so that we can come back here if
3257 an attempt to read a .bz2 file indicates that it really is a plain file. */
3258
3259 #ifdef SUPPORT_LIBBZ2
3260 PLAIN_FILE:
3261 #endif
3262 {
3263 in = fopen(pathname, "rb");
3264 handle = (void *)in;
3265 frtype = FR_PLAIN;
3266 }
3267
3268 /* All the opening methods return errno when they fail. */
3269
3270 if (handle == NULL)
3271 {
3272 if (!silent)
3273 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3274 strerror(errno));
3275 return 2;
3276 }
3277
3278 /* Now grep the file */
3279
3280 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3281 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3282
3283 /* Close in an appropriate manner. */
3284
3285 #ifdef SUPPORT_LIBZ
3286 if (frtype == FR_LIBZ)
3287 gzclose(ingz);
3288 else
3289 #endif
3290
3291 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3292 read failed. If the error indicates that the file isn't in fact bzipped, try
3293 again as a normal file. */
3294
3295 #ifdef SUPPORT_LIBBZ2
3296 if (frtype == FR_LIBBZ2)
3297 {
3298 if (rc == 3)
3299 {
3300 int errnum;
3301 const char *err = BZ2_bzerror(inbz2, &errnum);
3302 if (errnum == BZ_DATA_ERROR_MAGIC)
3303 {
3304 BZ2_bzclose(inbz2);
3305 goto PLAIN_FILE;
3306 }
3307 else if (!silent)
3308 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3309 pathname, err);
3310 rc = 2; /* The normal "something went wrong" code */
3311 }
3312 BZ2_bzclose(inbz2);
3313 }
3314 else
3315 #endif
3316
3317 /* Normal file close */
3318
3319 fclose(in);
3320
3321 /* Pass back the yield from pcre2grep(). */
3322
3323 return rc;
3324 }
3325
3326
3327
3328 /*************************************************
3329 * Handle a single-letter, no data option *
3330 *************************************************/
3331
3332 static int
handle_option(int letter,int options)3333 handle_option(int letter, int options)
3334 {
3335 switch(letter)
3336 {
3337 case N_FOFFSETS: file_offsets = TRUE; break;
3338 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3339 case N_LBUFFER: line_buffered = TRUE; break;
3340 case N_LOFFSETS: line_offsets = number = TRUE; break;
3341 case N_NOJIT: use_jit = FALSE; break;
3342 case 'a': binary_files = BIN_TEXT; break;
3343 case 'c': count_only = TRUE; break;
3344 case 'F': options |= PCRE2_LITERAL; break;
3345 case 'H': filenames = FN_FORCE; break;
3346 case 'I': binary_files = BIN_NOMATCH; break;
3347 case 'h': filenames = FN_NONE; break;
3348 case 'i': options |= PCRE2_CASELESS; break;
3349 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3350 case 'L': filenames = FN_NOMATCH_ONLY; break;
3351 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3352 case 'n': number = TRUE; break;
3353
3354 case 'o':
3355 only_matching_last = add_number(0, only_matching_last);
3356 if (only_matching == NULL) only_matching = only_matching_last;
3357 break;
3358
3359 case 'q': quiet = TRUE; break;
3360 case 'r': dee_action = dee_RECURSE; break;
3361 case 's': silent = TRUE; break;
3362 case 't': show_total_count = TRUE; break;
3363 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3364 case 'v': invert = TRUE; break;
3365 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3366 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3367
3368 case 'V':
3369 {
3370 unsigned char buffer[128];
3371 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3372 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3373 }
3374 pcre2grep_exit(0);
3375 break;
3376
3377 default:
3378 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3379 pcre2grep_exit(usage(2));
3380 }
3381
3382 return options;
3383 }
3384
3385
3386
3387 /*************************************************
3388 * Construct printed ordinal *
3389 *************************************************/
3390
3391 /* This turns a number into "1st", "3rd", etc. */
3392
static char *
ordin(int n)
{
  /* The result is built in a static buffer, so each call overwrites the text
  produced by the previous one. */

  static char buffer[14];
  const char *suffix = "th";
  int digits = sprintf(buffer, "%d", n);
  int last_two = n % 100;

  /* 11, 12 and 13 take "th"; otherwise the final digit picks the suffix. */

  if (last_two < 11 || last_two > 13)
  {
    int last_one = last_two % 10;
    if (last_one == 1) suffix = "st";
    else if (last_one == 2) suffix = "nd";
    else if (last_one == 3) suffix = "rd";
  }

  strcpy(buffer + digits, suffix);
  return buffer;
}
3411
3412
3413
3414 /*************************************************
3415 * Compile a single pattern *
3416 *************************************************/
3417
3418 /* Do nothing if the pattern has already been compiled. This is the case for
3419 include/exclude patterns read from a file.
3420
3421 When the -F option has been used, each "pattern" may be a list of strings,
3422 separated by line breaks. They will be matched literally. We split such a
3423 string and compile the first substring, inserting an additional block into the
3424 pattern chain.
3425
3426 Arguments:
3427 p points to the pattern block
3428 options the PCRE options
3429 fromfile TRUE if the pattern was read from a file
3430 fromtext file name or identifying text (e.g. "include")
3431 count 0 if this is the only command line pattern, or
3432 number of the command line pattern, or
3433 linenumber for a pattern from a file
3434
3435 Returns: TRUE on success, FALSE after an error
3436 */
3437
3438 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3439 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3440 int count)
3441 {
3442 char *ps;
3443 int errcode;
3444 PCRE2_SIZE patlen, erroffset;
3445 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3446
3447 if (p->compiled != NULL) return TRUE;
3448 ps = p->string;
3449 patlen = p->length;
3450
3451 if ((options & PCRE2_LITERAL) != 0)
3452 {
3453 int ellength;
3454 char *eop = ps + patlen;
3455 char *pe = end_of_line(ps, eop, &ellength);
3456
3457 if (ellength != 0)
3458 {
3459 patlen = pe - ps - ellength;
3460 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3461 }
3462 }
3463
3464 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3465 &erroffset, compile_context);
3466
3467 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3468 ignore any JIT compiler errors, relying falling back to interpreting if
3469 anything goes wrong with JIT. */
3470
3471 if (p->compiled != NULL)
3472 {
3473 #ifdef SUPPORT_PCRE2GREP_JIT
3474 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3475 #endif
3476 return TRUE;
3477 }
3478
3479 /* Handle compile errors */
3480
3481 if (erroffset > patlen) erroffset = patlen;
3482 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3483
3484 if (fromfile)
3485 {
3486 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3487 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3488 }
3489 else
3490 {
3491 if (count == 0)
3492 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3493 fromtext, (int)erroffset, errmessbuffer);
3494 else
3495 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3496 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3497 }
3498
3499 return FALSE;
3500 }
3501
3502
3503
3504 /*************************************************
3505 * Read and compile a file of patterns *
3506 *************************************************/
3507
/* This is used for -f (files of patterns), --include-from, and --exclude-from.
3509
3510 Arguments:
3511 name the name of the file; "-" is stdin
3512 patptr pointer to the pattern chain anchor
3513 patlastptr pointer to the last pattern pointer
3514
3515 Returns: TRUE if all went well
3516 */
3517
3518 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3519 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3520 {
3521 int linenumber = 0;
3522 PCRE2_SIZE patlen;
3523 FILE *f;
3524 const char *filename;
3525 char buffer[MAXPATLEN+20];
3526
3527 if (strcmp(name, "-") == 0)
3528 {
3529 f = stdin;
3530 filename = stdin_name;
3531 }
3532 else
3533 {
3534 f = fopen(name, "r");
3535 if (f == NULL)
3536 {
3537 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3538 return FALSE;
3539 }
3540 filename = name;
3541 }
3542
3543 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3544 {
3545 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3546 linenumber++;
3547 if (patlen == 0) continue; /* Skip blank lines */
3548
3549 /* Note: this call to add_pattern() puts a pointer to the local variable
3550 "buffer" into the pattern chain. However, that pointer is used only when
3551 compiling the pattern, which happens immediately below, so we flatten it
3552 afterwards, as a precaution against any later code trying to use it. */
3553
3554 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3555 if (*patlastptr == NULL)
3556 {
3557 if (f != stdin) fclose(f);
3558 return FALSE;
3559 }
3560 if (*patptr == NULL) *patptr = *patlastptr;
3561
3562 /* This loop is needed because compiling a "pattern" when -F is set may add
3563 on additional literal patterns if the original contains a newline. In the
3564 common case, it never will, because read_one_line() stops at a newline.
3565 However, the -N option can be used to give pcre2grep a different newline
3566 setting. */
3567
3568 for(;;)
3569 {
3570 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3571 linenumber))
3572 {
3573 if (f != stdin) fclose(f);
3574 return FALSE;
3575 }
3576 (*patlastptr)->string = NULL; /* Insurance */
3577 if ((*patlastptr)->next == NULL) break;
3578 *patlastptr = (*patlastptr)->next;
3579 }
3580 }
3581
3582 if (f != stdin) fclose(f);
3583 return TRUE;
3584 }
3585
3586
3587
3588 /*************************************************
3589 * Main program *
3590 *************************************************/
3591
3592 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3593
3594 int
main(int argc,char ** argv)3595 main(int argc, char **argv)
3596 {
3597 int i, j;
3598 int rc = 1;
3599 BOOL only_one_at_top;
3600 patstr *cp;
3601 fnstr *fn;
3602 const char *locale_from = "--locale";
3603
3604 #ifdef SUPPORT_PCRE2GREP_JIT
3605 pcre2_jit_stack *jit_stack = NULL;
3606 #endif
3607
3608 /* In Windows, stdout is set up as a text stream, which means that \n is
3609 converted to \r\n. This causes output lines that are copied from the input to
3610 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3611 that stdout is a binary stream. Note that this means all other output to stdout
3612 must use STDOUT_NL to terminate lines. */
3613
3614 #ifdef WIN32
3615 _setmode(_fileno(stdout), _O_BINARY);
3616 #endif
3617
/* Set up default compile and match contexts and a match data block. */
3619
3620 compile_context = pcre2_compile_context_create(NULL);
3621 match_context = pcre2_match_context_create(NULL);
3622 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
3623 offsets = pcre2_get_ovector_pointer(match_data);
3624
3625 /* If string (script) callouts are supported, set up the callout processing
3626 function. */
3627
3628 #ifdef SUPPORT_PCRE2GREP_CALLOUT
3629 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
3630 #endif
3631
3632 /* Process the options */
3633
3634 for (i = 1; i < argc; i++)
3635 {
3636 option_item *op = NULL;
3637 char *option_data = (char *)""; /* default to keep compiler happy */
3638 BOOL longop;
3639 BOOL longopwasequals = FALSE;
3640
3641 if (argv[i][0] != '-') break;
3642
3643 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3644 but only if we have previously had -e or -f to define the patterns. */
3645
3646 if (argv[i][1] == 0)
3647 {
3648 if (pattern_files != NULL || patterns != NULL) break;
3649 else pcre2grep_exit(usage(2));
3650 }
3651
3652 /* Handle a long name option, or -- to terminate the options */
3653
3654 if (argv[i][1] == '-')
3655 {
3656 char *arg = argv[i] + 2;
3657 char *argequals = strchr(arg, '=');
3658
3659 if (*arg == 0) /* -- terminates options */
3660 {
3661 i++;
3662 break; /* out of the options-handling loop */
3663 }
3664
3665 longop = TRUE;
3666
3667 /* Some long options have data that follows after =, for example file=name.
3668 Some options have variations in the long name spelling: specifically, we
3669 allow "regexp" because GNU grep allows it, though I personally go along
3670 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3671 These options are entered in the table as "regex(p)". Options can be in
3672 both these categories. */
3673
3674 for (op = optionlist; op->one_char != 0; op++)
3675 {
3676 char *opbra = strchr(op->long_name, '(');
3677 char *equals = strchr(op->long_name, '=');
3678
3679 /* Handle options with only one spelling of the name */
3680
3681 if (opbra == NULL) /* Does not contain '(' */
3682 {
3683 if (equals == NULL) /* Not thing=data case */
3684 {
3685 if (strcmp(arg, op->long_name) == 0) break;
3686 }
3687 else /* Special case xxx=data */
3688 {
3689 int oplen = (int)(equals - op->long_name);
3690 int arglen = (argequals == NULL)?
3691 (int)strlen(arg) : (int)(argequals - arg);
3692 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3693 {
3694 option_data = arg + arglen;
3695 if (*option_data == '=')
3696 {
3697 option_data++;
3698 longopwasequals = TRUE;
3699 }
3700 break;
3701 }
3702 }
3703 }
3704
3705 /* Handle options with an alternate spelling of the name */
3706
3707 else
3708 {
3709 char buff1[24];
3710 char buff2[24];
3711 int ret;
3712
3713 int baselen = (int)(opbra - op->long_name);
3714 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3715 int arglen = (argequals == NULL || equals == NULL)?
3716 (int)strlen(arg) : (int)(argequals - arg);
3717
3718 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3719 ret < 0 || ret > (int)sizeof(buff1)) ||
3720 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3721 fulllen - baselen - 2, opbra + 1),
3722 ret < 0 || ret > (int)sizeof(buff2)))
3723 {
3724 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3725 op->long_name);
3726 pcre2grep_exit(2);
3727 }
3728
3729 if (strncmp(arg, buff1, arglen) == 0 ||
3730 strncmp(arg, buff2, arglen) == 0)
3731 {
3732 if (equals != NULL && argequals != NULL)
3733 {
3734 option_data = argequals;
3735 if (*option_data == '=')
3736 {
3737 option_data++;
3738 longopwasequals = TRUE;
3739 }
3740 }
3741 break;
3742 }
3743 }
3744 }
3745
3746 if (op->one_char == 0)
3747 {
3748 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3749 pcre2grep_exit(usage(2));
3750 }
3751 }
3752
3753 /* Jeffrey Friedl's debugging harness uses these additional options which
3754 are not in the right form for putting in the option table because they use
3755 only one hyphen, yet are more than one character long. By putting them
3756 separately here, they will not get displayed as part of the help() output,
3757 but I don't think Jeffrey will care about that. */
3758
3759 #ifdef JFRIEDL_DEBUG
3760 else if (strcmp(argv[i], "-pre") == 0) {
3761 jfriedl_prefix = argv[++i];
3762 continue;
3763 } else if (strcmp(argv[i], "-post") == 0) {
3764 jfriedl_postfix = argv[++i];
3765 continue;
3766 } else if (strcmp(argv[i], "-XT") == 0) {
3767 sscanf(argv[++i], "%d", &jfriedl_XT);
3768 continue;
3769 } else if (strcmp(argv[i], "-XR") == 0) {
3770 sscanf(argv[++i], "%d", &jfriedl_XR);
3771 continue;
3772 }
3773 #endif
3774
3775
3776 /* One-char options; many that have no data may be in a single argument; we
3777 continue till we hit the last one or one that needs data. */
3778
3779 else
3780 {
3781 char *s = argv[i] + 1;
3782 longop = FALSE;
3783
3784 while (*s != 0)
3785 {
3786 for (op = optionlist; op->one_char != 0; op++)
3787 {
3788 if (*s == op->one_char) break;
3789 }
3790 if (op->one_char == 0)
3791 {
3792 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3793 *s, argv[i]);
3794 pcre2grep_exit(usage(2));
3795 }
3796
3797 option_data = s+1;
3798
3799 /* Break out if this is the last character in the string; it's handled
3800 below like a single multi-char option. */
3801
3802 if (*option_data == 0) break;
3803
3804 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3805 are used for ones that either have a numerical number or defaults, i.e.
3806 the data is optional. If a digit follows, there is data; if not, carry on
3807 with other single-character options in the same string. */
3808
3809 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3810 {
3811 if (isdigit((unsigned char)s[1])) break;
3812 }
3813 else /* Check for an option with data */
3814 {
3815 if (op->type != OP_NODATA) break;
3816 }
3817
3818 /* Handle a single-character option with no data, then loop for the
3819 next character in the string. */
3820
3821 pcre2_options = handle_option(*s++, pcre2_options);
3822 }
3823 }
3824
/* At this point we should have op pointing to a matched option. If the type
is OP_NODATA, it means that there is no data, and the option might set
something in the PCRE options. */
3828
3829 if (op->type == OP_NODATA)
3830 {
3831 pcre2_options = handle_option(op->one_char, pcre2_options);
3832 continue;
3833 }
3834
3835 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3836 either has a value or defaults to something. It cannot have data in a
3837 separate item. At the moment, the only such options are "colo(u)r",
3838 "only-matching", and Jeffrey Friedl's special -S debugging option. */
3839
3840 if (*option_data == 0 &&
3841 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3842 op->type == OP_OP_NUMBERS))
3843 {
3844 switch (op->one_char)
3845 {
3846 case N_COLOUR:
3847 colour_option = "auto";
3848 break;
3849
3850 case 'o':
3851 only_matching_last = add_number(0, only_matching_last);
3852 if (only_matching == NULL) only_matching = only_matching_last;
3853 break;
3854
3855 #ifdef JFRIEDL_DEBUG
3856 case 'S':
3857 S_arg = 0;
3858 break;
3859 #endif
3860 }
3861 continue;
3862 }
3863
3864 /* Otherwise, find the data string for the option. */
3865
3866 if (*option_data == 0)
3867 {
3868 if (i >= argc - 1 || longopwasequals)
3869 {
3870 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3871 pcre2grep_exit(usage(2));
3872 }
3873 option_data = argv[++i];
3874 }
3875
3876 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3877 added to a chain of numbers. */
3878
3879 if (op->type == OP_OP_NUMBERS)
3880 {
3881 unsigned long int n = decode_number(option_data, op, longop);
3882 omdatastr *omd = (omdatastr *)op->dataptr;
3883 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3884 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3885 }
3886
3887 /* If the option type is OP_PATLIST, it's the -e option, or one of the
3888 include/exclude options, which can be called multiple times to create lists
3889 of patterns. */
3890
3891 else if (op->type == OP_PATLIST)
3892 {
3893 patdatastr *pd = (patdatastr *)op->dataptr;
3894 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
3895 *(pd->lastptr));
3896 if (*(pd->lastptr) == NULL) goto EXIT2;
3897 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3898 }
3899
3900 /* If the option type is OP_FILELIST, it's one of the options that names a
3901 file. */
3902
3903 else if (op->type == OP_FILELIST)
3904 {
3905 fndatastr *fd = (fndatastr *)op->dataptr;
3906 fn = (fnstr *)malloc(sizeof(fnstr));
3907 if (fn == NULL)
3908 {
3909 fprintf(stderr, "pcre2grep: malloc failed\n");
3910 goto EXIT2;
3911 }
3912 fn->next = NULL;
3913 fn->name = option_data;
3914 if (*(fd->anchor) == NULL)
3915 *(fd->anchor) = fn;
3916 else
3917 (*(fd->lastptr))->next = fn;
3918 *(fd->lastptr) = fn;
3919 }
3920
3921 /* Handle OP_BINARY_FILES */
3922
3923 else if (op->type == OP_BINFILES)
3924 {
3925 if (strcmp(option_data, "binary") == 0)
3926 binary_files = BIN_BINARY;
3927 else if (strcmp(option_data, "without-match") == 0)
3928 binary_files = BIN_NOMATCH;
3929 else if (strcmp(option_data, "text") == 0)
3930 binary_files = BIN_TEXT;
3931 else
3932 {
3933 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3934 option_data);
3935 pcre2grep_exit(usage(2));
3936 }
3937 }
3938
3939 /* Otherwise, deal with a single string or numeric data value. */
3940
3941 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3942 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
3943 {
3944 *((char **)op->dataptr) = option_data;
3945 }
3946 else
3947 {
3948 unsigned long int n = decode_number(option_data, op, longop);
3949 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3950 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
3951 else *((int *)op->dataptr) = n;
3952 }
3953 }
3954
3955 /* Options have been decoded. If -C was used, its value is used as a default
3956 for -A and -B. */
3957
3958 if (both_context > 0)
3959 {
3960 if (after_context == 0) after_context = both_context;
3961 if (before_context == 0) before_context = both_context;
3962 }
3963
3964 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
3965 permitted. They display, each in their own way, only the data that has matched.
3966 */
3967
3968 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
3969 file_offsets + line_offsets;
3970
3971 if (only_matching_count > 1)
3972 {
3973 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
3974 "--file-offsets and/or --line-offsets\n");
3975 pcre2grep_exit(usage(2));
3976 }
3977
3978 /* Check the text supplied to --output for errors. */
3979
3980 if (output_text != NULL &&
3981 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
3982 goto EXIT2;
3983
3984 /* Put limits into the match data block. */
3985
3986 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
3987 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
3988 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
3989
3990 /* If a locale has not been provided as an option, see if the LC_CTYPE or
3991 LC_ALL environment variable is set, and if so, use it. */
3992
3993 if (locale == NULL)
3994 {
3995 locale = getenv("LC_ALL");
3996 locale_from = "LC_ALL";
3997 }
3998
3999 if (locale == NULL)
4000 {
4001 locale = getenv("LC_CTYPE");
4002 locale_from = "LC_CTYPE";
4003 }
4004
4005 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4006 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4007
4008 if (locale != NULL)
4009 {
4010 if (setlocale(LC_CTYPE, locale) == NULL)
4011 {
4012 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4013 locale, locale_from);
4014 goto EXIT2;
4015 }
4016 character_tables = pcre2_maketables(NULL);
4017 pcre2_set_character_tables(compile_context, character_tables);
4018 }
4019
4020 /* Sort out colouring */
4021
4022 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4023 {
4024 if (strcmp(colour_option, "always") == 0)
4025 #ifdef WIN32
4026 do_ansi = !is_stdout_tty(),
4027 #endif
4028 do_colour = TRUE;
4029 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4030 else
4031 {
4032 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4033 colour_option);
4034 goto EXIT2;
4035 }
4036 if (do_colour)
4037 {
4038 char *cs = getenv("PCRE2GREP_COLOUR");
4039 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4040 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4041 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4042 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4043 if (cs == NULL) cs = getenv("GREP_COLOR");
4044 if (cs != NULL)
4045 {
4046 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4047 }
4048 #ifdef WIN32
4049 init_colour_output();
4050 #endif
4051 }
4052 }
4053
4054 /* Sort out a newline setting. */
4055
4056 if (newline_arg != NULL)
4057 {
4058 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4059 endlinetype++)
4060 {
4061 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4062 }
4063 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4064 pcre2_set_newline(compile_context, endlinetype);
4065 else
4066 {
4067 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4068 newline_arg);
4069 goto EXIT2;
4070 }
4071 }
4072
4073 /* Find default newline convention */
4074
4075 else
4076 {
4077 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4078 }
4079
4080 /* Interpret the text values for -d and -D */
4081
4082 if (dee_option != NULL)
4083 {
4084 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4085 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4086 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4087 else
4088 {
4089 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4090 goto EXIT2;
4091 }
4092 }
4093
4094 if (DEE_option != NULL)
4095 {
4096 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4097 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4098 else
4099 {
4100 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4101 goto EXIT2;
4102 }
4103 }
4104
4105 /* Set the extra options */
4106
4107 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4108
4109 /* Check the values for Jeffrey Friedl's debugging options. */
4110
4111 #ifdef JFRIEDL_DEBUG
4112 if (S_arg > 9)
4113 {
4114 fprintf(stderr, "pcre2grep: bad value for -S option\n");
4115 return 2;
4116 }
4117 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4118 {
4119 if (jfriedl_XT == 0) jfriedl_XT = 1;
4120 if (jfriedl_XR == 0) jfriedl_XR = 1;
4121 }
4122 #endif
4123
4124 /* If use_jit is set, check whether JIT is available. If not, do not try
4125 to use JIT. */
4126
4127 if (use_jit)
4128 {
4129 uint32_t answer;
4130 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4131 if (!answer) use_jit = FALSE;
4132 }
4133
4134 /* Get memory for the main buffer. */
4135
4136 if (bufthird <= 0)
4137 {
4138 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4139 goto EXIT2;
4140 }
4141
4142 bufsize = 3*bufthird;
4143 main_buffer = (char *)malloc(bufsize);
4144
4145 if (main_buffer == NULL)
4146 {
4147 fprintf(stderr, "pcre2grep: malloc failed\n");
4148 goto EXIT2;
4149 }
4150
4151 /* If no patterns were provided by -e, and there are no files provided by -f,
4152 the first argument is the one and only pattern, and it must exist. */
4153
4154 if (patterns == NULL && pattern_files == NULL)
4155 {
4156 if (i >= argc) return usage(2);
4157 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4158 NULL);
4159 i++;
4160 if (patterns == NULL) goto EXIT2;
4161 }
4162
4163 /* Compile the patterns that were provided on the command line, either by
4164 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4165 after all the command-line options are read so that we know which PCRE options
4166 to use. When -F is used, compile_pattern() may add another block into the
4167 chain, so we must not access the next pointer till after the compile. */
4168
4169 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4170 {
4171 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4172 (j == 1 && patterns->next == NULL)? 0 : j))
4173 goto EXIT2;
4174 }
4175
4176 /* Read and compile the regular expressions that are provided in files. */
4177
4178 for (fn = pattern_files; fn != NULL; fn = fn->next)
4179 {
4180 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4181 }
4182
4183 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4184
4185 #ifdef SUPPORT_PCRE2GREP_JIT
4186 if (use_jit)
4187 {
4188 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4189 if (jit_stack != NULL )
4190 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4191 }
4192 #endif
4193
4194 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4195 adjust the options. */
4196
4197 pcre2_options &= ~PCRE2_LITERAL;
4198 (void)pcre2_set_compile_extra_options(compile_context, 0);
4199
4200 /* If there are include or exclude patterns read from the command line, compile
4201 them. */
4202
4203 for (j = 0; j < 4; j++)
4204 {
4205 int k;
4206 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4207 {
4208 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4209 (k == 1 && cp->next == NULL)? 0 : k))
4210 goto EXIT2;
4211 }
4212 }
4213
4214 /* Read and compile include/exclude patterns from files. */
4215
4216 for (fn = include_from; fn != NULL; fn = fn->next)
4217 {
4218 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4219 goto EXIT2;
4220 }
4221
4222 for (fn = exclude_from; fn != NULL; fn = fn->next)
4223 {
4224 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4225 goto EXIT2;
4226 }
4227
4228 /* If there are no files that contain lists of files to search, and there are
4229 no file arguments, search stdin, and then exit. */
4230
4231 if (file_lists == NULL && i >= argc)
4232 {
4233 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4234 (filenames > FN_DEFAULT)? stdin_name : NULL);
4235 goto EXIT;
4236 }
4237
4238 /* If any files that contain lists of files to search have been specified,
4239 read them line by line and search the given files. */
4240
4241 for (fn = file_lists; fn != NULL; fn = fn->next)
4242 {
4243 char buffer[FNBUFSIZ];
4244 FILE *fl;
4245 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4246 {
4247 fl = fopen(fn->name, "rb");
4248 if (fl == NULL)
4249 {
4250 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4251 strerror(errno));
4252 goto EXIT2;
4253 }
4254 }
4255 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4256 {
4257 int frc;
4258 char *end = buffer + (int)strlen(buffer);
4259 while (end > buffer && isspace(end[-1])) end--;
4260 *end = 0;
4261 if (*buffer != 0)
4262 {
4263 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4264 if (frc > 1) rc = frc;
4265 else if (frc == 0 && rc == 1) rc = 0;
4266 }
4267 }
4268 if (fl != stdin) fclose(fl);
4269 }
4270
4271 /* After handling file-list, work through remaining arguments. Pass in the fact
4272 that there is only one argument at top level - this suppresses the file name if
4273 the argument is not a directory and filenames are not otherwise forced. */
4274
4275 only_one_at_top = i == argc - 1 && file_lists == NULL;
4276
4277 for (; i < argc; i++)
4278 {
4279 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4280 only_one_at_top);
4281 if (frc > 1) rc = frc;
4282 else if (frc == 0 && rc == 1) rc = 0;
4283 }
4284
4285 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4286 /* If separating builtin echo callouts by implicit newline, add one more for
4287 the final item. */
4288
4289 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4290 fprintf(stdout, STDOUT_NL);
4291 #endif
4292
4293 /* Show the total number of matches if requested, but not if only one file's
4294 count was printed. */
4295
4296 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4297 {
4298 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4299 fprintf(stdout, "TOTAL:");
4300 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4301 }
4302
4303 EXIT:
4304 #ifdef SUPPORT_PCRE2GREP_JIT
4305 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4306 #endif
4307
4308 free(main_buffer);
4309 free((void *)character_tables);
4310
4311 pcre2_compile_context_free(compile_context);
4312 pcre2_match_context_free(match_context);
4313 pcre2_match_data_free(match_data);
4314
4315 free_pattern_chain(patterns);
4316 free_pattern_chain(include_patterns);
4317 free_pattern_chain(include_dir_patterns);
4318 free_pattern_chain(exclude_patterns);
4319 free_pattern_chain(exclude_dir_patterns);
4320
4321 free_file_chain(exclude_from);
4322 free_file_chain(include_from);
4323 free_file_chain(pattern_files);
4324 free_file_chain(file_lists);
4325
4326 while (only_matching != NULL)
4327 {
4328 omstr *this = only_matching;
4329 only_matching = this->next;
4330 free(this);
4331 }
4332
4333 pcre2grep_exit(rc);
4334
4335 EXIT2:
4336 rc = 2;
4337 goto EXIT;
4338 }
4339
4340 /* End of pcre2grep */
4341