1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2018 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65
66 /* Some cmake's define it still */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76
77 #ifdef WIN32
78 #include <io.h> /* For _setmode() */
79 #include <fcntl.h> /* For _O_BINARY */
80 #endif
81
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112
113 #define FALSE 0
114 #define TRUE 1
115
116 typedef int BOOL;
117
118 #define OFFSET_SIZE 33
119
120 #if BUFSIZ > 8192
121 #define MAXPATLEN BUFSIZ
122 #else
123 #define MAXPATLEN 8192
124 #endif
125
126 #define FNBUFSIZ 2048
127 #define ERRBUFSIZ 256
128
129 /* Values for the "filenames" variable, which specifies options for file name
130 output. The order is important; it is assumed that a file name is wanted for
131 all values greater than FN_DEFAULT. */
132
133 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
134
135 /* File reading styles */
136
137 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
138
139 /* Actions for the -d and -D options */
140
141 enum { dee_READ, dee_SKIP, dee_RECURSE };
142 enum { DEE_READ, DEE_SKIP };
143
144 /* Actions for special processing options (flag bits) */
145
146 #define PO_WORD_MATCH 0x0001
147 #define PO_LINE_MATCH 0x0002
148 #define PO_FIXED_STRINGS 0x0004
149
150 /* Binary file options */
151
152 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
153
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result and then does nothing with it.
Oddly, this does not also seem to apply to fprintf().

The test is wrapped in do { } while (0) so that the macro followed by its
semicolon is exactly one statement. This lets it be used safely as the
unbraced body of an if/else. */

#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
161
162 /* Under Windows, we have to set stdout to be binary, so that it does not
163 convert \r\n at the ends of output lines to \r\r\n. However, that means that
164 any messages written to stdout must have \r\n as their line terminator. This is
165 handled by using STDOUT_NL as the newline string. We also use a normal double
166 quote for the example, as single quotes aren't usually available. */
167
168 #ifdef WIN32
169 #define STDOUT_NL "\r\n"
170 #define QUOT "\""
171 #else
172 #define STDOUT_NL "\n"
173 #define QUOT "'"
174 #endif
175
176
177
178 /*************************************************
179 * Global variables *
180 *************************************************/
181
182 /* Jeffrey Friedl has some debugging requirements that are not part of the
183 regular code. */
184
185 #ifdef JFRIEDL_DEBUG
186 static int S_arg = -1;
187 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
188 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
189 static const char *jfriedl_prefix = "";
190 static const char *jfriedl_postfix = "";
191 #endif
192
193 static const char *colour_string = "1;31";
194 static const char *colour_option = NULL;
195 static const char *dee_option = NULL;
196 static const char *DEE_option = NULL;
197 static const char *locale = NULL;
198 static const char *newline_arg = NULL;
199 static const char *om_separator = NULL;
200 static const char *stdin_name = "(standard input)";
201 static const char *output_text = NULL;
202
203 static char *main_buffer = NULL;
204
205 static int after_context = 0;
206 static int before_context = 0;
207 static int binary_files = BIN_BINARY;
208 static int both_context = 0;
209 static int bufthird = PCRE2GREP_BUFSIZE;
210 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
211 static int bufsize = 3*PCRE2GREP_BUFSIZE;
212 static int endlinetype;
213
214 static unsigned long int total_count = 0;
215 static unsigned long int counts_printed = 0;
216
217 #ifdef WIN32
218 static int dee_action = dee_SKIP;
219 #else
220 static int dee_action = dee_READ;
221 #endif
222
223 static int DEE_action = DEE_READ;
224 static int error_count = 0;
225 static int filenames = FN_DEFAULT;
226
227 #ifdef SUPPORT_PCRE2GREP_JIT
228 static BOOL use_jit = TRUE;
229 #else
230 static BOOL use_jit = FALSE;
231 #endif
232
233 static const uint8_t *character_tables = NULL;
234
235 static uint32_t pcre2_options = 0;
236 static uint32_t extra_options = 0;
237 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
238 static uint32_t match_limit = 0;
239 static uint32_t depth_limit = 0;
240
241 static pcre2_compile_context *compile_context;
242 static pcre2_match_context *match_context;
243 static pcre2_match_data *match_data;
244 static PCRE2_SIZE *offsets;
245
246 static BOOL count_only = FALSE;
247 static BOOL do_colour = FALSE;
248 #ifdef WIN32
249 static BOOL do_ansi = FALSE;
250 #endif
251 static BOOL file_offsets = FALSE;
252 static BOOL hyphenpending = FALSE;
253 static BOOL invert = FALSE;
254 static BOOL line_buffered = FALSE;
255 static BOOL line_offsets = FALSE;
256 static BOOL multiline = FALSE;
257 static BOOL number = FALSE;
258 static BOOL omit_zero_count = FALSE;
259 static BOOL resource_error = FALSE;
260 static BOOL quiet = FALSE;
261 static BOOL show_total_count = FALSE;
262 static BOOL silent = FALSE;
263 static BOOL utf = FALSE;
264
265 /* Structure for list of --only-matching capturing numbers. */
266
267 typedef struct omstr {
268 struct omstr *next;
269 int groupnum;
270 } omstr;
271
272 static omstr *only_matching = NULL;
273 static omstr *only_matching_last = NULL;
274 static int only_matching_count;
275
276 /* Structure for holding the two variables that describe a number chain. */
277
278 typedef struct omdatastr {
279 omstr **anchor;
280 omstr **lastptr;
281 } omdatastr;
282
283 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
284
285 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
286
287 typedef struct fnstr {
288 struct fnstr *next;
289 char *name;
290 } fnstr;
291
292 static fnstr *exclude_from = NULL;
293 static fnstr *exclude_from_last = NULL;
294 static fnstr *include_from = NULL;
295 static fnstr *include_from_last = NULL;
296
297 static fnstr *file_lists = NULL;
298 static fnstr *file_lists_last = NULL;
299 static fnstr *pattern_files = NULL;
300 static fnstr *pattern_files_last = NULL;
301
302 /* Structure for holding the two variables that describe a file name chain. */
303
304 typedef struct fndatastr {
305 fnstr **anchor;
306 fnstr **lastptr;
307 } fndatastr;
308
309 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
310 static fndatastr include_from_data = { &include_from, &include_from_last };
311 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
312 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
313
314 /* Structure for pattern and its compiled form; used for matching patterns and
315 also for include/exclude patterns. */
316
317 typedef struct patstr {
318 struct patstr *next;
319 char *string;
320 PCRE2_SIZE length;
321 pcre2_code *compiled;
322 } patstr;
323
324 static patstr *patterns = NULL;
325 static patstr *patterns_last = NULL;
326 static patstr *include_patterns = NULL;
327 static patstr *include_patterns_last = NULL;
328 static patstr *exclude_patterns = NULL;
329 static patstr *exclude_patterns_last = NULL;
330 static patstr *include_dir_patterns = NULL;
331 static patstr *include_dir_patterns_last = NULL;
332 static patstr *exclude_dir_patterns = NULL;
333 static patstr *exclude_dir_patterns_last = NULL;
334
335 /* Structure holding the two variables that describe a pattern chain. A pointer
336 to such structures is used for each appropriate option. */
337
338 typedef struct patdatastr {
339 patstr **anchor;
340 patstr **lastptr;
341 } patdatastr;
342
343 static patdatastr match_patdata = { &patterns, &patterns_last };
344 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
345 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
346 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
347 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
348
349 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
350 &include_dir_patterns, &exclude_dir_patterns };
351
352 static const char *incexname[4] = { "--include", "--exclude",
353 "--include-dir", "--exclude-dir" };
354
355 /* Structure for options and list of them */
356
357 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
358 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
359
360 typedef struct option_item {
361 int type;
362 int one_char;
363 void *dataptr;
364 const char *long_name;
365 const char *help_text;
366 } option_item;
367
368 /* Options without a single-letter equivalent get a negative value. This can be
369 used to identify them. */
370
371 #define N_COLOUR (-1)
372 #define N_EXCLUDE (-2)
373 #define N_EXCLUDE_DIR (-3)
374 #define N_HELP (-4)
375 #define N_INCLUDE (-5)
376 #define N_INCLUDE_DIR (-6)
377 #define N_LABEL (-7)
378 #define N_LOCALE (-8)
379 #define N_NULL (-9)
380 #define N_LOFFSETS (-10)
381 #define N_FOFFSETS (-11)
382 #define N_LBUFFER (-12)
383 #define N_H_LIMIT (-13)
384 #define N_M_LIMIT (-14)
385 #define N_M_LIMIT_DEP (-15)
386 #define N_BUFSIZE (-16)
387 #define N_NOJIT (-17)
388 #define N_FILE_LIST (-18)
389 #define N_BINARY_FILES (-19)
390 #define N_EXCLUDE_FROM (-20)
391 #define N_INCLUDE_FROM (-21)
392 #define N_OM_SEPARATOR (-22)
393 #define N_MAX_BUFSIZE (-23)
394
395 static option_item optionlist[] = {
396 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
397 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
398 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
399 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
400 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
401 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
402 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
403 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
404 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
405 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
406 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
407 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
408 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
409 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
410 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
411 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
412 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
413 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
414 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
415 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
416 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
417 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
418 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
419 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
420 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
421 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
422 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
423 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
424 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
425 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
426 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
427 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
428 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
429 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
430 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
431 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
432 #ifdef SUPPORT_PCRE2GREP_JIT
433 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
434 #else
435 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
436 #endif
437 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
438 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
439 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
440 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
441 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
442 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
443 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
444 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
445 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
446 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
447 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
448 #ifdef JFRIEDL_DEBUG
449 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
450 #endif
451 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
452 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
453 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
454 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
455 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
456 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
457 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
458 { OP_NODATA, 0, NULL, NULL, NULL }
459 };
460
461 /* Table of names for newline types. Must be kept in step with the definitions
462 of PCRE2_NEWLINE_xx in pcre2.h. */
463
464 static const char *newlines[] = {
465 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
466
467 /* UTF-8 tables - used only when the newline setting is "any". */
468
469 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
470
471 const char utf8_table4[] = {
472 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
473 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
474 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
475 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
476
477
478 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
479 /*************************************************
480 * Emulated memmove() for systems without it *
481 *************************************************/
482
/* Overlap-safe byte copy for C libraries that have no memmove(). When bcopy()
is available it does the work; otherwise the bytes are copied one at a time,
as some non-Unix environments have neither function.

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);     /* Note the argument order: source first */
return d;
#else
unsigned char *out = (unsigned char *)d;
const unsigned char *in = (const unsigned char *)s;

if (out > in)
  {
  /* The destination lies above the source, so copy from the top downwards to
  avoid overwriting source bytes that have not yet been read. */
  size_t remaining = n;
  while (remaining > 0)
    {
    remaining--;
    out[remaining] = in[remaining];
    }
  }
else
  {
  size_t k;
  for (k = 0; k < n; k++) out[k] = in[k];
  }

return d;
#endif /* not HAVE_BCOPY */
}
511 #undef memmove
512 #define memmove(d,s,n) emulated_memmove(d,s,n)
513 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
514
515
516 /*************************************************
517 * Case-independent string compare *
518 *************************************************/
519
/* Compare two zero-terminated strings, treating upper and lower case letters
(as defined by tolower() in the current locale) as equal.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal ignoring case
           -1 if str1 sorts before str2 at the first difference
           +1 if str1 sorts after str2 at the first difference
*/

static int
strcmpic(const char *str1, const char *str2)
{
/* The bytes are read as unsigned char because passing a negative char value
to tolower() is undefined behaviour. */

const unsigned char *a = (const unsigned char *)str1;
const unsigned char *b = (const unsigned char *)str2;

while (*a != '\0' || *b != '\0')
  {
  int c1 = tolower(*a++);
  int c2 = tolower(*b++);

  /* If one string has ended, its terminating zero differs from the other
  string's character, so we return here without reading past the end. */

  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
532
533
534 /*************************************************
535 * Parse GREP_COLORS *
536 *************************************************/
537
/* Pick the value of the "ms" item, or failing that the "mt" item, out of a
GREP_COLORS-style string. The value runs up to the next colon or the end of the
string and is truncated to 15 characters. It is returned in a static buffer
that is overwritten by the next call.

Argument:   the string, possibly NULL
Returns:    the value of ms or mt, or NULL if neither is present
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *value;
size_t used = 0;

if (gc == NULL) return NULL;

/* "ms=" takes precedence wherever it occurs in the string. */

value = strstr(gc, "ms=");
if (value == NULL)
  {
  value = strstr(gc, "mt=");
  if (value == NULL) return NULL;
  }

for (value += 3; *value != ':' && *value != '\0'; value++)
  {
  if (used >= sizeof(seq) - 1) break;
  seq[used++] = *value;
  }

seq[used] = '\0';
return seq;
}
561
562
563 /*************************************************
564 * Exit from the program *
565 *************************************************/
566
567 /* If there has been a resource error, give a suitable message.
568
569 Argument: the return code
570 Returns: does not return
571 */
572
573 static void
pcre2grep_exit(int rc)574 pcre2grep_exit(int rc)
575 {
576 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
577 status of 1, which is not helpful. To help with this problem, define a symbol
578 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
579 therein. */
580
581 #ifdef __VMS
582 char val_buf[4];
583 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
584 $DESCRIPTOR(sym_val, val_buf);
585 sprintf(val_buf, "%d", rc);
586 sym_val.dsc$w_length = strlen(val_buf);
587 lib$set_symbol(&sym_nam, &sym_val);
588 #endif
589
590 if (resource_error)
591 {
592 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
593 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
594 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
595 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
596 }
597 exit(rc);
598 }
599
600
601 /*************************************************
602 * Add item to chain of patterns *
603 *************************************************/
604
605 /* Used to add an item onto a chain, or just return an unconnected item if the
606 "after" argument is NULL.
607
608 Arguments:
609 s pattern string to add
610 patlen length of pattern
611 after if not NULL points to item to insert after
612
613 Returns: new pattern block or NULL on error
614 */
615
616 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)617 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
618 {
619 patstr *p = (patstr *)malloc(sizeof(patstr));
620 if (p == NULL)
621 {
622 fprintf(stderr, "pcre2grep: malloc failed\n");
623 pcre2grep_exit(2);
624 }
625 if (patlen > MAXPATLEN)
626 {
627 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
628 MAXPATLEN);
629 free(p);
630 return NULL;
631 }
632 p->next = NULL;
633 p->string = s;
634 p->length = patlen;
635 p->compiled = NULL;
636
637 if (after != NULL)
638 {
639 p->next = after->next;
640 after->next = p;
641 }
642 return p;
643 }
644
645
646 /*************************************************
647 * Free chain of patterns *
648 *************************************************/
649
650 /* Used for several chains of patterns.
651
652 Argument: pointer to start of chain
653 Returns: nothing
654 */
655
656 static void
free_pattern_chain(patstr * pc)657 free_pattern_chain(patstr *pc)
658 {
659 while (pc != NULL)
660 {
661 patstr *p = pc;
662 pc = p->next;
663 if (p->compiled != NULL) pcre2_code_free(p->compiled);
664 free(p);
665 }
666 }
667
668
669 /*************************************************
670 * Free chain of file names *
671 *************************************************/
672
673 /*
674 Argument: pointer to start of chain
675 Returns: nothing
676 */
677
678 static void
free_file_chain(fnstr * fn)679 free_file_chain(fnstr *fn)
680 {
681 while (fn != NULL)
682 {
683 fnstr *f = fn;
684 fn = f->next;
685 free(f);
686 }
687 }
688
689
690 /*************************************************
691 * OS-specific functions *
692 *************************************************/
693
694 /* These definitions are needed in all Windows environments, even those where
695 Unix-style directory scanning can be used (see below). */
696
697 #ifdef WIN32
698
699 #ifndef STRICT
700 # define STRICT
701 #endif
702 #ifndef WIN32_LEAN_AND_MEAN
703 # define WIN32_LEAN_AND_MEAN
704 #endif
705
706 #include <windows.h>
707
/* TRUE if a name contains one of the wildcard characters '*' or '?' */
#define iswild(name) (strpbrk((name), "*?") != NULL)

/* Convert ANSI BGR format to RGB used by Windows: bits 0 and 2 are swapped and
bit 1 is kept. The argument is parenthesized at each use so that an expression
containing low-precedence operators (for example a|b) is converted as a whole.
*/
#define BGR_RGB(x) ((((x) & 1) ? 4 : 0) | ((x) & 2) | (((x) & 4) ? 1 : 0))
712
713 static HANDLE hstdout;
714 static CONSOLE_SCREEN_BUFFER_INFO csbi;
715 static WORD match_colour;
716
717 static WORD
decode_ANSI_colour(const char * cs)718 decode_ANSI_colour(const char *cs)
719 {
720 WORD result = csbi.wAttributes;
721 while (*cs)
722 {
723 if (isdigit(*cs))
724 {
725 int code = atoi(cs);
726 if (code == 1) result |= 0x08;
727 else if (code == 4) result |= 0x8000;
728 else if (code == 5) result |= 0x80;
729 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
730 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
731 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
732 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
733 /* aixterm high intensity colour codes */
734 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
735 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
736
737 while (isdigit(*cs)) cs++;
738 }
739 if (*cs) cs++;
740 }
741 return result;
742 }
743
744
745 static void
init_colour_output()746 init_colour_output()
747 {
748 if (do_colour)
749 {
750 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
751 /* This fails when redirected to con; try again if so. */
752 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
753 {
754 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
755 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
756 GetConsoleScreenBufferInfo(hcon, &csbi);
757 CloseHandle(hcon);
758 }
759 match_colour = decode_ANSI_colour(colour_string);
760 /* No valid colour found - turn off colouring */
761 if (!match_colour) do_colour = FALSE;
762 }
763 }
764
765 #endif /* WIN32 */
766
767
768 /* The following sets of functions are defined so that they can be made system
769 specific. At present there are versions for Unix-style environments, Windows,
770 native z/OS, and "no support". */
771
772
773 /************* Directory scanning Unix-style and z/OS ***********/
774
775 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
776 #include <sys/types.h>
777 #include <sys/stat.h>
778 #include <dirent.h>
779
780 #if defined NATIVE_ZOS
781 /************* Directory and PDS/E scanning for z/OS ***********/
782 /************* z/OS looks mostly like Unix with USS ************/
783 /* However, z/OS needs the #include statements in this header */
784 #include "pcrzosfs.h"
785 /* That header is not included in the main PCRE distribution because
786 other apparatus is needed to compile pcre2grep for z/OS. The header
787 can be found in the special z/OS distribution, which is available
788 from www.zaconsultants.net or from www.cbttape.org. */
789 #endif
790
791 typedef DIR directory_type;
792 #define FILESEP '/'
793
/* Test whether a path names a directory. If stat() fails the answer is "no",
in the expectation that opening the path as a file will then fail.

Argument:   the path
Returns:    non-zero if the path is a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat info;
return (stat(filename, &info) < 0)? 0 : S_ISDIR(info.st_mode);
}
802
803 static directory_type *
opendirectory(char * filename)804 opendirectory(char *filename)
805 {
806 return opendir(filename);
807 }
808
809 static char *
readdirectory(directory_type * dir)810 readdirectory(directory_type *dir)
811 {
812 for (;;)
813 {
814 struct dirent *dent = readdir(dir);
815 if (dent == NULL) return NULL;
816 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
817 return dent->d_name;
818 }
819 /* Control never reaches here */
820 }
821
822 static void
closedirectory(directory_type * dir)823 closedirectory(directory_type *dir)
824 {
825 closedir(dir);
826 }
827
828
829 /************* Test for regular file, Unix-style **********/
830
/* Test whether a path names a regular file. If stat() fails the answer is
"yes", in the expectation that opening the path as a file will then fail.

Argument:   the path
Returns:    non-zero if the path is a regular file or cannot be examined,
            otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0) return S_ISREG(info.st_mode);
return 1;
}
839
840
841 #if defined NATIVE_ZOS
842 /************* Test for a terminal in z/OS **********/
843 /* isatty() does not work in a TSO environment, so always give FALSE.*/
844
845 static BOOL
is_stdout_tty(void)846 is_stdout_tty(void)
847 {
848 return FALSE;
849 }
850
851 static BOOL
is_file_tty(FILE * f)852 is_file_tty(FILE *f)
853 {
854 return FALSE;
855 }
856
857
858 /************* Test for a terminal, Unix-style **********/
859
860 #else
861 static BOOL
is_stdout_tty(void)862 is_stdout_tty(void)
863 {
864 return isatty(fileno(stdout));
865 }
866
867 static BOOL
is_file_tty(FILE * f)868 is_file_tty(FILE *f)
869 {
870 return isatty(fileno(f));
871 }
872 #endif
873
874
875 /************* Print optionally coloured match Unix-style and z/OS **********/
876
877 static void
print_match(const void * buf,int length)878 print_match(const void *buf, int length)
879 {
880 if (length == 0) return;
881 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
882 FWRITE_IGNORE(buf, 1, length, stdout);
883 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
884 }
885
886 /* End of Unix-style or native z/OS environment functions. */
887
888
889 /************* Directory scanning in Windows ***********/
890
891 /* I (Philip Hazel) have no means of testing this code. It was contributed by
892 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
893 when it did not exist. David Byron added a patch that moved the #include of
894 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
895 */
896
897 #elif defined WIN32
898
899 #ifndef INVALID_FILE_ATTRIBUTES
900 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
901 #endif
902
903 typedef struct directory_type
904 {
905 HANDLE handle;
906 BOOL first;
907 WIN32_FIND_DATA data;
908 } directory_type;
909
910 #define FILESEP '/'
911
912 int
isdirectory(char * filename)913 isdirectory(char *filename)
914 {
915 DWORD attr = GetFileAttributes(filename);
916 if (attr == INVALID_FILE_ATTRIBUTES)
917 return 0;
918 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
919 }
920
921 directory_type *
opendirectory(char * filename)922 opendirectory(char *filename)
923 {
924 size_t len;
925 char *pattern;
926 directory_type *dir;
927 DWORD err;
928 len = strlen(filename);
929 pattern = (char *)malloc(len + 3);
930 dir = (directory_type *)malloc(sizeof(*dir));
931 if ((pattern == NULL) || (dir == NULL))
932 {
933 fprintf(stderr, "pcre2grep: malloc failed\n");
934 pcre2grep_exit(2);
935 }
936 memcpy(pattern, filename, len);
937 if (iswild(filename))
938 pattern[len] = 0;
939 else
940 memcpy(&(pattern[len]), "\\*", 3);
941 dir->handle = FindFirstFile(pattern, &(dir->data));
942 if (dir->handle != INVALID_HANDLE_VALUE)
943 {
944 free(pattern);
945 dir->first = TRUE;
946 return dir;
947 }
948 err = GetLastError();
949 free(pattern);
950 free(dir);
951 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
952 return NULL;
953 }
954
955 char *
readdirectory(directory_type * dir)956 readdirectory(directory_type *dir)
957 {
958 for (;;)
959 {
960 if (!dir->first)
961 {
962 if (!FindNextFile(dir->handle, &(dir->data)))
963 return NULL;
964 }
965 else
966 {
967 dir->first = FALSE;
968 }
969 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
970 return dir->data.cFileName;
971 }
972 #ifndef _MSC_VER
973 return NULL; /* Keep compiler happy; never executed */
974 #endif
975 }
976
977 void
closedirectory(directory_type * dir)978 closedirectory(directory_type *dir)
979 {
980 FindClose(dir->handle);
981 free(dir);
982 }
983
984
985 /************* Test for regular file in Windows **********/
986
987 /* I don't know how to do this, or if it can be done; assume all paths are
988 regular if they are not directories. */
989
/* Return 1 for any path that isdirectory() does not accept, 0 otherwise. */

int
isregfile(char *filename)
{
  return isdirectory(filename) ? 0 : 1;
}
994
995
996 /************* Test for a terminal in Windows **********/
997
998 static BOOL
is_stdout_tty(void)999 is_stdout_tty(void)
1000 {
1001 return _isatty(_fileno(stdout));
1002 }
1003
1004 static BOOL
is_file_tty(FILE * f)1005 is_file_tty(FILE *f)
1006 {
1007 return _isatty(_fileno(f));
1008 }
1009
1010
1011 /************* Print optionally coloured match in Windows **********/
1012
1013 static void
print_match(const void * buf,int length)1014 print_match(const void *buf, int length)
1015 {
1016 if (length == 0) return;
1017 if (do_colour)
1018 {
1019 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1020 else SetConsoleTextAttribute(hstdout, match_colour);
1021 }
1022 FWRITE_IGNORE(buf, 1, length, stdout);
1023 if (do_colour)
1024 {
1025 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1026 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1027 }
1028 }
1029
1030 /* End of Windows functions */
1031
1032
1033 /************* Directory scanning when we can't do it ***********/
1034
1035 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1036
1037 #else
1038
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory can never be opened: always returns a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* There are never any entries to read: always returns a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
1046
1047
1048 /************* Test for regular file when we can't do it **********/
1049
1050 /* Assume all files are regular. */
1051
/* Every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
1053
1054
1055 /************* Test for a terminal when we can't do it **********/
1056
1057 static BOOL
is_stdout_tty(void)1058 is_stdout_tty(void)
1059 {
1060 return FALSE;
1061 }
1062
1063 static BOOL
is_file_tty(FILE * f)1064 is_file_tty(FILE *f)
1065 {
1066 return FALSE;
1067 }
1068
1069
1070 /************* Print optionally coloured match when we can't do it **********/
1071
/* Write length bytes from buf to stdout, uncoloured. Nothing is written for a
zero length. */

static void
print_match(const void *buf, int length)
{
  if (length != 0)
  {
    FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1078
1079 #endif /* End of system-specific functions */
1080
1081
1082
1083 #ifndef HAVE_STRERROR
1084 /*************************************************
1085 * Provide strerror() for non-ANSI libraries *
1086 *************************************************/
1087
1088 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1089 in their libraries, but can provide the same facility by this simple
1090 alternative function. */
1091
extern int sys_nerr;
extern char *sys_errlist[];

/* Return the sys_errlist[] text for error number n, or a fixed message when n
lies outside the range 0 .. sys_nerr-1. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
1101 #endif /* HAVE_STRERROR */
1102
1103
1104
1105 /*************************************************
1106 * Usage function *
1107 *************************************************/
1108
1109 static int
usage(int rc)1110 usage(int rc)
1111 {
1112 option_item *op;
1113 fprintf(stderr, "Usage: pcre2grep [-");
1114 for (op = optionlist; op->one_char != 0; op++)
1115 {
1116 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1117 }
1118 fprintf(stderr, "] [long options] [pattern] [files]\n");
1119 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1120 "options.\n");
1121 return rc;
1122 }
1123
1124
1125
1126 /*************************************************
1127 * Help function *
1128 *************************************************/
1129
1130 static void
help(void)1131 help(void)
1132 {
1133 option_item *op;
1134
1135 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1136 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1137 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1138
1139 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1140 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1141 printf("All callout scripts in patterns are supported." STDOUT_NL);
1142 #else
1143 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1144 #endif
1145 #else
1146 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1147 #endif
1148
1149 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1150
1151 #ifdef SUPPORT_LIBZ
1152 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1153 #endif
1154
1155 #ifdef SUPPORT_LIBBZ2
1156 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1157 #endif
1158
1159 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1160 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1161 #else
1162 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1163 #endif
1164
1165 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1166 printf("Options:" STDOUT_NL);
1167
1168 for (op = optionlist; op->one_char != 0; op++)
1169 {
1170 int n;
1171 char s[4];
1172
1173 if (op->one_char > 0 && (op->long_name)[0] == 0)
1174 n = 31 - printf(" -%c", op->one_char);
1175 else
1176 {
1177 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1178 else strcpy(s, " ");
1179 n = 31 - printf(" %s --%s", s, op->long_name);
1180 }
1181
1182 if (n < 1) n = 1;
1183 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1184 }
1185
1186 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1187 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1188 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1189 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1190 printf("space is removed and blank lines are ignored." STDOUT_NL);
1191 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1192
1193 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1194 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1195 }
1196
1197
1198
1199 /*************************************************
1200 * Test exclude/includes *
1201 *************************************************/
1202
1203 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1204 there are no includes, the path must match an include pattern.
1205
1206 Arguments:
1207 path the path to be matched
1208 ip the chain of include patterns
1209 ep the chain of exclude patterns
1210
1211 Returns: TRUE if the path is not excluded
1212 */
1213
1214 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1215 test_incexc(char *path, patstr *ip, patstr *ep)
1216 {
1217 int plen = strlen((const char *)path);
1218
1219 for (; ep != NULL; ep = ep->next)
1220 {
1221 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1222 return FALSE;
1223 }
1224
1225 if (ip == NULL) return TRUE;
1226
1227 for (; ip != NULL; ip = ip->next)
1228 {
1229 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1230 return TRUE;
1231 }
1232
1233 return FALSE;
1234 }
1235
1236
1237
1238 /*************************************************
1239 * Decode integer argument value *
1240 *************************************************/
1241
1242 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1243 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1244 just keep it simple.
1245
1246 Arguments:
1247 option_data the option data string
1248 op the option item (for error messages)
1249 longop TRUE if option given in long form
1250
1251 Returns: a long integer
1252 */
1253
1254 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1255 decode_number(char *option_data, option_item *op, BOOL longop)
1256 {
1257 unsigned long int n = 0;
1258 char *endptr = option_data;
1259 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1260 while (isdigit((unsigned char)(*endptr)))
1261 n = n * 10 + (int)(*endptr++ - '0');
1262 if (toupper(*endptr) == 'K')
1263 {
1264 n *= 1024;
1265 endptr++;
1266 }
1267 else if (toupper(*endptr) == 'M')
1268 {
1269 n *= 1024*1024;
1270 endptr++;
1271 }
1272
1273 if (*endptr != 0) /* Error */
1274 {
1275 if (longop)
1276 {
1277 char *equals = strchr(op->long_name, '=');
1278 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1279 (int)(equals - op->long_name);
1280 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1281 option_data, nlen, op->long_name);
1282 }
1283 else
1284 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1285 option_data, op->one_char);
1286 pcre2grep_exit(usage(2));
1287 }
1288
1289 return n;
1290 }
1291
1292
1293
1294 /*************************************************
1295 * Add item to a chain of numbers *
1296 *************************************************/
1297
1298 /* Used to add an item onto a chain, or just return an unconnected item if the
1299 "after" argument is NULL.
1300
1301 Arguments:
1302 n the number to add
1303 after if not NULL points to item to insert after
1304
1305 Returns: new number block
1306 */
1307
1308 static omstr *
add_number(int n,omstr * after)1309 add_number(int n, omstr *after)
1310 {
1311 omstr *om = (omstr *)malloc(sizeof(omstr));
1312
1313 if (om == NULL)
1314 {
1315 fprintf(stderr, "pcre2grep: malloc failed\n");
1316 pcre2grep_exit(2);
1317 }
1318 om->next = NULL;
1319 om->groupnum = n;
1320
1321 if (after != NULL)
1322 {
1323 om->next = after->next;
1324 after->next = om;
1325 }
1326 return om;
1327 }
1328
1329
1330
1331 /*************************************************
1332 * Read one line of input *
1333 *************************************************/
1334
1335 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1336 BZ2_read) into a large buffer, so many lines may be read at once. However,
1337 doing this for tty input means that no output appears until a lot of input has
1338 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1339 for this, because it does not stop at a binary zero, and therefore there is no
1340 way of telling how many characters it has read, because there may be binary
1341 zeros embedded in the data. This function is also used for reading patterns
1342 from files (the -f option).
1343
1344 Arguments:
1345 buffer the buffer to read into
1346 length the maximum number of characters to read
1347 f the file
1348
1349 Returns: the number of characters read, zero at end of file
1350 */
1351
1352 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1353 read_one_line(char *buffer, int length, FILE *f)
1354 {
1355 int c;
1356 int yield = 0;
1357 while ((c = fgetc(f)) != EOF)
1358 {
1359 buffer[yield++] = c;
1360 if (c == '\n' || yield >= length) break;
1361 }
1362 return yield;
1363 }
1364
1365
1366
1367 /*************************************************
1368 * Find end of line *
1369 *************************************************/
1370
1371 /* The length of the endline sequence that is found is set via lenptr. This may
1372 be zero at the very end of the file if there is no line-ending sequence there.
1373
1374 Arguments:
1375 p current position in line
1376 endptr end of available data
1377 lenptr where to put the length of the eol sequence
1378
1379 Returns: pointer after the last byte of the line,
1380 including the newline byte(s)
1381 */
1382
1383 static char *
end_of_line(char * p,char * endptr,int * lenptr)1384 end_of_line(char *p, char *endptr, int *lenptr)
1385 {
1386 switch(endlinetype)
1387 {
1388 default: /* Just in case */
1389 case PCRE2_NEWLINE_LF:
1390 while (p < endptr && *p != '\n') p++;
1391 if (p < endptr)
1392 {
1393 *lenptr = 1;
1394 return p + 1;
1395 }
1396 *lenptr = 0;
1397 return endptr;
1398
1399 case PCRE2_NEWLINE_CR:
1400 while (p < endptr && *p != '\r') p++;
1401 if (p < endptr)
1402 {
1403 *lenptr = 1;
1404 return p + 1;
1405 }
1406 *lenptr = 0;
1407 return endptr;
1408
1409 case PCRE2_NEWLINE_NUL:
1410 while (p < endptr && *p != '\0') p++;
1411 if (p < endptr)
1412 {
1413 *lenptr = 1;
1414 return p + 1;
1415 }
1416 *lenptr = 0;
1417 return endptr;
1418
1419 case PCRE2_NEWLINE_CRLF:
1420 for (;;)
1421 {
1422 while (p < endptr && *p != '\r') p++;
1423 if (++p >= endptr)
1424 {
1425 *lenptr = 0;
1426 return endptr;
1427 }
1428 if (*p == '\n')
1429 {
1430 *lenptr = 2;
1431 return p + 1;
1432 }
1433 }
1434 break;
1435
1436 case PCRE2_NEWLINE_ANYCRLF:
1437 while (p < endptr)
1438 {
1439 int extra = 0;
1440 int c = *((unsigned char *)p);
1441
1442 if (utf && c >= 0xc0)
1443 {
1444 int gcii, gcss;
1445 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1446 gcss = 6*extra;
1447 c = (c & utf8_table3[extra]) << gcss;
1448 for (gcii = 1; gcii <= extra; gcii++)
1449 {
1450 gcss -= 6;
1451 c |= (p[gcii] & 0x3f) << gcss;
1452 }
1453 }
1454
1455 p += 1 + extra;
1456
1457 switch (c)
1458 {
1459 case '\n':
1460 *lenptr = 1;
1461 return p;
1462
1463 case '\r':
1464 if (p < endptr && *p == '\n')
1465 {
1466 *lenptr = 2;
1467 p++;
1468 }
1469 else *lenptr = 1;
1470 return p;
1471
1472 default:
1473 break;
1474 }
1475 } /* End of loop for ANYCRLF case */
1476
1477 *lenptr = 0; /* Must have hit the end */
1478 return endptr;
1479
1480 case PCRE2_NEWLINE_ANY:
1481 while (p < endptr)
1482 {
1483 int extra = 0;
1484 int c = *((unsigned char *)p);
1485
1486 if (utf && c >= 0xc0)
1487 {
1488 int gcii, gcss;
1489 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1490 gcss = 6*extra;
1491 c = (c & utf8_table3[extra]) << gcss;
1492 for (gcii = 1; gcii <= extra; gcii++)
1493 {
1494 gcss -= 6;
1495 c |= (p[gcii] & 0x3f) << gcss;
1496 }
1497 }
1498
1499 p += 1 + extra;
1500
1501 switch (c)
1502 {
1503 case '\n': /* LF */
1504 case '\v': /* VT */
1505 case '\f': /* FF */
1506 *lenptr = 1;
1507 return p;
1508
1509 case '\r': /* CR */
1510 if (p < endptr && *p == '\n')
1511 {
1512 *lenptr = 2;
1513 p++;
1514 }
1515 else *lenptr = 1;
1516 return p;
1517
1518 #ifndef EBCDIC
1519 case 0x85: /* Unicode NEL */
1520 *lenptr = utf? 2 : 1;
1521 return p;
1522
1523 case 0x2028: /* Unicode LS */
1524 case 0x2029: /* Unicode PS */
1525 *lenptr = 3;
1526 return p;
1527 #endif /* Not EBCDIC */
1528
1529 default:
1530 break;
1531 }
1532 } /* End of loop for ANY case */
1533
1534 *lenptr = 0; /* Must have hit the end */
1535 return endptr;
1536 } /* End of overall switch */
1537 }
1538
1539
1540
1541 /*************************************************
1542 * Find start of previous line *
1543 *************************************************/
1544
1545 /* This is called when looking back for before lines to print.
1546
1547 Arguments:
1548 p start of the subsequent line
1549 startptr start of available data
1550
1551 Returns: pointer to the start of the previous line
1552 */
1553
1554 static char *
previous_line(char * p,char * startptr)1555 previous_line(char *p, char *startptr)
1556 {
1557 switch(endlinetype)
1558 {
1559 default: /* Just in case */
1560 case PCRE2_NEWLINE_LF:
1561 p--;
1562 while (p > startptr && p[-1] != '\n') p--;
1563 return p;
1564
1565 case PCRE2_NEWLINE_CR:
1566 p--;
1567 while (p > startptr && p[-1] != '\n') p--;
1568 return p;
1569
1570 case PCRE2_NEWLINE_NUL:
1571 p--;
1572 while (p > startptr && p[-1] != '\0') p--;
1573 return p;
1574
1575 case PCRE2_NEWLINE_CRLF:
1576 for (;;)
1577 {
1578 p -= 2;
1579 while (p > startptr && p[-1] != '\n') p--;
1580 if (p <= startptr + 1 || p[-2] == '\r') return p;
1581 }
1582 /* Control can never get here */
1583
1584 case PCRE2_NEWLINE_ANY:
1585 case PCRE2_NEWLINE_ANYCRLF:
1586 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1587 if (utf) while ((*p & 0xc0) == 0x80) p--;
1588
1589 while (p > startptr)
1590 {
1591 unsigned int c;
1592 char *pp = p - 1;
1593
1594 if (utf)
1595 {
1596 int extra = 0;
1597 while ((*pp & 0xc0) == 0x80) pp--;
1598 c = *((unsigned char *)pp);
1599 if (c >= 0xc0)
1600 {
1601 int gcii, gcss;
1602 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1603 gcss = 6*extra;
1604 c = (c & utf8_table3[extra]) << gcss;
1605 for (gcii = 1; gcii <= extra; gcii++)
1606 {
1607 gcss -= 6;
1608 c |= (pp[gcii] & 0x3f) << gcss;
1609 }
1610 }
1611 }
1612 else c = *((unsigned char *)pp);
1613
1614 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1615 {
1616 case '\n': /* LF */
1617 case '\r': /* CR */
1618 return p;
1619
1620 default:
1621 break;
1622 }
1623
1624 else switch (c)
1625 {
1626 case '\n': /* LF */
1627 case '\v': /* VT */
1628 case '\f': /* FF */
1629 case '\r': /* CR */
1630 #ifndef EBCDIC
1631 case 0x85: /* Unicode NEL */
1632 case 0x2028: /* Unicode LS */
1633 case 0x2029: /* Unicode PS */
1634 #endif /* Not EBCDIC */
1635 return p;
1636
1637 default:
1638 break;
1639 }
1640
1641 p = pp; /* Back one character */
1642 } /* End of loop for ANY case */
1643
1644 return startptr; /* Hit start of data */
1645 } /* End of overall switch */
1646 }
1647
1648
1649
1650 /*************************************************
1651 * Print the previous "after" lines *
1652 *************************************************/
1653
1654 /* This is called if we are about to lose said lines because of buffer filling,
1655 and at the end of the file. The data in the line is written using fwrite() so
1656 that a binary zero does not terminate it.
1657
1658 Arguments:
1659 lastmatchnumber the number of the last matching line, plus one
1660 lastmatchrestart where we restarted after the last match
1661 endptr end of available data
1662 printname filename for printing
1663
1664 Returns: nothing
1665 */
1666
1667 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1668 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1669 char *endptr, const char *printname)
1670 {
1671 if (after_context > 0 && lastmatchnumber > 0)
1672 {
1673 int count = 0;
1674 while (lastmatchrestart < endptr && count < after_context)
1675 {
1676 int ellength;
1677 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1678 if (ellength == 0 && pp == main_buffer + bufsize) break;
1679 if (printname != NULL) fprintf(stdout, "%s-", printname);
1680 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1681 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1682 lastmatchrestart = pp;
1683 count++;
1684 }
1685 if (count > 0) hyphenpending = TRUE;
1686 }
1687 }
1688
1689
1690
1691 /*************************************************
1692 * Apply patterns to subject till one matches *
1693 *************************************************/
1694
1695 /* This function is called to run through all patterns, looking for a match. It
1696 is used multiple times for the same subject when colouring is enabled, in order
1697 to find all possible matches.
1698
1699 Arguments:
1700 matchptr the start of the subject
1701 length the length of the subject to match
  options      options for pcre2_match()
1703 startoffset where to start matching
1704 mrc address of where to put the result of pcre2_match()
1705
1706 Returns: TRUE if there was a match
1707 FALSE if there was no match
1708 invert if there was a non-fatal error
1709 */
1710
1711 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1712 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1713 PCRE2_SIZE startoffset, int *mrc)
1714 {
1715 int i;
1716 PCRE2_SIZE slen = length;
1717 patstr *p = patterns;
1718 const char *msg = "this text:\n\n";
1719
1720 if (slen > 200)
1721 {
1722 slen = 200;
1723 msg = "text that starts:\n\n";
1724 }
1725 for (i = 1; p != NULL; p = p->next, i++)
1726 {
1727 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1728 startoffset, options, match_data, match_context);
1729 if (*mrc >= 0) return TRUE;
1730 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1731 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1732 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1733 fprintf(stderr, "%s", msg);
1734 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1735 fprintf(stderr, "\n\n");
1736 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1737 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1738 resource_error = TRUE;
1739 if (error_count++ > 20)
1740 {
1741 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1742 pcre2grep_exit(2);
1743 }
1744 return invert; /* No more matching; don't show the line again */
1745 }
1746
1747 return FALSE; /* No match, no errors */
1748 }
1749
1750
1751 /*************************************************
1752 * Check output text for errors *
1753 *************************************************/
1754
1755 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)1756 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
1757 {
1758 PCRE2_SPTR begin = string;
1759 for (; *string != 0; string++)
1760 {
1761 if (*string == '$')
1762 {
1763 PCRE2_SIZE capture_id = 0;
1764 BOOL brace = FALSE;
1765
1766 string++;
1767
1768 /* Syntax error: a character must be present after $. */
1769 if (*string == 0)
1770 {
1771 if (!callout)
1772 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1773 (int)(string - begin), "no character after $");
1774 return FALSE;
1775 }
1776
1777 if (*string == '{')
1778 {
1779 /* Must be a decimal number in braces, e.g: {5} or {38} */
1780 string++;
1781
1782 brace = TRUE;
1783 }
1784
1785 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1786 {
1787 do
1788 {
1789 /* Maximum capture id is 65535. */
1790 if (capture_id <= 65535)
1791 capture_id = capture_id * 10 + (*string - '0');
1792
1793 string++;
1794 }
1795 while (*string >= '0' && *string <= '9');
1796
1797 if (brace)
1798 {
1799 /* Syntax error: closing brace is missing. */
1800 if (*string != '}')
1801 {
1802 if (!callout)
1803 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1804 (int)(string - begin), "missing closing brace");
1805 return FALSE;
1806 }
1807 }
1808 else
1809 {
1810 /* To negate the effect of the for. */
1811 string--;
1812 }
1813 }
1814 else if (brace)
1815 {
1816 /* Syntax error: a decimal number required. */
1817 if (!callout)
1818 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1819 (int)(string - begin), "decimal number expected");
1820 return FALSE;
1821 }
1822 else if (*string == 'o')
1823 {
1824 string++;
1825
1826 if (*string < '0' || *string > '7')
1827 {
1828 /* Syntax error: an octal number required. */
1829 if (!callout)
1830 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1831 (int)(string - begin), "octal number expected");
1832 return FALSE;
1833 }
1834 }
1835 else if (*string == 'x')
1836 {
1837 string++;
1838
1839 if (!isxdigit((unsigned char)*string))
1840 {
1841 /* Syntax error: a hexdecimal number required. */
1842 if (!callout)
1843 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1844 (int)(string - begin), "hexadecimal number expected");
1845 return FALSE;
1846 }
1847 }
1848 }
1849 }
1850
1851 return TRUE;
1852 }
1853
1854
1855 /*************************************************
1856 * Display output text *
1857 *************************************************/
1858
1859 /* Display the output text, which is assumed to have already been syntax
1860 checked. Output may contain escape sequences started by the dollar sign. The
1861 escape sequences are substituted as follows:
1862
1863 $<digits> or ${<digits>} is replaced by the captured substring of the given
1864 decimal number; zero will substitute the whole match. If the number is
1865 greater than the number of capturing substrings, or if the capture is unset,
1866 the replacement is empty.
1867
1868 $a is replaced by bell.
1869 $b is replaced by backspace.
1870 $e is replaced by escape.
1871 $f is replaced by form feed.
1872 $n is replaced by newline.
1873 $r is replaced by carriage return.
1874 $t is replaced by tab.
1875 $v is replaced by vertical tab.
1876
1877 $o<digits> is replaced by the character represented by the given octal
1878 number; up to three digits are processed.
1879
1880 $x<digits> is replaced by the character represented by the given hexadecimal
1881 number; up to two digits are processed.
1882
1883 Any other character is substituted by itself. E.g: $$ is replaced by a single
1884 dollar.
1885
1886 Arguments:
1887 string: the output text
1888 callout: TRUE for the builtin callout, FALSE for --output
1889 subject the start of the subject
1890 ovector: capture offsets
1891 capture_top: number of captures
1892
1893 Returns: TRUE if something was output, other than newline
1894 FALSE if nothing was output, or newline was last output
1895 */
1896
1897 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)1898 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
1899 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
1900 {
1901 BOOL printed = FALSE;
1902
1903 for (; *string != 0; string++)
1904 {
1905 int ch = EOF;
1906 if (*string == '$')
1907 {
1908 PCRE2_SIZE capture_id = 0;
1909 BOOL brace = FALSE;
1910
1911 string++;
1912
1913 if (*string == '{')
1914 {
1915 /* Must be a decimal number in braces, e.g: {5} or {38} */
1916 string++;
1917
1918 brace = TRUE;
1919 }
1920
1921 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
1922 {
1923 do
1924 {
1925 /* Maximum capture id is 65535. */
1926 if (capture_id <= 65535)
1927 capture_id = capture_id * 10 + (*string - '0');
1928
1929 string++;
1930 }
1931 while (*string >= '0' && *string <= '9');
1932
1933 if (!brace)
1934 {
1935 /* To negate the effect of the for. */
1936 string--;
1937 }
1938
1939 if (capture_id < capture_top)
1940 {
1941 PCRE2_SIZE capturesize;
1942 capture_id *= 2;
1943
1944 capturesize = ovector[capture_id + 1] - ovector[capture_id];
1945 if (capturesize > 0)
1946 {
1947 print_match(subject + ovector[capture_id], capturesize);
1948 printed = TRUE;
1949 }
1950 }
1951 }
1952 else if (*string == 'a') ch = '\a';
1953 else if (*string == 'b') ch = '\b';
1954 #ifndef EBCDIC
1955 else if (*string == 'e') ch = '\033';
1956 #else
1957 else if (*string == 'e') ch = '\047';
1958 #endif
1959 else if (*string == 'f') ch = '\f';
1960 else if (*string == 'r') ch = '\r';
1961 else if (*string == 't') ch = '\t';
1962 else if (*string == 'v') ch = '\v';
1963 else if (*string == 'n')
1964 {
1965 fprintf(stdout, STDOUT_NL);
1966 printed = FALSE;
1967 }
1968 else if (*string == 'o')
1969 {
1970 string++;
1971
1972 ch = *string - '0';
1973 if (string[1] >= '0' && string[1] <= '7')
1974 {
1975 string++;
1976 ch = ch * 8 + (*string - '0');
1977 }
1978 if (string[1] >= '0' && string[1] <= '7')
1979 {
1980 string++;
1981 ch = ch * 8 + (*string - '0');
1982 }
1983 }
1984 else if (*string == 'x')
1985 {
1986 string++;
1987
1988 if (*string >= '0' && *string <= '9')
1989 ch = *string - '0';
1990 else
1991 ch = (*string | 0x20) - 'a' + 10;
1992 if (isxdigit((unsigned char)string[1]))
1993 {
1994 string++;
1995 ch *= 16;
1996 if (*string >= '0' && *string <= '9')
1997 ch += *string - '0';
1998 else
1999 ch += (*string | 0x20) - 'a' + 10;
2000 }
2001 }
2002 else
2003 {
2004 ch = *string;
2005 }
2006 }
2007 else
2008 {
2009 ch = *string;
2010 }
2011 if (ch != EOF)
2012 {
2013 fprintf(stdout, "%c", ch);
2014 printed = TRUE;
2015 }
2016 }
2017
2018 return printed;
2019 }
2020
2021
2022 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2023
2024 /*************************************************
2025 * Parse and execute callout scripts *
2026 *************************************************/
2027
2028 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2029 string block and executes the program specified by the string. The string is a
2030 list of substrings separated by pipe characters. The first substring represents
2031 the executable name, and the following substrings specify the arguments:
2032
2033 program_name|param1|param2|...
2034
2035 Any substring (including the program name) can contain escape sequences
2036 started by the dollar character. The escape sequences are substituted as
2037 follows:
2038
2039 $<digits> or ${<digits>} is replaced by the captured substring of the given
2040 decimal number, which must be greater than zero. If the number is greater
2041 than the number of capturing substrings, or if the capture is unset, the
2042 replacement is empty.
2043
2044 Any other character is substituted by itself. E.g: $$ is replaced by a single
2045 dollar or $| replaced by a pipe character.
2046
2047 Alternatively, if string starts with pipe, the remainder is taken as an output
2048 string, same as --output. This is the only form that is supported if
SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined. In this case, --om-separator is used to
2050 separate each callout, defaulting to newline.
2051
2052 Example:
2053
2054 echo -e "abcde\n12345" | pcre2grep \
2055 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2056
2057 Output:
2058
2059 Arg1: [a] [bcd] [d] Arg2: |a| ()
2060 abcde
2061 Arg1: [1] [234] [4] Arg2: |1| ()
2062 12345
2063
2064 Arguments:
  calloutptr   the callout block (the second argument is not used)
2066
2067 Returns: currently it always returns with 0
2068 */
2069
2070 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2071 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2072 {
2073 PCRE2_SIZE length = calloutptr->callout_string_length;
2074 PCRE2_SPTR string = calloutptr->callout_string;
2075 PCRE2_SPTR subject = calloutptr->subject;
2076 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2077 PCRE2_SIZE capture_top = calloutptr->capture_top;
2078
2079 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2080 PCRE2_SIZE argsvectorlen = 2;
2081 PCRE2_SIZE argslen = 1;
2082 char *args;
2083 char *argsptr;
2084 char **argsvector;
2085 char **argsvectorptr;
2086 #ifndef WIN32
2087 pid_t pid;
2088 #endif
2089 int result = 0;
2090 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2091
2092 (void)unused; /* Avoid compiler warning */
2093
2094 /* Only callout with strings are supported. */
2095
2096 if (string == NULL || length == 0) return 0;
2097
2098 /* If there's no command, output the remainder directly. */
2099
2100 if (*string == '|')
2101 {
2102 string++;
2103 if (!syntax_check_output_text(string, TRUE)) return 0;
2104 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2105 return 0;
2106 }
2107
2108 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2109 return 0;
2110 #else
2111
2112 /* Checking syntax and compute the number of string fragments. Callout strings
2113 are ignored in case of a syntax error. */
2114
2115 while (length > 0)
2116 {
2117 if (*string == '|')
2118 {
2119 argsvectorlen++;
2120
2121 /* Maximum 10000 arguments allowed. */
2122 if (argsvectorlen > 10000) return 0;
2123 }
2124 else if (*string == '$')
2125 {
2126 PCRE2_SIZE capture_id = 0;
2127
2128 string++;
2129 length--;
2130
2131 /* Syntax error: a character must be present after $. */
2132 if (length == 0) return 0;
2133
2134 if (*string >= '1' && *string <= '9')
2135 {
2136 do
2137 {
2138 /* Maximum capture id is 65535. */
2139 if (capture_id <= 65535)
2140 capture_id = capture_id * 10 + (*string - '0');
2141
2142 string++;
2143 length--;
2144 }
2145 while (length > 0 && *string >= '0' && *string <= '9');
2146
2147 /* To negate the effect of string++ below. */
2148 string--;
2149 length++;
2150 }
2151 else if (*string == '{')
2152 {
2153 /* Must be a decimal number in braces, e.g: {5} or {38} */
2154 string++;
2155 length--;
2156
2157 /* Syntax error: a decimal number required. */
2158 if (length == 0) return 0;
2159 if (*string < '1' || *string > '9') return 0;
2160
2161 do
2162 {
2163 /* Maximum capture id is 65535. */
2164 if (capture_id <= 65535)
2165 capture_id = capture_id * 10 + (*string - '0');
2166
2167 string++;
2168 length--;
2169
2170 /* Syntax error: no more characters */
2171 if (length == 0) return 0;
2172 }
2173 while (*string >= '0' && *string <= '9');
2174
2175 /* Syntax error: closing brace is missing. */
2176 if (*string != '}') return 0;
2177 }
2178
2179 if (capture_id > 0)
2180 {
2181 if (capture_id < capture_top)
2182 {
2183 capture_id *= 2;
2184 argslen += ovector[capture_id + 1] - ovector[capture_id];
2185 }
2186
2187 /* To negate the effect of argslen++ below. */
2188 argslen--;
2189 }
2190 }
2191
2192 string++;
2193 length--;
2194 argslen++;
2195 }
2196
2197 args = (char*)malloc(argslen);
2198 if (args == NULL) return 0;
2199
2200 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2201 if (argsvector == NULL)
2202 {
2203 free(args);
2204 return 0;
2205 }
2206
2207 argsptr = args;
2208 argsvectorptr = argsvector;
2209
2210 *argsvectorptr++ = argsptr;
2211
2212 length = calloutptr->callout_string_length;
2213 string = calloutptr->callout_string;
2214
2215 while (length > 0)
2216 {
2217 if (*string == '|')
2218 {
2219 *argsptr++ = '\0';
2220 *argsvectorptr++ = argsptr;
2221 }
2222 else if (*string == '$')
2223 {
2224 string++;
2225 length--;
2226
2227 if ((*string >= '1' && *string <= '9') || *string == '{')
2228 {
2229 PCRE2_SIZE capture_id = 0;
2230
2231 if (*string != '{')
2232 {
2233 do
2234 {
2235 /* Maximum capture id is 65535. */
2236 if (capture_id <= 65535)
2237 capture_id = capture_id * 10 + (*string - '0');
2238
2239 string++;
2240 length--;
2241 }
2242 while (length > 0 && *string >= '0' && *string <= '9');
2243
2244 /* To negate the effect of string++ below. */
2245 string--;
2246 length++;
2247 }
2248 else
2249 {
2250 string++;
2251 length--;
2252
2253 do
2254 {
2255 /* Maximum capture id is 65535. */
2256 if (capture_id <= 65535)
2257 capture_id = capture_id * 10 + (*string - '0');
2258
2259 string++;
2260 length--;
2261 }
2262 while (*string != '}');
2263 }
2264
2265 if (capture_id < capture_top)
2266 {
2267 PCRE2_SIZE capturesize;
2268 capture_id *= 2;
2269
2270 capturesize = ovector[capture_id + 1] - ovector[capture_id];
2271 memcpy(argsptr, subject + ovector[capture_id], capturesize);
2272 argsptr += capturesize;
2273 }
2274 }
2275 else
2276 {
2277 *argsptr++ = *string;
2278 }
2279 }
2280 else
2281 {
2282 *argsptr++ = *string;
2283 }
2284
2285 string++;
2286 length--;
2287 }
2288
2289 *argsptr++ = '\0';
2290 *argsvectorptr = NULL;
2291
2292 /* Running an external command is system-dependent. Handle Windows and VMS as
2293 necessary, otherwise assume fork(). */
2294
2295 #ifdef WIN32
2296 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2297
2298 #elif defined __VMS
2299 {
2300 char cmdbuf[500];
2301 short i = 0;
2302 int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2303 $DESCRIPTOR(cmd, cmdbuf);
2304
2305 cmdbuf[0] = 0;
2306 while (argsvector[i])
2307 {
2308 strcat(cmdbuf, argsvector[i]);
2309 strcat(cmdbuf, " ");
2310 i++;
2311 }
2312 cmd.dsc$w_length = strlen(cmdbuf) - 1;
2313 status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2314 if (!(status & 1)) result = 0;
2315 else result = retstat & 1 ? 0 : 1;
2316 }
2317
2318 #else /* Neither Windows nor VMS */
2319 pid = fork();
2320 if (pid == 0)
2321 {
2322 (void)execv(argsvector[0], argsvector);
2323 /* Control gets here if there is an error, e.g. a non-existent program */
2324 exit(1);
2325 }
2326 else if (pid > 0)
2327 (void)waitpid(pid, &result, 0);
2328 #endif /* End Windows/VMS/other handling */
2329
2330 free(args);
2331 free(argsvector);
2332
2333 /* Currently negative return values are not supported, only zero (match
2334 continues) or non-zero (match fails). */
2335
2336 return result != 0;
2337 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2338 }
2339 #endif /* SUPPORT_PCRE2GREP_CALLOUT */
2340
2341
2342
2343 /*************************************************
2344 * Read a portion of the file into buffer *
2345 *************************************************/
2346
2347 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2348 fill_buffer(void *handle, int frtype, char *buffer, int length,
2349 BOOL input_line_buffered)
2350 {
2351 (void)frtype; /* Avoid warning when not used */
2352
2353 #ifdef SUPPORT_LIBZ
2354 if (frtype == FR_LIBZ)
2355 return gzread((gzFile)handle, buffer, length);
2356 else
2357 #endif
2358
2359 #ifdef SUPPORT_LIBBZ2
2360 if (frtype == FR_LIBBZ2)
2361 return BZ2_bzread((BZFILE *)handle, buffer, length);
2362 else
2363 #endif
2364
2365 return (input_line_buffered ?
2366 read_one_line(buffer, length, (FILE *)handle) :
2367 fread(buffer, 1, length, (FILE *)handle));
2368 }
2369
2370
2371
2372 /*************************************************
2373 * Grep an individual file *
2374 *************************************************/
2375
2376 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2377 times the value of bufthird. The matching point is never allowed to stray into
2378 the top third of the buffer, thus keeping more of the file available for
2379 context printing or for multiline scanning. For large files, the pointer will
2380 be in the middle third most of the time, so the bottom third is available for
2381 "before" context printing.
2382
2383 Arguments:
2384 handle the fopened FILE stream for a normal file
2385 the gzFile pointer when reading is via libz
2386 the BZFILE pointer when reading is via libbz2
2387 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2388 filename the file name or NULL (for errors)
2389 printname the file name if it is to be printed for each match
2390 or NULL if the file name is not to be printed
2391 it cannot be NULL if filenames[_nomatch]_only is set
2392
2393 Returns: 0 if there was at least one match
2394 1 otherwise (no matches)
2395 2 if an overlong line is encountered
2396 3 if there is a read error on a .bz2 file
2397 */
2398
2399 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2400 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2401 {
2402 int rc = 1;
2403 int filepos = 0;
2404 unsigned long int linenumber = 1;
2405 unsigned long int lastmatchnumber = 0;
2406 unsigned long int count = 0;
2407 char *lastmatchrestart = main_buffer;
2408 char *ptr = main_buffer;
2409 char *endptr;
2410 PCRE2_SIZE bufflength;
2411 BOOL binary = FALSE;
2412 BOOL endhyphenpending = FALSE;
2413 BOOL input_line_buffered = line_buffered;
2414 FILE *in = NULL; /* Ensure initialized */
2415
2416 /* Do the first read into the start of the buffer and set up the pointer to end
2417 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2418 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2419 fail. */
2420
2421 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2422 {
2423 in = (FILE *)handle;
2424 if (is_file_tty(in)) input_line_buffered = TRUE;
2425 }
2426 else input_line_buffered = FALSE;
2427
2428 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2429 input_line_buffered);
2430
2431 #ifdef SUPPORT_LIBBZ2
2432 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */
2433 #endif
2434
2435 endptr = main_buffer + bufflength;
2436
2437 /* Unless binary-files=text, see if we have a binary file. This uses the same
2438 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2439 file. However, when the newline convention is binary zero, we can't do this. */
2440
2441 if (binary_files != BIN_TEXT)
2442 {
2443 if (endlinetype != PCRE2_NEWLINE_NUL)
2444 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2445 != NULL;
2446 if (binary && binary_files == BIN_NOMATCH) return 1;
2447 }
2448
2449 /* Loop while the current pointer is not at the end of the file. For large
2450 files, endptr will be at the end of the buffer when we are in the middle of the
2451 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2452 way, the buffer is shifted left and re-filled. */
2453
2454 while (ptr < endptr)
2455 {
2456 int endlinelength;
2457 int mrc = 0;
2458 unsigned int options = 0;
2459 BOOL match;
2460 char *t = ptr;
2461 PCRE2_SIZE length, linelength;
2462 PCRE2_SIZE startoffset = 0;
2463
2464 /* At this point, ptr is at the start of a line. We need to find the length
2465 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2466 length remainder of the data in the buffer. Otherwise, it is the length of
2467 the next line, excluding the terminating newline. After matching, we always
2468 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2469 option is used for compiling, so that any match is constrained to be in the
2470 first line. */
2471
2472 t = end_of_line(t, endptr, &endlinelength);
2473 linelength = t - ptr - endlinelength;
2474 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2475
2476 /* Check to see if the line we are looking at extends right to the very end
2477 of the buffer without a line terminator. This means the line is too long to
2478 handle at the current buffer size. Until the buffer reaches its maximum size,
2479 try doubling it and reading more data. */
2480
2481 if (endlinelength == 0 && t == main_buffer + bufsize)
2482 {
2483 if (bufthird < max_bufthird)
2484 {
2485 char *new_buffer;
2486 int new_bufthird = 2*bufthird;
2487
2488 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2489 new_buffer = (char *)malloc(3*new_bufthird);
2490
2491 if (new_buffer == NULL)
2492 {
2493 fprintf(stderr,
2494 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2495 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2496 linenumber,
2497 (filename == NULL)? "" : " of file ",
2498 (filename == NULL)? "" : filename,
2499 new_bufthird);
2500 return 2;
2501 }
2502
2503 /* Copy the data and adjust pointers to the new buffer location. */
2504
2505 memcpy(new_buffer, main_buffer, bufsize);
2506 bufthird = new_bufthird;
2507 bufsize = 3*bufthird;
2508 ptr = new_buffer + (ptr - main_buffer);
2509 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2510 free(main_buffer);
2511 main_buffer = new_buffer;
2512
2513 /* Read more data into the buffer and then try to find the line ending
2514 again. */
2515
2516 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2517 bufsize - bufflength, input_line_buffered);
2518 endptr = main_buffer + bufflength;
2519 continue;
2520 }
2521 else
2522 {
2523 fprintf(stderr,
2524 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2525 "pcre2grep: the maximum buffer size is %d\n"
2526 "pcre2grep: use the --max-buffer-size option to change it\n",
2527 linenumber,
2528 (filename == NULL)? "" : " of file ",
2529 (filename == NULL)? "" : filename,
2530 bufthird);
2531 return 2;
2532 }
2533 }
2534
2535 /* Extra processing for Jeffrey Friedl's debugging. */
2536
2537 #ifdef JFRIEDL_DEBUG
2538 if (jfriedl_XT || jfriedl_XR)
2539 {
2540 # include <sys/time.h>
2541 # include <time.h>
2542 struct timeval start_time, end_time;
2543 struct timezone dummy;
2544 int i;
2545
2546 if (jfriedl_XT)
2547 {
2548 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2549 const char *orig = ptr;
2550 ptr = malloc(newlen + 1);
2551 if (!ptr) {
2552 printf("out of memory");
2553 pcre2grep_exit(2);
2554 }
2555 endptr = ptr;
2556 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2557 for (i = 0; i < jfriedl_XT; i++) {
2558 strncpy(endptr, orig, length);
2559 endptr += length;
2560 }
2561 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2562 length = newlen;
2563 }
2564
2565 if (gettimeofday(&start_time, &dummy) != 0)
2566 perror("bad gettimeofday");
2567
2568
2569 for (i = 0; i < jfriedl_XR; i++)
2570 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2571 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
2572
2573 if (gettimeofday(&end_time, &dummy) != 0)
2574 perror("bad gettimeofday");
2575
2576 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2577 -
2578 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2579
2580 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2581 return 0;
2582 }
2583 #endif
2584
2585 /* We come back here after a match when only_matching_count is non-zero, in
2586 order to find any further matches in the same line. This applies to
2587 --only-matching, --file-offsets, and --line-offsets. */
2588
2589 ONLY_MATCHING_RESTART:
2590
2591 /* Run through all the patterns until one matches or there is an error other
2592 than NOMATCH. This code is in a subroutine so that it can be re-used for
2593 finding subsequent matches when colouring matched lines. After finding one
2594 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2595 this line. */
2596
2597 match = match_patterns(ptr, length, options, startoffset, &mrc);
2598 options = PCRE2_NOTEMPTY;
2599
2600 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2601 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2602 return code - to output data lines, so that binary zeroes are treated as just
2603 another data character. */
2604
2605 if (match != invert)
2606 {
2607 BOOL hyphenprinted = FALSE;
2608
2609 /* We've failed if we want a file that doesn't have any matches. */
2610
2611 if (filenames == FN_NOMATCH_ONLY) return 1;
2612
2613 /* If all we want is a yes/no answer, we can return immediately. */
2614
2615 if (quiet) return 0;
2616
2617 /* Just count if just counting is wanted. */
2618
2619 else if (count_only || show_total_count) count++;
2620
2621 /* When handling a binary file and binary-files==binary, the "binary"
2622 variable will be set true (it's false in all other cases). In this
2623 situation we just want to output the file name. No need to scan further. */
2624
2625 else if (binary)
2626 {
2627 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2628 return 0;
2629 }
2630
2631 /* Likewise, if all we want is a file name, there is no need to scan any
2632 more lines in the file. */
2633
2634 else if (filenames == FN_MATCH_ONLY)
2635 {
2636 fprintf(stdout, "%s" STDOUT_NL, printname);
2637 return 0;
2638 }
2639
2640 /* The --only-matching option prints just the substring that matched,
2641 and/or one or more captured portions of it, as long as these strings are
2642 not empty. The --file-offsets and --line-offsets options output offsets for
2643 the matching substring (all three set only_matching_count non-zero). None
2644 of these mutually exclusive options prints any context. Afterwards, adjust
2645 the start and then jump back to look for further matches in the same line.
2646 If we are in invert mode, however, nothing is printed and we do not restart
2647 - this could still be useful because the return code is set. */
2648
2649 else if (only_matching_count != 0)
2650 {
2651 if (!invert)
2652 {
2653 PCRE2_SIZE oldstartoffset;
2654
2655 if (printname != NULL) fprintf(stdout, "%s:", printname);
2656 if (number) fprintf(stdout, "%lu:", linenumber);
2657
2658 /* Handle --line-offsets */
2659
2660 if (line_offsets)
2661 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2662 (int)(offsets[1] - offsets[0]));
2663
2664 /* Handle --file-offsets */
2665
2666 else if (file_offsets)
2667 fprintf(stdout, "%d,%d" STDOUT_NL,
2668 (int)(filepos + ptr + offsets[0] - ptr),
2669 (int)(offsets[1] - offsets[0]));
2670
2671 /* Handle --output (which has already been syntax checked) */
2672
2673 else if (output_text != NULL)
2674 {
2675 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2676 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2677 number)
2678 fprintf(stdout, STDOUT_NL);
2679 }
2680
2681 /* Handle --only-matching, which may occur many times */
2682
2683 else
2684 {
2685 BOOL printed = FALSE;
2686 omstr *om;
2687
2688 for (om = only_matching; om != NULL; om = om->next)
2689 {
2690 int n = om->groupnum;
2691 if (n < mrc)
2692 {
2693 int plen = offsets[2*n + 1] - offsets[2*n];
2694 if (plen > 0)
2695 {
2696 if (printed && om_separator != NULL)
2697 fprintf(stdout, "%s", om_separator);
2698 print_match(ptr + offsets[n*2], plen);
2699 printed = TRUE;
2700 }
2701 }
2702 }
2703
2704 if (printed || printname != NULL || number)
2705 fprintf(stdout, STDOUT_NL);
2706 }
2707
2708 /* Prepare to repeat to find the next match in the line. */
2709
2710 match = FALSE;
2711 if (line_buffered) fflush(stdout);
2712 rc = 0; /* Had some success */
2713
2714 /* If the pattern contained a lookbehind that included \K, it is
2715 possible that the end of the match might be at or before the actual
2716 starting offset we have just used. In this case, start one character
2717 further on. */
2718
2719 startoffset = offsets[1]; /* Restart after the match */
2720 oldstartoffset = pcre2_get_startchar(match_data);
2721 if (startoffset <= oldstartoffset)
2722 {
2723 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2724 startoffset = oldstartoffset + 1;
2725 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2726 }
2727
2728 /* If the current match ended past the end of the line (only possible
2729 in multiline mode), we must move on to the line in which it did end
2730 before searching for more matches. */
2731
2732 while (startoffset > linelength)
2733 {
2734 ptr += linelength + endlinelength;
2735 filepos += (int)(linelength + endlinelength);
2736 linenumber++;
2737 startoffset -= (int)(linelength + endlinelength);
2738 t = end_of_line(ptr, endptr, &endlinelength);
2739 linelength = t - ptr - endlinelength;
2740 length = (PCRE2_SIZE)(endptr - ptr);
2741 }
2742
2743 goto ONLY_MATCHING_RESTART;
2744 }
2745 }
2746
2747 /* This is the default case when none of the above options is set. We print
2748 the matching lines(s), possibly preceded and/or followed by other lines of
2749 context. */
2750
2751 else
2752 {
2753 /* See if there is a requirement to print some "after" lines from a
2754 previous match. We never print any overlaps. */
2755
2756 if (after_context > 0 && lastmatchnumber > 0)
2757 {
2758 int ellength;
2759 int linecount = 0;
2760 char *p = lastmatchrestart;
2761
2762 while (p < ptr && linecount < after_context)
2763 {
2764 p = end_of_line(p, ptr, &ellength);
2765 linecount++;
2766 }
2767
2768 /* It is important to advance lastmatchrestart during this printing so
2769 that it interacts correctly with any "before" printing below. Print
2770 each line's data using fwrite() in case there are binary zeroes. */
2771
2772 while (lastmatchrestart < p)
2773 {
2774 char *pp = lastmatchrestart;
2775 if (printname != NULL) fprintf(stdout, "%s-", printname);
2776 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2777 pp = end_of_line(pp, endptr, &ellength);
2778 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2779 lastmatchrestart = pp;
2780 }
2781 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2782 }
2783
2784 /* If there were non-contiguous lines printed above, insert hyphens. */
2785
2786 if (hyphenpending)
2787 {
2788 fprintf(stdout, "--" STDOUT_NL);
2789 hyphenpending = FALSE;
2790 hyphenprinted = TRUE;
2791 }
2792
2793 /* See if there is a requirement to print some "before" lines for this
2794 match. Again, don't print overlaps. */
2795
2796 if (before_context > 0)
2797 {
2798 int linecount = 0;
2799 char *p = ptr;
2800
2801 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2802 linecount < before_context)
2803 {
2804 linecount++;
2805 p = previous_line(p, main_buffer);
2806 }
2807
2808 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2809 fprintf(stdout, "--" STDOUT_NL);
2810
2811 while (p < ptr)
2812 {
2813 int ellength;
2814 char *pp = p;
2815 if (printname != NULL) fprintf(stdout, "%s-", printname);
2816 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2817 pp = end_of_line(pp, endptr, &ellength);
2818 FWRITE_IGNORE(p, 1, pp - p, stdout);
2819 p = pp;
2820 }
2821 }
2822
2823 /* Now print the matching line(s); ensure we set hyphenpending at the end
2824 of the file if any context lines are being output. */
2825
2826 if (after_context > 0 || before_context > 0)
2827 endhyphenpending = TRUE;
2828
2829 if (printname != NULL) fprintf(stdout, "%s:", printname);
2830 if (number) fprintf(stdout, "%lu:", linenumber);
2831
2832 /* This extra option, for Jeffrey Friedl's debugging requirements,
2833 replaces the matched string, or a specific captured string if it exists,
2834 with X. When this happens, colouring is ignored. */
2835
2836 #ifdef JFRIEDL_DEBUG
2837 if (S_arg >= 0 && S_arg < mrc)
2838 {
2839 int first = S_arg * 2;
2840 int last = first + 1;
2841 FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2842 fprintf(stdout, "X");
2843 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2844 }
2845 else
2846 #endif
2847
2848 /* In multiline mode, or if colouring, we have to split the line(s) up
2849 and search for further matches, but not of course if the line is a
2850 non-match. In multiline mode this is necessary in case there is another
2851 match that spans the end of the current line. When colouring we want to
2852 colour all matches. */
2853
2854 if ((multiline || do_colour) && !invert)
2855 {
2856 int plength;
2857 PCRE2_SIZE endprevious;
2858
2859 /* The use of \K may make the end offset earlier than the start. In
2860 this situation, swap them round. */
2861
2862 if (offsets[0] > offsets[1])
2863 {
2864 PCRE2_SIZE temp = offsets[0];
2865 offsets[0] = offsets[1];
2866 offsets[1] = temp;
2867 }
2868
2869 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2870 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2871
2872 for (;;)
2873 {
2874 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2875
2876 endprevious = offsets[1];
2877 startoffset = endprevious; /* Advance after previous match. */
2878
2879 /* If the pattern contained a lookbehind that included \K, it is
2880 possible that the end of the match might be at or before the actual
2881 starting offset we have just used. In this case, start one character
2882 further on. */
2883
2884 if (startoffset <= oldstartoffset)
2885 {
2886 startoffset = oldstartoffset + 1;
2887 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2888 }
2889
2890 /* If the current match ended past the end of the line (only possible
2891 in multiline mode), we must move on to the line in which it did end
2892 before searching for more matches. Because the PCRE2_FIRSTLINE option
2893 is set, the start of the match will always be before the first
2894 newline sequence. */
2895
2896 while (startoffset > linelength + endlinelength)
2897 {
2898 ptr += linelength + endlinelength;
2899 filepos += (int)(linelength + endlinelength);
2900 linenumber++;
2901 startoffset -= (int)(linelength + endlinelength);
2902 endprevious -= (int)(linelength + endlinelength);
2903 t = end_of_line(ptr, endptr, &endlinelength);
2904 linelength = t - ptr - endlinelength;
2905 length = (PCRE2_SIZE)(endptr - ptr);
2906 }
2907
2908 /* If startoffset is at the exact end of the line it means this
2909 complete line was the final part of the match, so there is nothing
2910 more to do. */
2911
2912 if (startoffset == linelength + endlinelength) break;
2913
2914 /* Otherwise, run a match from within the final line, and if found,
2915 loop for any that may follow. */
2916
2917 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2918
2919 /* The use of \K may make the end offset earlier than the start. In
2920 this situation, swap them round. */
2921
2922 if (offsets[0] > offsets[1])
2923 {
2924 PCRE2_SIZE temp = offsets[0];
2925 offsets[0] = offsets[1];
2926 offsets[1] = temp;
2927 }
2928
2929 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
2930 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2931 }
2932
2933 /* In multiline mode, we may have already printed the complete line
2934 and its line-ending characters (if they matched the pattern), so there
2935 may be no more to print. */
2936
2937 plength = (int)((linelength + endlinelength) - endprevious);
2938 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
2939 }
2940
2941 /* Not colouring or multiline; no need to search for further matches. */
2942
2943 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
2944 }
2945
2946 /* End of doing what has to be done for a match. If --line-buffered was
2947 given, flush the output. */
2948
2949 if (line_buffered) fflush(stdout);
2950 rc = 0; /* Had some success */
2951
2952 /* Remember where the last match happened for after_context. We remember
2953 where we are about to restart, and that line's number. */
2954
2955 lastmatchrestart = ptr + linelength + endlinelength;
2956 lastmatchnumber = linenumber + 1;
2957 }
2958
2959 /* For a match in multiline inverted mode (which of course did not cause
2960 anything to be printed), we have to move on to the end of the match before
2961 proceeding. */
2962
2963 if (multiline && invert && match)
2964 {
2965 int ellength;
2966 char *endmatch = ptr + offsets[1];
2967 t = ptr;
2968 while (t < endmatch)
2969 {
2970 t = end_of_line(t, endptr, &ellength);
2971 if (t <= endmatch) linenumber++; else break;
2972 }
2973 endmatch = end_of_line(endmatch, endptr, &ellength);
2974 linelength = endmatch - ptr - ellength;
2975 }
2976
2977 /* Advance to after the newline and increment the line number. The file
2978 offset to the current line is maintained in filepos. */
2979
2980 END_ONE_MATCH:
2981 ptr += linelength + endlinelength;
2982 filepos += (int)(linelength + endlinelength);
2983 linenumber++;
2984
2985 /* If input is line buffered, and the buffer is not yet full, read another
2986 line and add it into the buffer. */
2987
2988 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
2989 {
2990 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2991 bufflength += add;
2992 endptr += add;
2993 }
2994
2995 /* If we haven't yet reached the end of the file (the buffer is full), and
2996 the current point is in the top 1/3 of the buffer, slide the buffer down by
2997 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2998 about to be lost, print them. */
2999
3000 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3001 {
3002 if (after_context > 0 &&
3003 lastmatchnumber > 0 &&
3004 lastmatchrestart < main_buffer + bufthird)
3005 {
3006 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3007 lastmatchnumber = 0; /* Indicates no after lines pending */
3008 }
3009
3010 /* Now do the shuffle */
3011
3012 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3013 ptr -= bufthird;
3014
3015 bufflength = 2*bufthird + fill_buffer(handle, frtype,
3016 main_buffer + 2*bufthird, bufthird, input_line_buffered);
3017 endptr = main_buffer + bufflength;
3018
3019 /* Adjust any last match point */
3020
3021 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3022 }
3023 } /* Loop through the whole file */
3024
3025 /* End of file; print final "after" lines if wanted; do_after_lines sets
3026 hyphenpending if it prints something. */
3027
3028 if (only_matching_count == 0 && !(count_only|show_total_count))
3029 {
3030 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3031 hyphenpending |= endhyphenpending;
3032 }
3033
3034 /* Print the file name if we are looking for those without matches and there
3035 were none. If we found a match, we won't have got this far. */
3036
3037 if (filenames == FN_NOMATCH_ONLY)
3038 {
3039 fprintf(stdout, "%s" STDOUT_NL, printname);
3040 return 0;
3041 }
3042
3043 /* Print the match count if wanted */
3044
3045 if (count_only && !quiet)
3046 {
3047 if (count > 0 || !omit_zero_count)
3048 {
3049 if (printname != NULL && filenames != FN_NONE)
3050 fprintf(stdout, "%s:", printname);
3051 fprintf(stdout, "%lu" STDOUT_NL, count);
3052 counts_printed++;
3053 }
3054 }
3055
3056 total_count += count; /* Can be set without count_only */
3057 return rc;
3058 }
3059
3060
3061
3062 /*************************************************
3063 * Grep a file or recurse into a directory *
3064 *************************************************/
3065
3066 /* Given a path name, if it's a directory, scan all the files if we are
3067 recursing; if it's a file, grep it.
3068
3069 Arguments:
3070 pathname the path to investigate
3071 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3072 only_one_at_top TRUE if the path is the only one at toplevel
3073
3074 Returns: -1 the file/directory was skipped
3075 0 if there was at least one match
3076 1 if there were no matches
3077 2 there was some kind of error
3078
3079 However, file opening failures are suppressed if "silent" is set.
3080 */
3081
3082 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3083 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3084 {
3085 int rc = 1;
3086 int frtype;
3087 void *handle;
3088 char *lastcomp;
3089 FILE *in = NULL; /* Ensure initialized */
3090
3091 #ifdef SUPPORT_LIBZ
3092 gzFile ingz = NULL;
3093 #endif
3094
3095 #ifdef SUPPORT_LIBBZ2
3096 BZFILE *inbz2 = NULL;
3097 #endif
3098
3099 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3100 int pathlen;
3101 #endif
3102
3103 #if defined NATIVE_ZOS
3104 int zos_type;
3105 FILE *zos_test_file;
3106 #endif
3107
3108 /* If the file name is "-" we scan stdin */
3109
3110 if (strcmp(pathname, "-") == 0)
3111 {
3112 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3113 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3114 stdin_name : NULL);
3115 }
3116
3117 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3118 directories, whereas --include and --exclude apply to everything else. The test
3119 is against the final component of the path. */
3120
3121 lastcomp = strrchr(pathname, FILESEP);
3122 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3123
3124 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3125 Otherwise, scan the directory and recurse for each path within it. The scanning
3126 code is localized so it can be made system-specific. */
3127
3128
3129 /* For z/OS, determine the file type. */
3130
3131 #if defined NATIVE_ZOS
3132 zos_test_file = fopen(pathname,"rb");
3133
3134 if (zos_test_file == NULL)
3135 {
3136 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3137 pathname, strerror(errno));
3138 return -1;
3139 }
3140 zos_type = identifyzosfiletype (zos_test_file);
3141 fclose (zos_test_file);
3142
3143 /* Handle a PDS in separate code */
3144
3145 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3146 {
3147 return travelonpdsdir (pathname, only_one_at_top);
3148 }
3149
3150 /* Deal with regular files in the normal way below. These types are:
3151 zos_type == __ZOS_PDS_MEMBER
3152 zos_type == __ZOS_PS
3153 zos_type == __ZOS_VSAM_KSDS
3154 zos_type == __ZOS_VSAM_ESDS
3155 zos_type == __ZOS_VSAM_RRDS
3156 */
3157
3158 /* Handle a z/OS directory using common code. */
3159
3160 else if (zos_type == __ZOS_HFS)
3161 {
3162 #endif /* NATIVE_ZOS */
3163
3164
3165 /* Handle directories: common code for all OS */
3166
3167 if (isdirectory(pathname))
3168 {
3169 if (dee_action == dee_SKIP ||
3170 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3171 return -1;
3172
3173 if (dee_action == dee_RECURSE)
3174 {
3175 char buffer[FNBUFSIZ];
3176 char *nextfile;
3177 directory_type *dir = opendirectory(pathname);
3178
3179 if (dir == NULL)
3180 {
3181 if (!silent)
3182 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3183 strerror(errno));
3184 return 2;
3185 }
3186
3187 while ((nextfile = readdirectory(dir)) != NULL)
3188 {
3189 int frc;
3190 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3191 if (fnlength > FNBUFSIZ)
3192 {
3193 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3194 rc = 2;
3195 break;
3196 }
3197 sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3198 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3199 if (frc > 1) rc = frc;
3200 else if (frc == 0 && rc == 1) rc = 0;
3201 }
3202
3203 closedirectory(dir);
3204 return rc;
3205 }
3206 }
3207
3208 #ifdef WIN32
3209 if (iswild(pathname))
3210 {
3211 char buffer[1024];
3212 char *nextfile;
3213 char *name;
3214 directory_type *dir = opendirectory(pathname);
3215
3216 if (dir == NULL)
3217 return 0;
3218
3219 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3220 if (*nextfile == '/' || *nextfile == '\\')
3221 name = nextfile + 1;
3222 *name = 0;
3223
3224 while ((nextfile = readdirectory(dir)) != NULL)
3225 {
3226 int frc;
3227 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3228 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3229 if (frc > 1) rc = frc;
3230 else if (frc == 0 && rc == 1) rc = 0;
3231 }
3232
3233 closedirectory(dir);
3234 return rc;
3235 }
3236 #endif
3237
3238 #if defined NATIVE_ZOS
3239 }
3240 #endif
3241
3242 /* If the file is not a directory, check for a regular file, and if it is not,
3243 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3244 exclusion. */
3245
3246 else if (
3247 #if defined NATIVE_ZOS
3248 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3249 #else /* all other OS */
3250 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3251 #endif
3252 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3253 return -1; /* File skipped */
3254
3255 /* Control reaches here if we have a regular file, or if we have a directory
3256 and recursion or skipping was not requested, or if we have anything else and
3257 skipping was not requested. The scan proceeds. If this is the first and only
3258 argument at top level, we don't show the file name, unless we are only showing
3259 the file name, or the filename was forced (-H). */
3260
3261 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3262 pathlen = (int)(strlen(pathname));
3263 #endif
3264
3265 /* Open using zlib if it is supported and the file name ends with .gz. */
3266
3267 #ifdef SUPPORT_LIBZ
3268 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3269 {
3270 ingz = gzopen(pathname, "rb");
3271 if (ingz == NULL)
3272 {
3273 if (!silent)
3274 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3275 strerror(errno));
3276 return 2;
3277 }
3278 handle = (void *)ingz;
3279 frtype = FR_LIBZ;
3280 }
3281 else
3282 #endif
3283
3284 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3285
3286 #ifdef SUPPORT_LIBBZ2
3287 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3288 {
3289 inbz2 = BZ2_bzopen(pathname, "rb");
3290 handle = (void *)inbz2;
3291 frtype = FR_LIBBZ2;
3292 }
3293 else
3294 #endif
3295
3296 /* Otherwise use plain fopen(). The label is so that we can come back here if
3297 an attempt to read a .bz2 file indicates that it really is a plain file. */
3298
3299 #ifdef SUPPORT_LIBBZ2
3300 PLAIN_FILE:
3301 #endif
3302 {
3303 in = fopen(pathname, "rb");
3304 handle = (void *)in;
3305 frtype = FR_PLAIN;
3306 }
3307
3308 /* All the opening methods return errno when they fail. */
3309
3310 if (handle == NULL)
3311 {
3312 if (!silent)
3313 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3314 strerror(errno));
3315 return 2;
3316 }
3317
3318 /* Now grep the file */
3319
3320 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3321 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3322
3323 /* Close in an appropriate manner. */
3324
3325 #ifdef SUPPORT_LIBZ
3326 if (frtype == FR_LIBZ)
3327 gzclose(ingz);
3328 else
3329 #endif
3330
3331 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3332 read failed. If the error indicates that the file isn't in fact bzipped, try
3333 again as a normal file. */
3334
3335 #ifdef SUPPORT_LIBBZ2
3336 if (frtype == FR_LIBBZ2)
3337 {
3338 if (rc == 3)
3339 {
3340 int errnum;
3341 const char *err = BZ2_bzerror(inbz2, &errnum);
3342 if (errnum == BZ_DATA_ERROR_MAGIC)
3343 {
3344 BZ2_bzclose(inbz2);
3345 goto PLAIN_FILE;
3346 }
3347 else if (!silent)
3348 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3349 pathname, err);
3350 rc = 2; /* The normal "something went wrong" code */
3351 }
3352 BZ2_bzclose(inbz2);
3353 }
3354 else
3355 #endif
3356
3357 /* Normal file close */
3358
3359 fclose(in);
3360
3361 /* Pass back the yield from pcre2grep(). */
3362
3363 return rc;
3364 }
3365
3366
3367
3368 /*************************************************
3369 * Handle a single-letter, no data option *
3370 *************************************************/
3371
3372 static int
handle_option(int letter,int options)3373 handle_option(int letter, int options)
3374 {
3375 switch(letter)
3376 {
3377 case N_FOFFSETS: file_offsets = TRUE; break;
3378 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3379 case N_LBUFFER: line_buffered = TRUE; break;
3380 case N_LOFFSETS: line_offsets = number = TRUE; break;
3381 case N_NOJIT: use_jit = FALSE; break;
3382 case 'a': binary_files = BIN_TEXT; break;
3383 case 'c': count_only = TRUE; break;
3384 case 'F': options |= PCRE2_LITERAL; break;
3385 case 'H': filenames = FN_FORCE; break;
3386 case 'I': binary_files = BIN_NOMATCH; break;
3387 case 'h': filenames = FN_NONE; break;
3388 case 'i': options |= PCRE2_CASELESS; break;
3389 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3390 case 'L': filenames = FN_NOMATCH_ONLY; break;
3391 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3392 case 'n': number = TRUE; break;
3393
3394 case 'o':
3395 only_matching_last = add_number(0, only_matching_last);
3396 if (only_matching == NULL) only_matching = only_matching_last;
3397 break;
3398
3399 case 'q': quiet = TRUE; break;
3400 case 'r': dee_action = dee_RECURSE; break;
3401 case 's': silent = TRUE; break;
3402 case 't': show_total_count = TRUE; break;
3403 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3404 case 'v': invert = TRUE; break;
3405 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3406 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3407
3408 case 'V':
3409 {
3410 unsigned char buffer[128];
3411 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3412 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3413 }
3414 pcre2grep_exit(0);
3415 break;
3416
3417 default:
3418 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3419 pcre2grep_exit(usage(2));
3420 }
3421
3422 return options;
3423 }
3424
3425
3426
3427 /*************************************************
3428 * Construct printed ordinal *
3429 *************************************************/
3430
/* Turn a number into its printed ordinal form, for example "1st", "3rd", or
"12th". The text is built in a static buffer, so each call overwrites the
result of the previous one.

Argument:   n    the number
Returns:         pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
static char text[14];
const char *suffix = "th";
int digits = sprintf(text, "%d", n);   /* Number of characters written */
int lasttwo = n % 100;

/* Numbers ending in 11, 12, or 13 take "th"; otherwise the final digit
selects the suffix. */

if (lasttwo < 11 || lasttwo > 13)
  {
  int lastone = lasttwo % 10;
  if (lastone == 1) suffix = "st";
  else if (lastone == 2) suffix = "nd";
  else if (lastone == 3) suffix = "rd";
  }

strcpy(text + digits, suffix);
return text;
}
3451
3452
3453
3454 /*************************************************
3455 * Compile a single pattern *
3456 *************************************************/
3457
3458 /* Do nothing if the pattern has already been compiled. This is the case for
3459 include/exclude patterns read from a file.
3460
3461 When the -F option has been used, each "pattern" may be a list of strings,
3462 separated by line breaks. They will be matched literally. We split such a
3463 string and compile the first substring, inserting an additional block into the
3464 pattern chain.
3465
3466 Arguments:
3467 p points to the pattern block
3468 options the PCRE options
3469 fromfile TRUE if the pattern was read from a file
3470 fromtext file name or identifying text (e.g. "include")
3471 count 0 if this is the only command line pattern, or
3472 number of the command line pattern, or
3473 linenumber for a pattern from a file
3474
3475 Returns: TRUE on success, FALSE after an error
3476 */
3477
3478 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3479 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3480 int count)
3481 {
3482 char *ps;
3483 int errcode;
3484 PCRE2_SIZE patlen, erroffset;
3485 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3486
3487 if (p->compiled != NULL) return TRUE;
3488 ps = p->string;
3489 patlen = p->length;
3490
3491 if ((options & PCRE2_LITERAL) != 0)
3492 {
3493 int ellength;
3494 char *eop = ps + patlen;
3495 char *pe = end_of_line(ps, eop, &ellength);
3496
3497 if (ellength != 0)
3498 {
3499 patlen = pe - ps - ellength;
3500 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3501 }
3502 }
3503
3504 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3505 &erroffset, compile_context);
3506
3507 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3508 ignore any JIT compiler errors, relying falling back to interpreting if
3509 anything goes wrong with JIT. */
3510
3511 if (p->compiled != NULL)
3512 {
3513 #ifdef SUPPORT_PCRE2GREP_JIT
3514 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3515 #endif
3516 return TRUE;
3517 }
3518
3519 /* Handle compile errors */
3520
3521 if (erroffset > patlen) erroffset = patlen;
3522 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3523
3524 if (fromfile)
3525 {
3526 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3527 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3528 }
3529 else
3530 {
3531 if (count == 0)
3532 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3533 fromtext, (int)erroffset, errmessbuffer);
3534 else
3535 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3536 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3537 }
3538
3539 return FALSE;
3540 }
3541
3542
3543
3544 /*************************************************
3545 * Read and compile a file of patterns *
3546 *************************************************/
3547
3548 /* This is used for --filelist, --include-from, and --exclude-from.
3549
3550 Arguments:
3551 name the name of the file; "-" is stdin
3552 patptr pointer to the pattern chain anchor
3553 patlastptr pointer to the last pattern pointer
3554
3555 Returns: TRUE if all went well
3556 */
3557
3558 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3559 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3560 {
3561 int linenumber = 0;
3562 PCRE2_SIZE patlen;
3563 FILE *f;
3564 const char *filename;
3565 char buffer[MAXPATLEN+20];
3566
3567 if (strcmp(name, "-") == 0)
3568 {
3569 f = stdin;
3570 filename = stdin_name;
3571 }
3572 else
3573 {
3574 f = fopen(name, "r");
3575 if (f == NULL)
3576 {
3577 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3578 return FALSE;
3579 }
3580 filename = name;
3581 }
3582
3583 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3584 {
3585 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3586 linenumber++;
3587 if (patlen == 0) continue; /* Skip blank lines */
3588
3589 /* Note: this call to add_pattern() puts a pointer to the local variable
3590 "buffer" into the pattern chain. However, that pointer is used only when
3591 compiling the pattern, which happens immediately below, so we flatten it
3592 afterwards, as a precaution against any later code trying to use it. */
3593
3594 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3595 if (*patlastptr == NULL)
3596 {
3597 if (f != stdin) fclose(f);
3598 return FALSE;
3599 }
3600 if (*patptr == NULL) *patptr = *patlastptr;
3601
3602 /* This loop is needed because compiling a "pattern" when -F is set may add
3603 on additional literal patterns if the original contains a newline. In the
3604 common case, it never will, because read_one_line() stops at a newline.
3605 However, the -N option can be used to give pcre2grep a different newline
3606 setting. */
3607
3608 for(;;)
3609 {
3610 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3611 linenumber))
3612 {
3613 if (f != stdin) fclose(f);
3614 return FALSE;
3615 }
3616 (*patlastptr)->string = NULL; /* Insurance */
3617 if ((*patlastptr)->next == NULL) break;
3618 *patlastptr = (*patlastptr)->next;
3619 }
3620 }
3621
3622 if (f != stdin) fclose(f);
3623 return TRUE;
3624 }
3625
3626
3627
3628 /*************************************************
3629 * Main program *
3630 *************************************************/
3631
3632 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3633
3634 int
main(int argc,char ** argv)3635 main(int argc, char **argv)
3636 {
3637 int i, j;
3638 int rc = 1;
3639 BOOL only_one_at_top;
3640 patstr *cp;
3641 fnstr *fn;
3642 const char *locale_from = "--locale";
3643
3644 #ifdef SUPPORT_PCRE2GREP_JIT
3645 pcre2_jit_stack *jit_stack = NULL;
3646 #endif
3647
3648 /* In Windows, stdout is set up as a text stream, which means that \n is
3649 converted to \r\n. This causes output lines that are copied from the input to
3650 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3651 that stdout is a binary stream. Note that this means all other output to stdout
3652 must use STDOUT_NL to terminate lines. */
3653
3654 #ifdef WIN32
3655 _setmode(_fileno(stdout), _O_BINARY);
3656 #endif
3657
3658 /* Set up a default compile and match contexts and a match data block. */
3659
3660 compile_context = pcre2_compile_context_create(NULL);
3661 match_context = pcre2_match_context_create(NULL);
3662 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
3663 offsets = pcre2_get_ovector_pointer(match_data);
3664
3665 /* If string (script) callouts are supported, set up the callout processing
3666 function. */
3667
3668 #ifdef SUPPORT_PCRE2GREP_CALLOUT
3669 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
3670 #endif
3671
3672 /* Process the options */
3673
3674 for (i = 1; i < argc; i++)
3675 {
3676 option_item *op = NULL;
3677 char *option_data = (char *)""; /* default to keep compiler happy */
3678 BOOL longop;
3679 BOOL longopwasequals = FALSE;
3680
3681 if (argv[i][0] != '-') break;
3682
3683 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3684 but only if we have previously had -e or -f to define the patterns. */
3685
3686 if (argv[i][1] == 0)
3687 {
3688 if (pattern_files != NULL || patterns != NULL) break;
3689 else pcre2grep_exit(usage(2));
3690 }
3691
3692 /* Handle a long name option, or -- to terminate the options */
3693
3694 if (argv[i][1] == '-')
3695 {
3696 char *arg = argv[i] + 2;
3697 char *argequals = strchr(arg, '=');
3698
3699 if (*arg == 0) /* -- terminates options */
3700 {
3701 i++;
3702 break; /* out of the options-handling loop */
3703 }
3704
3705 longop = TRUE;
3706
3707 /* Some long options have data that follows after =, for example file=name.
3708 Some options have variations in the long name spelling: specifically, we
3709 allow "regexp" because GNU grep allows it, though I personally go along
3710 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3711 These options are entered in the table as "regex(p)". Options can be in
3712 both these categories. */
3713
3714 for (op = optionlist; op->one_char != 0; op++)
3715 {
3716 char *opbra = strchr(op->long_name, '(');
3717 char *equals = strchr(op->long_name, '=');
3718
3719 /* Handle options with only one spelling of the name */
3720
3721 if (opbra == NULL) /* Does not contain '(' */
3722 {
3723 if (equals == NULL) /* Not thing=data case */
3724 {
3725 if (strcmp(arg, op->long_name) == 0) break;
3726 }
3727 else /* Special case xxx=data */
3728 {
3729 int oplen = (int)(equals - op->long_name);
3730 int arglen = (argequals == NULL)?
3731 (int)strlen(arg) : (int)(argequals - arg);
3732 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3733 {
3734 option_data = arg + arglen;
3735 if (*option_data == '=')
3736 {
3737 option_data++;
3738 longopwasequals = TRUE;
3739 }
3740 break;
3741 }
3742 }
3743 }
3744
3745 /* Handle options with an alternate spelling of the name */
3746
3747 else
3748 {
3749 char buff1[24];
3750 char buff2[24];
3751 int ret;
3752
3753 int baselen = (int)(opbra - op->long_name);
3754 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3755 int arglen = (argequals == NULL || equals == NULL)?
3756 (int)strlen(arg) : (int)(argequals - arg);
3757
3758 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3759 ret < 0 || ret > (int)sizeof(buff1)) ||
3760 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3761 fulllen - baselen - 2, opbra + 1),
3762 ret < 0 || ret > (int)sizeof(buff2)))
3763 {
3764 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3765 op->long_name);
3766 pcre2grep_exit(2);
3767 }
3768
3769 if (strncmp(arg, buff1, arglen) == 0 ||
3770 strncmp(arg, buff2, arglen) == 0)
3771 {
3772 if (equals != NULL && argequals != NULL)
3773 {
3774 option_data = argequals;
3775 if (*option_data == '=')
3776 {
3777 option_data++;
3778 longopwasequals = TRUE;
3779 }
3780 }
3781 break;
3782 }
3783 }
3784 }
3785
3786 if (op->one_char == 0)
3787 {
3788 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3789 pcre2grep_exit(usage(2));
3790 }
3791 }
3792
3793 /* Jeffrey Friedl's debugging harness uses these additional options which
3794 are not in the right form for putting in the option table because they use
3795 only one hyphen, yet are more than one character long. By putting them
3796 separately here, they will not get displayed as part of the help() output,
3797 but I don't think Jeffrey will care about that. */
3798
3799 #ifdef JFRIEDL_DEBUG
3800 else if (strcmp(argv[i], "-pre") == 0) {
3801 jfriedl_prefix = argv[++i];
3802 continue;
3803 } else if (strcmp(argv[i], "-post") == 0) {
3804 jfriedl_postfix = argv[++i];
3805 continue;
3806 } else if (strcmp(argv[i], "-XT") == 0) {
3807 sscanf(argv[++i], "%d", &jfriedl_XT);
3808 continue;
3809 } else if (strcmp(argv[i], "-XR") == 0) {
3810 sscanf(argv[++i], "%d", &jfriedl_XR);
3811 continue;
3812 }
3813 #endif
3814
3815
3816 /* One-char options; many that have no data may be in a single argument; we
3817 continue till we hit the last one or one that needs data. */
3818
3819 else
3820 {
3821 char *s = argv[i] + 1;
3822 longop = FALSE;
3823
3824 while (*s != 0)
3825 {
3826 for (op = optionlist; op->one_char != 0; op++)
3827 {
3828 if (*s == op->one_char) break;
3829 }
3830 if (op->one_char == 0)
3831 {
3832 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3833 *s, argv[i]);
3834 pcre2grep_exit(usage(2));
3835 }
3836
3837 option_data = s+1;
3838
3839 /* Break out if this is the last character in the string; it's handled
3840 below like a single multi-char option. */
3841
3842 if (*option_data == 0) break;
3843
3844 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3845 are used for ones that either have a numerical number or defaults, i.e.
3846 the data is optional. If a digit follows, there is data; if not, carry on
3847 with other single-character options in the same string. */
3848
3849 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3850 {
3851 if (isdigit((unsigned char)s[1])) break;
3852 }
3853 else /* Check for an option with data */
3854 {
3855 if (op->type != OP_NODATA) break;
3856 }
3857
3858 /* Handle a single-character option with no data, then loop for the
3859 next character in the string. */
3860
3861 pcre2_options = handle_option(*s++, pcre2_options);
3862 }
3863 }
3864
3865 /* At this point we should have op pointing to a matched option. If the type
3866 is NO_DATA, it means that there is no data, and the option might set
3867 something in the PCRE options. */
3868
3869 if (op->type == OP_NODATA)
3870 {
3871 pcre2_options = handle_option(op->one_char, pcre2_options);
3872 continue;
3873 }
3874
3875 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3876 either has a value or defaults to something. It cannot have data in a
3877 separate item. At the moment, the only such options are "colo(u)r",
3878 "only-matching", and Jeffrey Friedl's special -S debugging option. */
3879
3880 if (*option_data == 0 &&
3881 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3882 op->type == OP_OP_NUMBERS))
3883 {
3884 switch (op->one_char)
3885 {
3886 case N_COLOUR:
3887 colour_option = "auto";
3888 break;
3889
3890 case 'o':
3891 only_matching_last = add_number(0, only_matching_last);
3892 if (only_matching == NULL) only_matching = only_matching_last;
3893 break;
3894
3895 #ifdef JFRIEDL_DEBUG
3896 case 'S':
3897 S_arg = 0;
3898 break;
3899 #endif
3900 }
3901 continue;
3902 }
3903
3904 /* Otherwise, find the data string for the option. */
3905
3906 if (*option_data == 0)
3907 {
3908 if (i >= argc - 1 || longopwasequals)
3909 {
3910 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3911 pcre2grep_exit(usage(2));
3912 }
3913 option_data = argv[++i];
3914 }
3915
3916 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3917 added to a chain of numbers. */
3918
3919 if (op->type == OP_OP_NUMBERS)
3920 {
3921 unsigned long int n = decode_number(option_data, op, longop);
3922 omdatastr *omd = (omdatastr *)op->dataptr;
3923 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3924 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3925 }
3926
3927 /* If the option type is OP_PATLIST, it's the -e option, or one of the
3928 include/exclude options, which can be called multiple times to create lists
3929 of patterns. */
3930
3931 else if (op->type == OP_PATLIST)
3932 {
3933 patdatastr *pd = (patdatastr *)op->dataptr;
3934 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
3935 *(pd->lastptr));
3936 if (*(pd->lastptr) == NULL) goto EXIT2;
3937 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3938 }
3939
3940 /* If the option type is OP_FILELIST, it's one of the options that names a
3941 file. */
3942
3943 else if (op->type == OP_FILELIST)
3944 {
3945 fndatastr *fd = (fndatastr *)op->dataptr;
3946 fn = (fnstr *)malloc(sizeof(fnstr));
3947 if (fn == NULL)
3948 {
3949 fprintf(stderr, "pcre2grep: malloc failed\n");
3950 goto EXIT2;
3951 }
3952 fn->next = NULL;
3953 fn->name = option_data;
3954 if (*(fd->anchor) == NULL)
3955 *(fd->anchor) = fn;
3956 else
3957 (*(fd->lastptr))->next = fn;
3958 *(fd->lastptr) = fn;
3959 }
3960
3961 /* Handle OP_BINARY_FILES */
3962
3963 else if (op->type == OP_BINFILES)
3964 {
3965 if (strcmp(option_data, "binary") == 0)
3966 binary_files = BIN_BINARY;
3967 else if (strcmp(option_data, "without-match") == 0)
3968 binary_files = BIN_NOMATCH;
3969 else if (strcmp(option_data, "text") == 0)
3970 binary_files = BIN_TEXT;
3971 else
3972 {
3973 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3974 option_data);
3975 pcre2grep_exit(usage(2));
3976 }
3977 }
3978
3979 /* Otherwise, deal with a single string or numeric data value. */
3980
3981 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3982 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
3983 {
3984 *((char **)op->dataptr) = option_data;
3985 }
3986 else
3987 {
3988 unsigned long int n = decode_number(option_data, op, longop);
3989 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3990 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
3991 else *((int *)op->dataptr) = n;
3992 }
3993 }
3994
3995 /* Options have been decoded. If -C was used, its value is used as a default
3996 for -A and -B. */
3997
3998 if (both_context > 0)
3999 {
4000 if (after_context == 0) after_context = both_context;
4001 if (before_context == 0) before_context = both_context;
4002 }
4003
4004 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4005 permitted. They display, each in their own way, only the data that has matched.
4006 */
4007
4008 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4009 file_offsets + line_offsets;
4010
4011 if (only_matching_count > 1)
4012 {
4013 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4014 "--file-offsets and/or --line-offsets\n");
4015 pcre2grep_exit(usage(2));
4016 }
4017
4018 /* Check the text supplied to --output for errors. */
4019
4020 if (output_text != NULL &&
4021 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4022 goto EXIT2;
4023
4024 /* Put limits into the match data block. */
4025
4026 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4027 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4028 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4029
4030 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4031 LC_ALL environment variable is set, and if so, use it. */
4032
4033 if (locale == NULL)
4034 {
4035 locale = getenv("LC_ALL");
4036 locale_from = "LC_ALL";
4037 }
4038
4039 if (locale == NULL)
4040 {
4041 locale = getenv("LC_CTYPE");
4042 locale_from = "LC_CTYPE";
4043 }
4044
4045 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4046 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4047
4048 if (locale != NULL)
4049 {
4050 if (setlocale(LC_CTYPE, locale) == NULL)
4051 {
4052 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4053 locale, locale_from);
4054 goto EXIT2;
4055 }
4056 character_tables = pcre2_maketables(NULL);
4057 pcre2_set_character_tables(compile_context, character_tables);
4058 }
4059
4060 /* Sort out colouring */
4061
4062 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4063 {
4064 if (strcmp(colour_option, "always") == 0)
4065 #ifdef WIN32
4066 do_ansi = !is_stdout_tty(),
4067 #endif
4068 do_colour = TRUE;
4069 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4070 else
4071 {
4072 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4073 colour_option);
4074 goto EXIT2;
4075 }
4076 if (do_colour)
4077 {
4078 char *cs = getenv("PCRE2GREP_COLOUR");
4079 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4080 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4081 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4082 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4083 if (cs == NULL) cs = getenv("GREP_COLOR");
4084 if (cs != NULL)
4085 {
4086 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4087 }
4088 #ifdef WIN32
4089 init_colour_output();
4090 #endif
4091 }
4092 }
4093
4094 /* Sort out a newline setting. */
4095
4096 if (newline_arg != NULL)
4097 {
4098 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4099 endlinetype++)
4100 {
4101 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4102 }
4103 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4104 pcre2_set_newline(compile_context, endlinetype);
4105 else
4106 {
4107 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4108 newline_arg);
4109 goto EXIT2;
4110 }
4111 }
4112
4113 /* Find default newline convention */
4114
4115 else
4116 {
4117 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4118 }
4119
4120 /* Interpret the text values for -d and -D */
4121
4122 if (dee_option != NULL)
4123 {
4124 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4125 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4126 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4127 else
4128 {
4129 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4130 goto EXIT2;
4131 }
4132 }
4133
4134 if (DEE_option != NULL)
4135 {
4136 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4137 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4138 else
4139 {
4140 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4141 goto EXIT2;
4142 }
4143 }
4144
4145 /* Set the extra options */
4146
4147 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4148
4149 /* Check the values for Jeffrey Friedl's debugging options. */
4150
4151 #ifdef JFRIEDL_DEBUG
4152 if (S_arg > 9)
4153 {
4154 fprintf(stderr, "pcre2grep: bad value for -S option\n");
4155 return 2;
4156 }
4157 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4158 {
4159 if (jfriedl_XT == 0) jfriedl_XT = 1;
4160 if (jfriedl_XR == 0) jfriedl_XR = 1;
4161 }
4162 #endif
4163
4164 /* If use_jit is set, check whether JIT is available. If not, do not try
4165 to use JIT. */
4166
4167 if (use_jit)
4168 {
4169 uint32_t answer;
4170 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4171 if (!answer) use_jit = FALSE;
4172 }
4173
4174 /* Get memory for the main buffer. */
4175
4176 if (bufthird <= 0)
4177 {
4178 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4179 goto EXIT2;
4180 }
4181
4182 bufsize = 3*bufthird;
4183 main_buffer = (char *)malloc(bufsize);
4184
4185 if (main_buffer == NULL)
4186 {
4187 fprintf(stderr, "pcre2grep: malloc failed\n");
4188 goto EXIT2;
4189 }
4190
4191 /* If no patterns were provided by -e, and there are no files provided by -f,
4192 the first argument is the one and only pattern, and it must exist. */
4193
4194 if (patterns == NULL && pattern_files == NULL)
4195 {
4196 if (i >= argc) return usage(2);
4197 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4198 NULL);
4199 i++;
4200 if (patterns == NULL) goto EXIT2;
4201 }
4202
4203 /* Compile the patterns that were provided on the command line, either by
4204 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4205 after all the command-line options are read so that we know which PCRE options
4206 to use. When -F is used, compile_pattern() may add another block into the
4207 chain, so we must not access the next pointer till after the compile. */
4208
4209 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4210 {
4211 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4212 (j == 1 && patterns->next == NULL)? 0 : j))
4213 goto EXIT2;
4214 }
4215
4216 /* Read and compile the regular expressions that are provided in files. */
4217
4218 for (fn = pattern_files; fn != NULL; fn = fn->next)
4219 {
4220 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4221 }
4222
4223 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4224
4225 #ifdef SUPPORT_PCRE2GREP_JIT
4226 if (use_jit)
4227 {
4228 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4229 if (jit_stack != NULL )
4230 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4231 }
4232 #endif
4233
4234 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4235 adjust the options. */
4236
4237 pcre2_options &= ~PCRE2_LITERAL;
4238 (void)pcre2_set_compile_extra_options(compile_context, 0);
4239
4240 /* If there are include or exclude patterns read from the command line, compile
4241 them. */
4242
4243 for (j = 0; j < 4; j++)
4244 {
4245 int k;
4246 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4247 {
4248 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4249 (k == 1 && cp->next == NULL)? 0 : k))
4250 goto EXIT2;
4251 }
4252 }
4253
4254 /* Read and compile include/exclude patterns from files. */
4255
4256 for (fn = include_from; fn != NULL; fn = fn->next)
4257 {
4258 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4259 goto EXIT2;
4260 }
4261
4262 for (fn = exclude_from; fn != NULL; fn = fn->next)
4263 {
4264 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4265 goto EXIT2;
4266 }
4267
4268 /* If there are no files that contain lists of files to search, and there are
4269 no file arguments, search stdin, and then exit. */
4270
4271 if (file_lists == NULL && i >= argc)
4272 {
4273 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4274 (filenames > FN_DEFAULT)? stdin_name : NULL);
4275 goto EXIT;
4276 }
4277
4278 /* If any files that contain lists of files to search have been specified,
4279 read them line by line and search the given files. */
4280
4281 for (fn = file_lists; fn != NULL; fn = fn->next)
4282 {
4283 char buffer[FNBUFSIZ];
4284 FILE *fl;
4285 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4286 {
4287 fl = fopen(fn->name, "rb");
4288 if (fl == NULL)
4289 {
4290 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4291 strerror(errno));
4292 goto EXIT2;
4293 }
4294 }
4295 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4296 {
4297 int frc;
4298 char *end = buffer + (int)strlen(buffer);
4299 while (end > buffer && isspace(end[-1])) end--;
4300 *end = 0;
4301 if (*buffer != 0)
4302 {
4303 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4304 if (frc > 1) rc = frc;
4305 else if (frc == 0 && rc == 1) rc = 0;
4306 }
4307 }
4308 if (fl != stdin) fclose(fl);
4309 }
4310
4311 /* After handling file-list, work through remaining arguments. Pass in the fact
4312 that there is only one argument at top level - this suppresses the file name if
4313 the argument is not a directory and filenames are not otherwise forced. */
4314
4315 only_one_at_top = i == argc - 1 && file_lists == NULL;
4316
4317 for (; i < argc; i++)
4318 {
4319 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4320 only_one_at_top);
4321 if (frc > 1) rc = frc;
4322 else if (frc == 0 && rc == 1) rc = 0;
4323 }
4324
4325 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4326 /* If separating builtin echo callouts by implicit newline, add one more for
4327 the final item. */
4328
4329 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4330 fprintf(stdout, STDOUT_NL);
4331 #endif
4332
4333 /* Show the total number of matches if requested, but not if only one file's
4334 count was printed. */
4335
4336 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4337 {
4338 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4339 fprintf(stdout, "TOTAL:");
4340 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4341 }
4342
4343 EXIT:
4344 #ifdef SUPPORT_PCRE2GREP_JIT
4345 pcre2_jit_free_unused_memory(NULL);
4346 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4347 #endif
4348
4349 free(main_buffer);
4350 free((void *)character_tables);
4351
4352 pcre2_compile_context_free(compile_context);
4353 pcre2_match_context_free(match_context);
4354 pcre2_match_data_free(match_data);
4355
4356 free_pattern_chain(patterns);
4357 free_pattern_chain(include_patterns);
4358 free_pattern_chain(include_dir_patterns);
4359 free_pattern_chain(exclude_patterns);
4360 free_pattern_chain(exclude_dir_patterns);
4361
4362 free_file_chain(exclude_from);
4363 free_file_chain(include_from);
4364 free_file_chain(pattern_files);
4365 free_file_chain(file_lists);
4366
4367 while (only_matching != NULL)
4368 {
4369 omstr *this = only_matching;
4370 only_matching = this->next;
4371 free(this);
4372 }
4373
4374 pcre2grep_exit(rc);
4375
4376 EXIT2:
4377 rc = 2;
4378 goto EXIT;
4379 }
4380
4381 /* End of pcre2grep */
4382