1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2016 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if defined(_WIN32) || defined(WIN32)
62 #include <io.h> /* For _setmode() */
63 #include <fcntl.h> /* For _O_BINARY */
64 #endif
65
66 #ifdef SUPPORT_PCRE2GREP_CALLOUT
67 #include <sys/wait.h>
68 #endif
69
70 #ifdef HAVE_UNISTD_H
71 #include <unistd.h>
72 #endif
73
74 #ifdef SUPPORT_LIBZ
75 #include <zlib.h>
76 #endif
77
78 #ifdef SUPPORT_LIBBZ2
79 #include <bzlib.h>
80 #endif
81
82 #define PCRE2_CODE_UNIT_WIDTH 8
83 #include "pcre2.h"
84
/* Boolean values and the (int-based) Boolean type used throughout. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): OFFSET_SIZE is not referenced in the part of the file that
is visible here; its name suggests it sizes the match offsets data, but that
should be confirmed at the point of use. */

#define OFFSET_SIZE 33

/* The maximum pattern length is BUFSIZ, but never less than 8192. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* A pattern buffer must hold the longest pattern plus an added prefix and
suffix, for which 10 extra bytes are allowed. */

#define PATBUFSIZE (MAXPATLEN + 10)
99
/* Values for the "filenames" variable, which specifies options for file name
output. The order matters: a file name is wanted for every value that is
greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options. The lower-case names are the values of
dee_action and the upper-case names are the values of DEE_action. */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for special processing options, held in process_options. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Binary file options */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
124
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is tested by an "if"
with an empty body. Oddly, this does not also seem to apply to fprintf().

The "if" is wrapped in do { } while (0) so that FWRITE(...); is a single
statement. Without the wrapper, using the macro as the body of an "if" that
has an "else" either fails to compile or binds the "else" to the hidden "if".
The arguments are parenthesised so that any expression can be passed. */

#define FWRITE(a,b,c,d) do { if (fwrite((a),(b),(c),(d))) {} } while (0)
132
/* STDOUT_NL is the line terminator for messages written to stdout. Under
Windows stdout has to be set to binary, so that \r\n at the ends of output
lines is not converted to \r\r\n; as a consequence, messages written to
stdout must supply \r\n themselves. Everywhere else a plain \n is used. */

#if !defined(_WIN32) && !defined(WIN32)
#define STDOUT_NL "\n"
#else
#define STDOUT_NL "\r\n"
#endif
143
144
145
146 /*************************************************
147 * Global variables *
148 *************************************************/
149
150 /* Jeffrey Friedl has some debugging requirements that are not part of the
151 regular code. */
152
153 #ifdef JFRIEDL_DEBUG
154 static int S_arg = -1;
155 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
156 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
157 static const char *jfriedl_prefix = "";
158 static const char *jfriedl_postfix = "";
159 #endif
160
161 static char *colour_string = (char *)"1;31";
162 static char *colour_option = NULL;
163 static char *dee_option = NULL;
164 static char *DEE_option = NULL;
165 static char *locale = NULL;
166 static char *main_buffer = NULL;
167 static char *newline_arg = NULL;
168 static char *om_separator = (char *)"";
169 static char *stdin_name = (char *)"(standard input)";
170
171 static int after_context = 0;
172 static int before_context = 0;
173 static int binary_files = BIN_BINARY;
174 static int both_context = 0;
175 static int bufthird = PCRE2GREP_BUFSIZE;
176 static int bufsize = 3*PCRE2GREP_BUFSIZE;
177 static int endlinetype;
178
179 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
180 static int dee_action = dee_SKIP;
181 #else
182 static int dee_action = dee_READ;
183 #endif
184 static int DEE_action = DEE_READ;
185 static int error_count = 0;
186 static int filenames = FN_DEFAULT;
187
188 #ifdef SUPPORT_PCRE2GREP_JIT
189 static BOOL use_jit = TRUE;
190 #else
191 static BOOL use_jit = FALSE;
192 #endif
193
194 static const uint8_t *character_tables = NULL;
195
196 static uint32_t pcre2_options = 0;
197 static uint32_t process_options = 0;
198 static uint32_t match_limit = 0;
199 static uint32_t recursion_limit = 0;
200
201 static pcre2_compile_context *compile_context;
202 static pcre2_match_context *match_context;
203 static pcre2_match_data *match_data;
204 static PCRE2_SIZE *offsets;
205
206 static BOOL count_only = FALSE;
207 static BOOL do_colour = FALSE;
208 static BOOL file_offsets = FALSE;
209 static BOOL hyphenpending = FALSE;
210 static BOOL invert = FALSE;
211 static BOOL line_buffered = FALSE;
212 static BOOL line_offsets = FALSE;
213 static BOOL multiline = FALSE;
214 static BOOL number = FALSE;
215 static BOOL omit_zero_count = FALSE;
216 static BOOL resource_error = FALSE;
217 static BOOL quiet = FALSE;
218 static BOOL show_only_matching = FALSE;
219 static BOOL silent = FALSE;
220 static BOOL utf = FALSE;
221
/* One item in the list of capturing group numbers for --only-matching. */

typedef struct omstr {
  struct omstr *next;     /* next item in the chain, or NULL */
  int groupnum;           /* capturing group number */
} omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

/* A number chain is described by two variables: one that points to its first
item and one that points to its last item. This structure holds the addresses
of that pair. */

typedef struct omdatastr {
  omstr **anchor;         /* address of the "first item" variable */
  omstr **lastptr;        /* address of the "last item" variable */
} omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
240
/* One item in a list of file names (for -f, --file-list, and
--{in,ex}clude-from). */

typedef struct fnstr {
  struct fnstr *next;     /* next item in the chain, or NULL */
  char *name;             /* the file name */
} fnstr;

/* The first and last items of each file name chain */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* A file name chain is described by two variables: one that points to its
first item and one that points to its last item. This structure holds the
addresses of that pair. */

typedef struct fndatastr {
  fnstr **anchor;         /* address of the "first item" variable */
  fnstr **lastptr;        /* address of the "last item" variable */
} fndatastr;

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
269
270 /* Structure for pattern and its compiled form; used for matching patterns and
271 also for include/exclude patterns. */
272
273 typedef struct patstr {
274 struct patstr *next;
275 char *string;
276 pcre2_code *compiled;
277 } patstr;
278
279 static patstr *patterns = NULL;
280 static patstr *patterns_last = NULL;
281 static patstr *include_patterns = NULL;
282 static patstr *include_patterns_last = NULL;
283 static patstr *exclude_patterns = NULL;
284 static patstr *exclude_patterns_last = NULL;
285 static patstr *include_dir_patterns = NULL;
286 static patstr *include_dir_patterns_last = NULL;
287 static patstr *exclude_dir_patterns = NULL;
288 static patstr *exclude_dir_patterns_last = NULL;
289
290 /* Structure holding the two variables that describe a pattern chain. A pointer
291 to such structures is used for each appropriate option. */
292
293 typedef struct patdatastr {
294 patstr **anchor;
295 patstr **lastptr;
296 } patdatastr;
297
298 static patdatastr match_patdata = { &patterns, &patterns_last };
299 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
300 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
301 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
302 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
303
304 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
305 &include_dir_patterns, &exclude_dir_patterns };
306
307 static const char *incexname[4] = { "--include", "--exclude",
308 "--include-dir", "--exclude-dir" };
309
/* The kinds of option. The kind says what, if anything, the data pointer in
an option_item points to; in optionlist[] OP_NUMBER items point to an int,
OP_U32NUMBER items to a uint32_t, OP_STRING and OP_OP_STRING items to a
char *, OP_PATLIST items to a patdatastr, OP_FILELIST items to a fndatastr,
and the OP_OP_NUMBERS item to an omdatastr. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_U32NUMBER,
  OP_OP_NUMBER,
  OP_OP_NUMBERS,
  OP_PATLIST,
  OP_FILELIST,
  OP_BINFILES
};

/* The description of one option */

typedef struct option_item {
  int type;                /* an OP_xxx value */
  int one_char;            /* option letter, or a negative N_xxx value */
  void *dataptr;           /* where the option's data goes, or NULL */
  const char *long_name;   /* long option name, as shown by --help */
  const char *help_text;   /* description shown by --help */
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR        (-1)
#define N_EXCLUDE       (-2)
#define N_EXCLUDE_DIR   (-3)
#define N_HELP          (-4)
#define N_INCLUDE       (-5)
#define N_INCLUDE_DIR   (-6)
#define N_LABEL         (-7)
#define N_LOCALE        (-8)
#define N_NULL          (-9)
#define N_LOFFSETS      (-10)
#define N_FOFFSETS      (-11)
#define N_LBUFFER       (-12)
#define N_M_LIMIT       (-13)
#define N_M_LIMIT_REC   (-14)
#define N_BUFSIZE       (-15)
#define N_NOJIT         (-16)
#define N_FILE_LIST     (-17)
#define N_BINARY_FILES  (-18)
#define N_EXCLUDE_FROM  (-19)
#define N_INCLUDE_FROM  (-20)
#define N_OM_SEPARATOR  (-21)
347
348 static option_item optionlist[] = {
349 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
350 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
351 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
352 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
353 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
354 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
355 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
356 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
357 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
358 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
359 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
360 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
361 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
362 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
363 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
364 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
365 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
366 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
367 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
368 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
369 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
370 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
371 #ifdef SUPPORT_PCRE2GREP_JIT
372 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
373 #else
374 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
375 #endif
376 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
377 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
378 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
379 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
380 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
381 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
382 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
383 { OP_U32NUMBER, N_M_LIMIT_REC, &recursion_limit, "recursion-limit=number", "set PCRE match recursion limit option" },
384 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
385 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
386 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
387 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
388 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
389 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
390 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
391 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
392 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
393 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
394 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
395 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
396 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
397 #ifdef JFRIEDL_DEBUG
398 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
399 #endif
400 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
401 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
402 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
403 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
404 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
405 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
406 { OP_NODATA, 0, NULL, NULL, NULL }
407 };
408
/* Names of the newline types, in the order of the PCRE2_NEWLINE_xx values.
This table must be kept in step with the definitions of PCRE2_NEWLINE_xx in
pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF" };
414
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively, and
the resulting value is the index into each table. The combination of -w and
-x has the same effect as -x on its own, so those entries are identical.

The longest prefix+suffix pair is 10 characters, which is the allowance that
the PATBUFSIZE macro adds to MAXPATLEN; if anything longer is added here,
PATBUFSIZE must be adjusted. */

static const char *prefix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q" };   /* 7: -F -w -x */

static const char *suffix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$" };     /* 7: -F -w -x */
427
/* UTF-8 tables - used only when the newline setting is "any".
NOTE(review): the code that indexes these tables is outside the part of the
file visible here, so what each entry stands for should be confirmed there. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

const char utf8_table4[] = {
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
  3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5 };
437
438
439
440 /*************************************************
441 * Case-independent string compare *
442 *************************************************/
443
/* Compare two zero-terminated strings, treating upper and lower case letters
as equal. Case conversion is done by tolower().

Arguments:
  str1       the first string
  str2       the second string

Returns:     0 if the strings are the same apart from case; otherwise -1 or
             +1 according to whether, at the first position where they
             differ, the lower-cased character of str1 is less than or
             greater than that of str2 (the end of a string counts as zero)
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;
while (*str1 != '\0' || *str2 != '\0')
  {
  /* Characters must reach tolower() as unsigned char values. Where plain
  char is signed, a byte above 0x7f would otherwise be passed as a negative
  number, for which the behaviour of tolower() is undefined. */

  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
456
457
458
459 /*************************************************
460 * Exit from the program *
461 *************************************************/
462
463 /* If there has been a resource error, give a suitable message.
464
465 Argument: the return code
466 Returns: does not return
467 */
468
469 static void
pcre2grep_exit(int rc)470 pcre2grep_exit(int rc)
471 {
472 if (resource_error)
473 {
474 fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
475 "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
476 PCRE2_ERROR_RECURSIONLIMIT);
477 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
478 }
479 exit(rc);
480 }
481
482
483 /*************************************************
484 * Add item to chain of patterns *
485 *************************************************/
486
487 /* Used to add an item onto a chain, or just return an unconnected item if the
488 "after" argument is NULL.
489
490 Arguments:
491 s pattern string to add
492 after if not NULL points to item to insert after
493
494 Returns: new pattern block or NULL on error
495 */
496
497 static patstr *
add_pattern(char * s,patstr * after)498 add_pattern(char *s, patstr *after)
499 {
500 patstr *p = (patstr *)malloc(sizeof(patstr));
501 if (p == NULL)
502 {
503 fprintf(stderr, "pcre2grep: malloc failed\n");
504 pcre2grep_exit(2);
505 }
506 if (strlen(s) > MAXPATLEN)
507 {
508 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
509 MAXPATLEN);
510 free(p);
511 return NULL;
512 }
513 p->next = NULL;
514 p->string = s;
515 p->compiled = NULL;
516
517 if (after != NULL)
518 {
519 p->next = after->next;
520 after->next = p;
521 }
522 return p;
523 }
524
525
526 /*************************************************
527 * Free chain of patterns *
528 *************************************************/
529
530 /* Used for several chains of patterns.
531
532 Argument: pointer to start of chain
533 Returns: nothing
534 */
535
536 static void
free_pattern_chain(patstr * pc)537 free_pattern_chain(patstr *pc)
538 {
539 while (pc != NULL)
540 {
541 patstr *p = pc;
542 pc = p->next;
543 if (p->compiled != NULL) pcre2_code_free(p->compiled);
544 free(p);
545 }
546 }
547
548
549 /*************************************************
550 * Free chain of file names *
551 *************************************************/
552
553 /*
554 Argument: pointer to start of chain
555 Returns: nothing
556 */
557
558 static void
free_file_chain(fnstr * fn)559 free_file_chain(fnstr *fn)
560 {
561 while (fn != NULL)
562 {
563 fnstr *f = fn;
564 fn = f->next;
565 free(f);
566 }
567 }
568
569
570 /*************************************************
571 * OS-specific functions *
572 *************************************************/
573
574 /* These functions are defined so that they can be made system specific.
575 At present there are versions for Unix-style environments, Windows, native
576 z/OS, and "no support". */
577
578
579 /************* Directory scanning Unix-style and z/OS ***********/
580
581 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
582 #include <sys/types.h>
583 #include <sys/stat.h>
584 #include <dirent.h>
585
586 #if defined NATIVE_ZOS
587 /************* Directory and PDS/E scanning for z/OS ***********/
588 /************* z/OS looks mostly like Unix with USS ************/
589 /* However, z/OS needs the #include statements in this header */
590 #include "pcrzosfs.h"
591 /* That header is not included in the main PCRE distribution because
592 other apparatus is needed to compile pcre2grep for z/OS. The header
593 can be found in the special z/OS distribution, which is available
594 from www.zaconsultants.net or from www.cbttape.org. */
595 #endif
596
/* In this environment a directory being scanned is simply a DIR stream. */

typedef DIR directory_type;

/* The character that separates the components of a path */

#define FILESEP '/'
599
/* Tests whether a path names a directory.

Argument:    the path
Returns:     non-zero if stat() succeeds and reports a directory; zero
             otherwise. A stat() failure gives zero, in the expectation that
             opening the path as a file will then fail.
*/

static int
isdirectory(char *filename)
{
struct stat sb;
return (stat(filename, &sb) < 0)? 0 : S_ISDIR(sb.st_mode);
}
608
609 static directory_type *
opendirectory(char * filename)610 opendirectory(char *filename)
611 {
612 return opendir(filename);
613 }
614
615 static char *
readdirectory(directory_type * dir)616 readdirectory(directory_type *dir)
617 {
618 for (;;)
619 {
620 struct dirent *dent = readdir(dir);
621 if (dent == NULL) return NULL;
622 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
623 return dent->d_name;
624 }
625 /* Control never reaches here */
626 }
627
628 static void
closedirectory(directory_type * dir)629 closedirectory(directory_type *dir)
630 {
631 closedir(dir);
632 }
633
634
635 /************* Test for regular file, Unix-style **********/
636
/* Tests whether a path names a regular file.

Argument:    the path
Returns:     non-zero if stat() succeeds and reports a regular file, zero if
             it reports anything else. A stat() failure gives 1, in the
             expectation that opening the path as a file will then fail.
*/

static int
isregfile(char *filename)
{
struct stat sb;
return (stat(filename, &sb) < 0)? 1 : S_ISREG(sb.st_mode);
}
645
646
647 #if defined NATIVE_ZOS
648 /************* Test for a terminal in z/OS **********/
649 /* isatty() does not work in a TSO environment, so always give FALSE.*/
650
651 static BOOL
is_stdout_tty(void)652 is_stdout_tty(void)
653 {
654 return FALSE;
655 }
656
657 static BOOL
is_file_tty(FILE * f)658 is_file_tty(FILE *f)
659 {
660 return FALSE;
661 }
662
663
664 /************* Test for a terminal, Unix-style **********/
665
666 #else
667 static BOOL
is_stdout_tty(void)668 is_stdout_tty(void)
669 {
670 return isatty(fileno(stdout));
671 }
672
673 static BOOL
is_file_tty(FILE * f)674 is_file_tty(FILE *f)
675 {
676 return isatty(fileno(f));
677 }
678 #endif
679
680 /* End of Unix-style or native z/OS environment functions. */
681
682
683 /************* Directory scanning in Windows ***********/
684
685 /* I (Philip Hazel) have no means of testing this code. It was contributed by
686 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
687 when it did not exist. David Byron added a patch that moved the #include of
688 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
689 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
690 undefined when it is indeed undefined. */
691
692 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
693
694 #ifndef STRICT
695 # define STRICT
696 #endif
697 #ifndef WIN32_LEAN_AND_MEAN
698 # define WIN32_LEAN_AND_MEAN
699 #endif
700
701 #include <windows.h>
702
703 #ifndef INVALID_FILE_ATTRIBUTES
704 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
705 #endif
706
707 typedef struct directory_type
708 {
709 HANDLE handle;
710 BOOL first;
711 WIN32_FIND_DATA data;
712 } directory_type;
713
714 #define FILESEP '/'
715
716 int
isdirectory(char * filename)717 isdirectory(char *filename)
718 {
719 DWORD attr = GetFileAttributes(filename);
720 if (attr == INVALID_FILE_ATTRIBUTES)
721 return 0;
722 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
723 }
724
725 directory_type *
opendirectory(char * filename)726 opendirectory(char *filename)
727 {
728 size_t len;
729 char *pattern;
730 directory_type *dir;
731 DWORD err;
732 len = strlen(filename);
733 pattern = (char *)malloc(len + 3);
734 dir = (directory_type *)malloc(sizeof(*dir));
735 if ((pattern == NULL) || (dir == NULL))
736 {
737 fprintf(stderr, "pcre2grep: malloc failed\n");
738 pcre2grep_exit(2);
739 }
740 memcpy(pattern, filename, len);
741 memcpy(&(pattern[len]), "\\*", 3);
742 dir->handle = FindFirstFile(pattern, &(dir->data));
743 if (dir->handle != INVALID_HANDLE_VALUE)
744 {
745 free(pattern);
746 dir->first = TRUE;
747 return dir;
748 }
749 err = GetLastError();
750 free(pattern);
751 free(dir);
752 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
753 return NULL;
754 }
755
756 char *
readdirectory(directory_type * dir)757 readdirectory(directory_type *dir)
758 {
759 for (;;)
760 {
761 if (!dir->first)
762 {
763 if (!FindNextFile(dir->handle, &(dir->data)))
764 return NULL;
765 }
766 else
767 {
768 dir->first = FALSE;
769 }
770 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
771 return dir->data.cFileName;
772 }
773 #ifndef _MSC_VER
774 return NULL; /* Keep compiler happy; never executed */
775 #endif
776 }
777
778 void
closedirectory(directory_type * dir)779 closedirectory(directory_type *dir)
780 {
781 FindClose(dir->handle);
782 free(dir);
783 }
784
785
786 /************* Test for regular file in Windows **********/
787
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:    the path
Returns:     1 if isdirectory() gives zero for the path; 0 otherwise
*/

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
795
796
797 /************* Test for a terminal in Windows **********/
798
799 /* I don't know how to do this; assume never */
800
801 static BOOL
is_stdout_tty(void)802 is_stdout_tty(void)
803 {
804 return FALSE;
805 }
806
807 static BOOL
is_file_tty(FILE * f)808 is_file_tty(FILE *f)
809 {
810 return FALSE;
811 }
812
813 /* End of Windows functions */
814
815
816 /************* Directory scanning when we can't do it ***********/
817
818 /* The type is void, and apart from isdirectory(), the functions do nothing. */
819
820 #else
821
/* With no directory support there is no path separator, and the directory
type is just void. */

#define FILESEP 0
typedef void directory_type;

/* Nothing is ever a directory. */

int isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* No directory can be opened. */

directory_type * opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

/* There are never any entries to read. */

char *readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

/* There is nothing to close. */

void closedirectory(directory_type *dir)
{
(void)dir;
}
829
830
831 /************* Test for regular file when we can't do it **********/
832
/* Assume all files are regular: the answer is always 1, and the path is not
looked at. */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
836
837
838 /************* Test for a terminal when we can't do it **********/
839
840 static BOOL
is_stdout_tty(void)841 is_stdout_tty(void)
842 {
843 return FALSE;
844 }
845
846 static BOOL
is_file_tty(FILE * f)847 is_file_tty(FILE *f)
848 {
849 return FALSE;
850 }
851
852 #endif /* End of system-specific functions */
853
854
855
856 #ifndef HAVE_STRERROR
857 /*************************************************
858 * Provide strerror() for non-ANSI libraries *
859 *************************************************/
860
861 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
862 in their libraries, but can provide the same facility by this simple
863 alternative function. */
864
/* The error message table that the library provides, and its size. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Gives the message for an error number, taken from sys_errlist[].

Argument:    the error number
Returns:     the message, or a fixed text if the number is negative or is
             not less than sys_nerr
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
874 #endif /* HAVE_STRERROR */
875
876
877
878 /*************************************************
879 * Usage function *
880 *************************************************/
881
882 static int
usage(int rc)883 usage(int rc)
884 {
885 option_item *op;
886 fprintf(stderr, "Usage: pcre2grep [-");
887 for (op = optionlist; op->one_char != 0; op++)
888 {
889 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
890 }
891 fprintf(stderr, "] [long options] [pattern] [files]\n");
892 fprintf(stderr, "Type `pcre2grep --help' for more information and the long "
893 "options.\n");
894 return rc;
895 }
896
897
898
899 /*************************************************
900 * Help function *
901 *************************************************/
902
903 static void
help(void)904 help(void)
905 {
906 option_item *op;
907
908 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
909 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
910 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
911
912 #ifdef SUPPORT_PCRE2GREP_CALLOUT
913 printf("Callout scripts in patterns are supported." STDOUT_NL);
914 #else
915 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
916 #endif
917
918 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
919
920 #ifdef SUPPORT_LIBZ
921 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
922 #endif
923
924 #ifdef SUPPORT_LIBBZ2
925 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
926 #endif
927
928 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
929 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
930 #else
931 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
932 #endif
933
934 printf("Example: pcre2grep -i 'hello.*world' menu.h main.c" STDOUT_NL STDOUT_NL);
935 printf("Options:" STDOUT_NL);
936
937 for (op = optionlist; op->one_char != 0; op++)
938 {
939 int n;
940 char s[4];
941
942 if (op->one_char > 0 && (op->long_name)[0] == 0)
943 n = 31 - printf(" -%c", op->one_char);
944 else
945 {
946 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
947 else strcpy(s, " ");
948 n = 31 - printf(" %s --%s", s, op->long_name);
949 }
950
951 if (n < 1) n = 1;
952 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
953 }
954
955 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL);
956 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
957 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
958 printf("space is removed and blank lines are ignored." STDOUT_NL);
959 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
960
961 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
962 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
963 }
964
965
966
967 /*************************************************
968 * Test exclude/includes *
969 *************************************************/
970
971 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
972 there are no includes, the path must match an include pattern.
973
974 Arguments:
975 path the path to be matched
976 ip the chain of include patterns
977 ep the chain of exclude patterns
978
979 Returns: TRUE if the path is not excluded
980 */
981
982 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)983 test_incexc(char *path, patstr *ip, patstr *ep)
984 {
985 int plen = strlen((const char *)path);
986
987 for (; ep != NULL; ep = ep->next)
988 {
989 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
990 return FALSE;
991 }
992
993 if (ip == NULL) return TRUE;
994
995 for (; ip != NULL; ip = ip->next)
996 {
997 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
998 return TRUE;
999 }
1000
1001 return FALSE;
1002 }
1003
1004
1005
1006 /*************************************************
1007 * Decode integer argument value *
1008 *************************************************/
1009
1010 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1011 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1012 just keep it simple.
1013
1014 Arguments:
1015 option_data the option data string
1016 op the option item (for error messages)
1017 longop TRUE if option given in long form
1018
1019 Returns: a long integer
1020 */
1021
1022 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1023 decode_number(char *option_data, option_item *op, BOOL longop)
1024 {
1025 unsigned long int n = 0;
1026 char *endptr = option_data;
1027 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1028 while (isdigit((unsigned char)(*endptr)))
1029 n = n * 10 + (int)(*endptr++ - '0');
1030 if (toupper(*endptr) == 'K')
1031 {
1032 n *= 1024;
1033 endptr++;
1034 }
1035 else if (toupper(*endptr) == 'M')
1036 {
1037 n *= 1024*1024;
1038 endptr++;
1039 }
1040
1041 if (*endptr != 0) /* Error */
1042 {
1043 if (longop)
1044 {
1045 char *equals = strchr(op->long_name, '=');
1046 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1047 (int)(equals - op->long_name);
1048 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1049 option_data, nlen, op->long_name);
1050 }
1051 else
1052 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1053 option_data, op->one_char);
1054 pcre2grep_exit(usage(2));
1055 }
1056
1057 return n;
1058 }
1059
1060
1061
1062 /*************************************************
1063 * Add item to a chain of numbers *
1064 *************************************************/
1065
1066 /* Used to add an item onto a chain, or just return an unconnected item if the
1067 "after" argument is NULL.
1068
1069 Arguments:
1070 n the number to add
1071 after if not NULL points to item to insert after
1072
1073 Returns: new number block
1074 */
1075
1076 static omstr *
add_number(int n,omstr * after)1077 add_number(int n, omstr *after)
1078 {
1079 omstr *om = (omstr *)malloc(sizeof(omstr));
1080
1081 if (om == NULL)
1082 {
1083 fprintf(stderr, "pcre2grep: malloc failed\n");
1084 pcre2grep_exit(2);
1085 }
1086 om->next = NULL;
1087 om->groupnum = n;
1088
1089 if (after != NULL)
1090 {
1091 om->next = after->next;
1092 after->next = om;
1093 }
1094 return om;
1095 }
1096
1097
1098
1099 /*************************************************
1100 * Read one line of input *
1101 *************************************************/
1102
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not report how many
characters it has read, and the count cannot be recovered by looking for the
terminating zero when there may be binary zeros embedded in the data.

Characters are copied until a newline has been stored, "length" characters
have been stored, or end of file is reached. The limit is tested before each
character is read, so nothing is read from the file or written to the buffer
when length is zero or negative. No terminating zero is added.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  while (yield < length && (c = fgetc(f)) != EOF)
  {
    buffer[yield++] = c;
    if (c == '\n') break;
  }

  return yield;
}
1130
1131
1132
1133 /*************************************************
1134 * Find end of line *
1135 *************************************************/
1136
1137 /* The length of the endline sequence that is found is set via lenptr. This may
1138 be zero at the very end of the file if there is no line-ending sequence there.
1139
1140 Arguments:
1141 p current position in line
1142 endptr end of available data
1143 lenptr where to put the length of the eol sequence
1144
1145 Returns: pointer after the last byte of the line,
1146 including the newline byte(s)
1147 */
1148
1149 static char *
end_of_line(char * p,char * endptr,int * lenptr)1150 end_of_line(char *p, char *endptr, int *lenptr)
1151 {
1152 switch(endlinetype)
1153 {
1154 default: /* Just in case */
1155 case PCRE2_NEWLINE_LF:
1156 while (p < endptr && *p != '\n') p++;
1157 if (p < endptr)
1158 {
1159 *lenptr = 1;
1160 return p + 1;
1161 }
1162 *lenptr = 0;
1163 return endptr;
1164
1165 case PCRE2_NEWLINE_CR:
1166 while (p < endptr && *p != '\r') p++;
1167 if (p < endptr)
1168 {
1169 *lenptr = 1;
1170 return p + 1;
1171 }
1172 *lenptr = 0;
1173 return endptr;
1174
1175 case PCRE2_NEWLINE_CRLF:
1176 for (;;)
1177 {
1178 while (p < endptr && *p != '\r') p++;
1179 if (++p >= endptr)
1180 {
1181 *lenptr = 0;
1182 return endptr;
1183 }
1184 if (*p == '\n')
1185 {
1186 *lenptr = 2;
1187 return p + 1;
1188 }
1189 }
1190 break;
1191
1192 case PCRE2_NEWLINE_ANYCRLF:
1193 while (p < endptr)
1194 {
1195 int extra = 0;
1196 register int c = *((unsigned char *)p);
1197
1198 if (utf && c >= 0xc0)
1199 {
1200 int gcii, gcss;
1201 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1202 gcss = 6*extra;
1203 c = (c & utf8_table3[extra]) << gcss;
1204 for (gcii = 1; gcii <= extra; gcii++)
1205 {
1206 gcss -= 6;
1207 c |= (p[gcii] & 0x3f) << gcss;
1208 }
1209 }
1210
1211 p += 1 + extra;
1212
1213 switch (c)
1214 {
1215 case '\n':
1216 *lenptr = 1;
1217 return p;
1218
1219 case '\r':
1220 if (p < endptr && *p == '\n')
1221 {
1222 *lenptr = 2;
1223 p++;
1224 }
1225 else *lenptr = 1;
1226 return p;
1227
1228 default:
1229 break;
1230 }
1231 } /* End of loop for ANYCRLF case */
1232
1233 *lenptr = 0; /* Must have hit the end */
1234 return endptr;
1235
1236 case PCRE2_NEWLINE_ANY:
1237 while (p < endptr)
1238 {
1239 int extra = 0;
1240 register int c = *((unsigned char *)p);
1241
1242 if (utf && c >= 0xc0)
1243 {
1244 int gcii, gcss;
1245 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1246 gcss = 6*extra;
1247 c = (c & utf8_table3[extra]) << gcss;
1248 for (gcii = 1; gcii <= extra; gcii++)
1249 {
1250 gcss -= 6;
1251 c |= (p[gcii] & 0x3f) << gcss;
1252 }
1253 }
1254
1255 p += 1 + extra;
1256
1257 switch (c)
1258 {
1259 case '\n': /* LF */
1260 case '\v': /* VT */
1261 case '\f': /* FF */
1262 *lenptr = 1;
1263 return p;
1264
1265 case '\r': /* CR */
1266 if (p < endptr && *p == '\n')
1267 {
1268 *lenptr = 2;
1269 p++;
1270 }
1271 else *lenptr = 1;
1272 return p;
1273
1274 #ifndef EBCDIC
1275 case 0x85: /* Unicode NEL */
1276 *lenptr = utf? 2 : 1;
1277 return p;
1278
1279 case 0x2028: /* Unicode LS */
1280 case 0x2029: /* Unicode PS */
1281 *lenptr = 3;
1282 return p;
1283 #endif /* Not EBCDIC */
1284
1285 default:
1286 break;
1287 }
1288 } /* End of loop for ANY case */
1289
1290 *lenptr = 0; /* Must have hit the end */
1291 return endptr;
1292 } /* End of overall switch */
1293 }
1294
1295
1296
1297 /*************************************************
1298 * Find start of previous line *
1299 *************************************************/
1300
1301 /* This is called when looking back for before lines to print.
1302
1303 Arguments:
1304 p start of the subsequent line
1305 startptr start of available data
1306
1307 Returns: pointer to the start of the previous line
1308 */
1309
1310 static char *
previous_line(char * p,char * startptr)1311 previous_line(char *p, char *startptr)
1312 {
1313 switch(endlinetype)
1314 {
1315 default: /* Just in case */
1316 case PCRE2_NEWLINE_LF:
1317 p--;
1318 while (p > startptr && p[-1] != '\n') p--;
1319 return p;
1320
1321 case PCRE2_NEWLINE_CR:
1322 p--;
1323 while (p > startptr && p[-1] != '\n') p--;
1324 return p;
1325
1326 case PCRE2_NEWLINE_CRLF:
1327 for (;;)
1328 {
1329 p -= 2;
1330 while (p > startptr && p[-1] != '\n') p--;
1331 if (p <= startptr + 1 || p[-2] == '\r') return p;
1332 }
1333 /* Control can never get here */
1334
1335 case PCRE2_NEWLINE_ANY:
1336 case PCRE2_NEWLINE_ANYCRLF:
1337 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1338 if (utf) while ((*p & 0xc0) == 0x80) p--;
1339
1340 while (p > startptr)
1341 {
1342 register unsigned int c;
1343 char *pp = p - 1;
1344
1345 if (utf)
1346 {
1347 int extra = 0;
1348 while ((*pp & 0xc0) == 0x80) pp--;
1349 c = *((unsigned char *)pp);
1350 if (c >= 0xc0)
1351 {
1352 int gcii, gcss;
1353 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1354 gcss = 6*extra;
1355 c = (c & utf8_table3[extra]) << gcss;
1356 for (gcii = 1; gcii <= extra; gcii++)
1357 {
1358 gcss -= 6;
1359 c |= (pp[gcii] & 0x3f) << gcss;
1360 }
1361 }
1362 }
1363 else c = *((unsigned char *)pp);
1364
1365 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1366 {
1367 case '\n': /* LF */
1368 case '\r': /* CR */
1369 return p;
1370
1371 default:
1372 break;
1373 }
1374
1375 else switch (c)
1376 {
1377 case '\n': /* LF */
1378 case '\v': /* VT */
1379 case '\f': /* FF */
1380 case '\r': /* CR */
1381 #ifndef EBCDIE
1382 case 0x85: /* Unicode NEL */
1383 case 0x2028: /* Unicode LS */
1384 case 0x2029: /* Unicode PS */
1385 #endif /* Not EBCDIC */
1386 return p;
1387
1388 default:
1389 break;
1390 }
1391
1392 p = pp; /* Back one character */
1393 } /* End of loop for ANY case */
1394
1395 return startptr; /* Hit start of data */
1396 } /* End of overall switch */
1397 }
1398
1399
1400
1401
1402
1403 /*************************************************
1404 * Print the previous "after" lines *
1405 *************************************************/
1406
1407 /* This is called if we are about to lose said lines because of buffer filling,
1408 and at the end of the file. The data in the line is written using fwrite() so
1409 that a binary zero does not terminate it.
1410
1411 Arguments:
1412 lastmatchnumber the number of the last matching line, plus one
1413 lastmatchrestart where we restarted after the last match
1414 endptr end of available data
1415 printname filename for printing
1416
1417 Returns: nothing
1418 */
1419
1420 static void
do_after_lines(int lastmatchnumber,char * lastmatchrestart,char * endptr,char * printname)1421 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1422 char *printname)
1423 {
1424 if (after_context > 0 && lastmatchnumber > 0)
1425 {
1426 int count = 0;
1427 while (lastmatchrestart < endptr && count++ < after_context)
1428 {
1429 int ellength;
1430 char *pp = lastmatchrestart;
1431 if (printname != NULL) fprintf(stdout, "%s-", printname);
1432 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1433 pp = end_of_line(pp, endptr, &ellength);
1434 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1435 lastmatchrestart = pp;
1436 }
1437 hyphenpending = TRUE;
1438 }
1439 }
1440
1441
1442
1443 /*************************************************
1444 * Apply patterns to subject till one matches *
1445 *************************************************/
1446
1447 /* This function is called to run through all patterns, looking for a match. It
1448 is used multiple times for the same subject when colouring is enabled, in order
1449 to find all possible matches.
1450
1451 Arguments:
1452 matchptr the start of the subject
1453 length the length of the subject to match
1454 options options for pcre_exec
1455 startoffset where to start matching
1456 mrc address of where to put the result of pcre2_match()
1457
1458 Returns: TRUE if there was a match
1459 FALSE if there was no match
1460 invert if there was a non-fatal error
1461 */
1462
1463 static BOOL
match_patterns(char * matchptr,size_t length,unsigned int options,size_t startoffset,int * mrc)1464 match_patterns(char *matchptr, size_t length, unsigned int options,
1465 size_t startoffset, int *mrc)
1466 {
1467 int i;
1468 size_t slen = length;
1469 patstr *p = patterns;
1470 const char *msg = "this text:\n\n";
1471
1472 if (slen > 200)
1473 {
1474 slen = 200;
1475 msg = "text that starts:\n\n";
1476 }
1477 for (i = 1; p != NULL; p = p->next, i++)
1478 {
1479 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1480 startoffset, options, match_data, match_context);
1481 if (*mrc >= 0) return TRUE;
1482 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1483 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1484 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1485 fprintf(stderr, "%s", msg);
1486 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1487 fprintf(stderr, "\n\n");
1488 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_RECURSIONLIMIT ||
1489 *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1490 resource_error = TRUE;
1491 if (error_count++ > 20)
1492 {
1493 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1494 pcre2grep_exit(2);
1495 }
1496 return invert; /* No more matching; don't show the line again */
1497 }
1498
1499 return FALSE; /* No match, no errors */
1500 }
1501
1502
1503 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1504
1505 /*************************************************
1506 * Parse and execute callout scripts *
1507 *************************************************/
1508
1509 /* This function parses a callout string block and executes the
1510 program specified by the string. The string is a list of substrings
1511 separated by pipe characters. The first substring represents the
1512 executable name, and the following substrings specify the arguments:
1513
1514 program_name|param1|param2|...
1515
1516 Any substirng (including the program name) can contain escape sequences
1517 started by the dollar character. The escape sequences are substituted as
1518 follows:
1519
1520 $<digits> or ${<digits>} is replaced by the captured substring of the given
1521 decimal number, which must be greater than zero. If the number is greater
1522 than the number of capturing substrings, or if the capture is unset, the
1523 replacement is empty.
1524
1525 Any other character is substituted by itself. E.g: $$ is replaced by a single
1526 dollar or $| replaced by a pipe character.
1527
1528 Example:
1529
1530 echo -e "abcde\n12345" | pcre2grep \
1531 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
1532
1533 Output:
1534
1535 Arg1: [a] [bcd] [d] Arg2: |a| ()
1536 abcde
1537 Arg1: [1] [234] [4] Arg2: |1| ()
1538 12345
1539
1540 Arguments:
1541 blockptr the callout block
1542
1543 Returns: currently it always returns with 0
1544 */
1545
1546 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)1547 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
1548 {
1549 PCRE2_SIZE length = calloutptr->callout_string_length;
1550 PCRE2_SPTR string = calloutptr->callout_string;
1551 PCRE2_SPTR subject = calloutptr->subject;
1552 PCRE2_SIZE *ovector = calloutptr->offset_vector;
1553 PCRE2_SIZE capture_top = calloutptr->capture_top;
1554 PCRE2_SIZE argsvectorlen = 2;
1555 PCRE2_SIZE argslen = 1;
1556 char *args;
1557 char *argsptr;
1558 char **argsvector;
1559 char **argsvectorptr;
1560 pid_t pid;
1561 int result = 0;
1562
1563 (void)unused; /* Avoid compiler warning */
1564
1565 /* Only callout with strings are supported. */
1566 if (string == NULL || length == 0) return 0;
1567
1568 /* Checking syntax and compute the number of string fragments. Callout strings
1569 are ignored in case of a syntax error. */
1570
1571 while (length > 0)
1572 {
1573 if (*string == '|')
1574 {
1575 argsvectorlen++;
1576
1577 /* Maximum 10000 arguments allowed. */
1578 if (argsvectorlen > 10000) return 0;
1579 }
1580 else if (*string == '$')
1581 {
1582 PCRE2_SIZE capture_id = 0;
1583
1584 string++;
1585 length--;
1586
1587 /* Syntax error: a character must be present after $. */
1588 if (length == 0) return 0;
1589
1590 if (*string >= '1' && *string <= '9')
1591 {
1592 do
1593 {
1594 /* Maximum capture id is 65535. */
1595 if (capture_id <= 65535)
1596 capture_id = capture_id * 10 + (*string - '0');
1597
1598 string++;
1599 length--;
1600 }
1601 while (length > 0 && *string >= '0' && *string <= '9');
1602
1603 /* To negate the effect of string++ below. */
1604 string--;
1605 length++;
1606 }
1607 else if (*string == '{')
1608 {
1609 /* Must be a decimal number in parenthesis, e.g: (5) or (38) */
1610 string++;
1611 length--;
1612
1613 /* Syntax error: a decimal number required. */
1614 if (length == 0) return 0;
1615 if (*string < '1' || *string > '9') return 0;
1616
1617 do
1618 {
1619 /* Maximum capture id is 65535. */
1620 if (capture_id <= 65535)
1621 capture_id = capture_id * 10 + (*string - '0');
1622
1623 string++;
1624 length--;
1625
1626 /* Syntax error: no more characters */
1627 if (length == 0) return 0;
1628 }
1629 while (*string >= '0' && *string <= '9');
1630
1631 /* Syntax error: close paren is missing. */
1632 if (*string != '}') return 0;
1633 }
1634
1635 if (capture_id > 0)
1636 {
1637 if (capture_id < capture_top)
1638 {
1639 capture_id *= 2;
1640 argslen += ovector[capture_id + 1] - ovector[capture_id];
1641 }
1642
1643 /* To negate the effect of argslen++ below. */
1644 argslen--;
1645 }
1646 }
1647
1648 string++;
1649 length--;
1650 argslen++;
1651 }
1652
1653 args = (char*)malloc(argslen);
1654 if (args == NULL) return 0;
1655
1656 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
1657 if (argsvector == NULL)
1658 {
1659 free(args);
1660 return 0;
1661 }
1662
1663 argsptr = args;
1664 argsvectorptr = argsvector;
1665
1666 *argsvectorptr++ = argsptr;
1667
1668 length = calloutptr->callout_string_length;
1669 string = calloutptr->callout_string;
1670
1671 while (length > 0)
1672 {
1673 if (*string == '|')
1674 {
1675 *argsptr++ = '\0';
1676 *argsvectorptr++ = argsptr;
1677 }
1678 else if (*string == '$')
1679 {
1680 string++;
1681 length--;
1682
1683 if ((*string >= '1' && *string <= '9') || *string == '{')
1684 {
1685 PCRE2_SIZE capture_id = 0;
1686
1687 if (*string != '{')
1688 {
1689 do
1690 {
1691 /* Maximum capture id is 65535. */
1692 if (capture_id <= 65535)
1693 capture_id = capture_id * 10 + (*string - '0');
1694
1695 string++;
1696 length--;
1697 }
1698 while (length > 0 && *string >= '0' && *string <= '9');
1699
1700 /* To negate the effect of string++ below. */
1701 string--;
1702 length++;
1703 }
1704 else
1705 {
1706 string++;
1707 length--;
1708
1709 do
1710 {
1711 /* Maximum capture id is 65535. */
1712 if (capture_id <= 65535)
1713 capture_id = capture_id * 10 + (*string - '0');
1714
1715 string++;
1716 length--;
1717 }
1718 while (*string != '}');
1719 }
1720
1721 if (capture_id < capture_top)
1722 {
1723 PCRE2_SIZE capturesize;
1724 capture_id *= 2;
1725
1726 capturesize = ovector[capture_id + 1] - ovector[capture_id];
1727 memcpy(argsptr, subject + ovector[capture_id], capturesize);
1728 argsptr += capturesize;
1729 }
1730 }
1731 else
1732 {
1733 *argsptr++ = *string;
1734 }
1735 }
1736 else
1737 {
1738 *argsptr++ = *string;
1739 }
1740
1741 string++;
1742 length--;
1743 }
1744
1745 *argsptr++ = '\0';
1746 *argsvectorptr = NULL;
1747
1748 pid = fork();
1749
1750 if (pid == 0)
1751 {
1752 (void)execv(argsvector[0], argsvector);
1753 /* Control gets here if there is an error, e.g. a non-existent program */
1754 exit(1);
1755 }
1756 else if (pid > 0)
1757 (void)waitpid(pid, &result, 0);
1758
1759 free(args);
1760 free(argsvector);
1761
1762 /* Currently negative return values are not supported, only zero (match
1763 continues) or non-zero (match fails). */
1764
1765 return result != 0;
1766 }
1767
1768 #endif
1769
1770
1771
1772 /*************************************************
1773 * Grep an individual file *
1774 *************************************************/
1775
1776 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1777 times the value of bufthird. The matching point is never allowed to stray into
1778 the top third of the buffer, thus keeping more of the file available for
1779 context printing or for multiline scanning. For large files, the pointer will
1780 be in the middle third most of the time, so the bottom third is available for
1781 "before" context printing.
1782
1783 Arguments:
1784 handle the fopened FILE stream for a normal file
1785 the gzFile pointer when reading is via libz
1786 the BZFILE pointer when reading is via libbz2
1787 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1788 filename the file name or NULL (for errors)
1789 printname the file name if it is to be printed for each match
1790 or NULL if the file name is not to be printed
1791 it cannot be NULL if filenames[_nomatch]_only is set
1792
1793 Returns: 0 if there was at least one match
1794 1 otherwise (no matches)
1795 2 if an overlong line is encountered
1796 3 if there is a read error on a .bz2 file
1797 */
1798
1799 static int
pcre2grep(void * handle,int frtype,char * filename,char * printname)1800 pcre2grep(void *handle, int frtype, char *filename, char *printname)
1801 {
1802 int rc = 1;
1803 int linenumber = 1;
1804 int lastmatchnumber = 0;
1805 int count = 0;
1806 int filepos = 0;
1807 char *lastmatchrestart = NULL;
1808 char *ptr = main_buffer;
1809 char *endptr;
1810 size_t bufflength;
1811 BOOL binary = FALSE;
1812 BOOL endhyphenpending = FALSE;
1813 BOOL input_line_buffered = line_buffered;
1814 FILE *in = NULL; /* Ensure initialized */
1815
1816 #ifdef SUPPORT_LIBZ
1817 gzFile ingz = NULL;
1818 #endif
1819
1820 #ifdef SUPPORT_LIBBZ2
1821 BZFILE *inbz2 = NULL;
1822 #endif
1823
1824
1825 /* Do the first read into the start of the buffer and set up the pointer to end
1826 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1827 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1828 fail. */
1829
1830 (void)frtype;
1831
1832 #ifdef SUPPORT_LIBZ
1833 if (frtype == FR_LIBZ)
1834 {
1835 ingz = (gzFile)handle;
1836 bufflength = gzread (ingz, main_buffer, bufsize);
1837 }
1838 else
1839 #endif
1840
1841 #ifdef SUPPORT_LIBBZ2
1842 if (frtype == FR_LIBBZ2)
1843 {
1844 inbz2 = (BZFILE *)handle;
1845 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1846 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1847 } /* without the cast it is unsigned. */
1848 else
1849 #endif
1850
1851 {
1852 in = (FILE *)handle;
1853 if (is_file_tty(in)) input_line_buffered = TRUE;
1854 bufflength = input_line_buffered?
1855 read_one_line(main_buffer, bufsize, in) :
1856 fread(main_buffer, 1, bufsize, in);
1857 }
1858
1859 endptr = main_buffer + bufflength;
1860
1861 /* Unless binary-files=text, see if we have a binary file. This uses the same
1862 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1863 file. */
1864
1865 if (binary_files != BIN_TEXT)
1866 {
1867 binary =
1868 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1869 if (binary && binary_files == BIN_NOMATCH) return 1;
1870 }
1871
1872 /* Loop while the current pointer is not at the end of the file. For large
1873 files, endptr will be at the end of the buffer when we are in the middle of the
1874 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1875 way, the buffer is shifted left and re-filled. */
1876
1877 while (ptr < endptr)
1878 {
1879 int endlinelength;
1880 int mrc = 0;
1881 unsigned int options = 0;
1882 BOOL match;
1883 char *matchptr = ptr;
1884 char *t = ptr;
1885 size_t length, linelength;
1886 size_t startoffset = 0;
1887
1888 /* At this point, ptr is at the start of a line. We need to find the length
1889 of the subject string to pass to pcre2_match(). In multiline mode, it is the
1890 length remainder of the data in the buffer. Otherwise, it is the length of
1891 the next line, excluding the terminating newline. After matching, we always
1892 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
1893 option is used for compiling, so that any match is constrained to be in the
1894 first line. */
1895
1896 t = end_of_line(t, endptr, &endlinelength);
1897 linelength = t - ptr - endlinelength;
1898 length = multiline? (size_t)(endptr - ptr) : linelength;
1899
1900 /* Check to see if the line we are looking at extends right to the very end
1901 of the buffer without a line terminator. This means the line is too long to
1902 handle. */
1903
1904 if (endlinelength == 0 && t == main_buffer + bufsize)
1905 {
1906 fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n"
1907 "pcre2grep: the buffer size is %d\n"
1908 "pcre2grep: use the --buffer-size option to change it\n",
1909 linenumber,
1910 (filename == NULL)? "" : " of file ",
1911 (filename == NULL)? "" : filename,
1912 bufthird);
1913 return 2;
1914 }
1915
1916 /* Extra processing for Jeffrey Friedl's debugging. */
1917
1918 #ifdef JFRIEDL_DEBUG
1919 if (jfriedl_XT || jfriedl_XR)
1920 {
1921 # include <sys/time.h>
1922 # include <time.h>
1923 struct timeval start_time, end_time;
1924 struct timezone dummy;
1925 int i;
1926
1927 if (jfriedl_XT)
1928 {
1929 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1930 const char *orig = ptr;
1931 ptr = malloc(newlen + 1);
1932 if (!ptr) {
1933 printf("out of memory");
1934 pcre2grep_exit(2);
1935 }
1936 endptr = ptr;
1937 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1938 for (i = 0; i < jfriedl_XT; i++) {
1939 strncpy(endptr, orig, length);
1940 endptr += length;
1941 }
1942 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1943 length = newlen;
1944 }
1945
1946 if (gettimeofday(&start_time, &dummy) != 0)
1947 perror("bad gettimeofday");
1948
1949
1950 for (i = 0; i < jfriedl_XR; i++)
1951 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1952 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1953
1954 if (gettimeofday(&end_time, &dummy) != 0)
1955 perror("bad gettimeofday");
1956
1957 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1958 -
1959 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1960
1961 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1962 return 0;
1963 }
1964 #endif
1965
1966 /* We come back here after a match when show_only_matching is set, in order
1967 to find any further matches in the same line. This applies to
1968 --only-matching, --file-offsets, and --line-offsets. */
1969
1970 ONLY_MATCHING_RESTART:
1971
1972 /* Run through all the patterns until one matches or there is an error other
1973 than NOMATCH. This code is in a subroutine so that it can be re-used for
1974 finding subsequent matches when colouring matched lines. After finding one
1975 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
1976 this line. */
1977
1978 match = match_patterns(matchptr, length, options, startoffset, &mrc);
1979 options = PCRE2_NOTEMPTY;
1980
1981 /* If it's a match or a not-match (as required), do what's wanted. */
1982
1983 if (match != invert)
1984 {
1985 BOOL hyphenprinted = FALSE;
1986
1987 /* We've failed if we want a file that doesn't have any matches. */
1988
1989 if (filenames == FN_NOMATCH_ONLY) return 1;
1990
1991 /* If all we want is a yes/no answer, we can return immediately. */
1992
1993 if (quiet) return 0;
1994
1995 /* Just count if just counting is wanted. */
1996
1997 else if (count_only) count++;
1998
1999 /* When handling a binary file and binary-files==binary, the "binary"
2000 variable will be set true (it's false in all other cases). In this
2001 situation we just want to output the file name. No need to scan further. */
2002
2003 else if (binary)
2004 {
2005 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2006 return 0;
2007 }
2008
2009 /* Likewise, if all we want is a file name, there is no need to scan any
2010 more lines in the file. */
2011
2012 else if (filenames == FN_MATCH_ONLY)
2013 {
2014 fprintf(stdout, "%s" STDOUT_NL, printname);
2015 return 0;
2016 }
2017
2018 /* The --only-matching option prints just the substring that matched,
2019 and/or one or more captured portions of it, as long as these strings are
2020 not empty. The --file-offsets and --line-offsets options output offsets for
2021 the matching substring (all three set show_only_matching). None of these
2022 mutually exclusive options prints any context. Afterwards, adjust the start
2023 and then jump back to look for further matches in the same line. If we are
2024 in invert mode, however, nothing is printed and we do not restart - this
2025 could still be useful because the return code is set. */
2026
2027 else if (show_only_matching)
2028 {
2029 if (!invert)
2030 {
2031 size_t oldstartoffset;
2032
2033 if (printname != NULL) fprintf(stdout, "%s:", printname);
2034 if (number) fprintf(stdout, "%d:", linenumber);
2035
2036 /* Handle --line-offsets */
2037
2038 if (line_offsets)
2039 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr),
2040 (int)(offsets[1] - offsets[0]));
2041
2042 /* Handle --file-offsets */
2043
2044 else if (file_offsets)
2045 fprintf(stdout, "%d,%d" STDOUT_NL,
2046 (int)(filepos + matchptr + offsets[0] - ptr),
2047 (int)(offsets[1] - offsets[0]));
2048
2049 /* Handle --only-matching, which may occur many times */
2050
2051 else
2052 {
2053 BOOL printed = FALSE;
2054 omstr *om;
2055
2056 for (om = only_matching; om != NULL; om = om->next)
2057 {
2058 int n = om->groupnum;
2059 if (n < mrc)
2060 {
2061 int plen = offsets[2*n + 1] - offsets[2*n];
2062 if (plen > 0)
2063 {
2064 if (printed) fprintf(stdout, "%s", om_separator);
2065 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
2066 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
2067 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
2068 printed = TRUE;
2069 }
2070 }
2071 }
2072
2073 if (printed || printname != NULL || number)
2074 fprintf(stdout, STDOUT_NL);
2075 }
2076
2077 /* Prepare to repeat to find the next match in the line. */
2078
2079 match = FALSE;
2080 if (line_buffered) fflush(stdout);
2081 rc = 0; /* Had some success */
2082
2083 /* If the current match ended past the end of the line (only possible
2084 in multiline mode), we are done with this line. */
2085
2086 if (offsets[1] > linelength) goto END_ONE_MATCH;
2087
2088 /* If the pattern contained a lookbehind that included \K, it is
2089 possible that the end of the match might be at or before the actual
2090 starting offset we have just used. In this case, start one character
2091 further on. */
2092
2093 startoffset = offsets[1]; /* Restart after the match */
2094 oldstartoffset = pcre2_get_startchar(match_data);
2095 if (startoffset <= oldstartoffset)
2096 {
2097 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2098 startoffset = oldstartoffset + 1;
2099 if (utf)
2100 while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
2101 }
2102 goto ONLY_MATCHING_RESTART;
2103 }
2104 }
2105
2106 /* This is the default case when none of the above options is set. We print
2107 the matching line(s), possibly preceded and/or followed by other lines of
2108 context. */
2109
2110 else
2111 {
2112 /* See if there is a requirement to print some "after" lines from a
2113 previous match. We never print any overlaps. */
2114
2115 if (after_context > 0 && lastmatchnumber > 0)
2116 {
2117 int ellength;
2118 int linecount = 0;
2119 char *p = lastmatchrestart;
2120
2121 while (p < ptr && linecount < after_context)
2122 {
2123 p = end_of_line(p, ptr, &ellength);
2124 linecount++;
2125 }
2126
2127 /* It is important to advance lastmatchrestart during this printing so
2128 that it interacts correctly with any "before" printing below. Print
2129 each line's data using fwrite() in case there are binary zeroes. */
2130
2131 while (lastmatchrestart < p)
2132 {
2133 char *pp = lastmatchrestart;
2134 if (printname != NULL) fprintf(stdout, "%s-", printname);
2135 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
2136 pp = end_of_line(pp, endptr, &ellength);
2137 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2138 lastmatchrestart = pp;
2139 }
2140 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2141 }
2142
2143 /* If there were non-contiguous lines printed above, insert hyphens. */
2144
2145 if (hyphenpending)
2146 {
2147 fprintf(stdout, "--" STDOUT_NL);
2148 hyphenpending = FALSE;
2149 hyphenprinted = TRUE;
2150 }
2151
2152 /* See if there is a requirement to print some "before" lines for this
2153 match. Again, don't print overlaps. */
2154
2155 if (before_context > 0)
2156 {
2157 int linecount = 0;
2158 char *p = ptr;
2159
2160 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2161 linecount < before_context)
2162 {
2163 linecount++;
2164 p = previous_line(p, main_buffer);
2165 }
2166
2167 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2168 fprintf(stdout, "--" STDOUT_NL);
2169
2170 while (p < ptr)
2171 {
2172 int ellength;
2173 char *pp = p;
2174 if (printname != NULL) fprintf(stdout, "%s-", printname);
2175 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
2176 pp = end_of_line(pp, endptr, &ellength);
2177 FWRITE(p, 1, pp - p, stdout);
2178 p = pp;
2179 }
2180 }
2181
2182 /* Now print the matching line(s); ensure we set hyphenpending at the end
2183 of the file if any context lines are being output. */
2184
2185 if (after_context > 0 || before_context > 0)
2186 endhyphenpending = TRUE;
2187
2188 if (printname != NULL) fprintf(stdout, "%s:", printname);
2189 if (number) fprintf(stdout, "%d:", linenumber);
2190
2191 /* In multiline mode, we want to print to the end of the line in which
2192 the end of the matched string is found, so we adjust linelength and the
2193 line number appropriately, but only when there actually was a match
2194 (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
2195 the match will always be before the first newline sequence. */
2196
2197 if (multiline & !invert)
2198 {
2199 char *endmatch = ptr + offsets[1];
2200 t = ptr;
2201 while (t <= endmatch)
2202 {
2203 t = end_of_line(t, endptr, &endlinelength);
2204 if (t < endmatch) linenumber++; else break;
2205 }
2206 linelength = t - ptr - endlinelength;
2207 }
2208
2209 /*** NOTE: Use only fwrite() to output the data line, so that binary
2210 zeroes are treated as just another data character. */
2211
2212 /* This extra option, for Jeffrey Friedl's debugging requirements,
2213 replaces the matched string, or a specific captured string if it exists,
2214 with X. When this happens, colouring is ignored. */
2215
2216 #ifdef JFRIEDL_DEBUG
2217 if (S_arg >= 0 && S_arg < mrc)
2218 {
2219 int first = S_arg * 2;
2220 int last = first + 1;
2221 FWRITE(ptr, 1, offsets[first], stdout);
2222 fprintf(stdout, "X");
2223 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2224 }
2225 else
2226 #endif
2227
2228 /* We have to split the line(s) up if colouring, and search for further
2229 matches, but not of course if the line is a non-match. */
2230
2231 if (do_colour && !invert)
2232 {
2233 int plength;
2234 FWRITE(ptr, 1, offsets[0], stdout);
2235 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
2236 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
2237 fprintf(stdout, "%c[00m", 0x1b);
2238 for (;;)
2239 {
2240 startoffset = offsets[1];
2241 if (startoffset >= linelength + endlinelength ||
2242 !match_patterns(matchptr, length, options, startoffset, &mrc))
2243 break;
2244 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
2245 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
2246 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
2247 fprintf(stdout, "%c[00m", 0x1b);
2248 }
2249
2250 /* In multiline mode, we may have already printed the complete line
2251 and its line-ending characters (if they matched the pattern), so there
2252 may be no more to print. */
2253
2254 plength = (int)((linelength + endlinelength) - startoffset);
2255 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
2256 }
2257
2258 /* Not colouring; no need to search for further matches */
2259
2260 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
2261 }
2262
2263 /* End of doing what has to be done for a match. If --line-buffered was
2264 given, flush the output. */
2265
2266 if (line_buffered) fflush(stdout);
2267 rc = 0; /* Had some success */
2268
2269 /* Remember where the last match happened for after_context. We remember
2270 where we are about to restart, and that line's number. */
2271
2272 lastmatchrestart = ptr + linelength + endlinelength;
2273 lastmatchnumber = linenumber + 1;
2274 }
2275
2276 /* For a match in multiline inverted mode (which of course did not cause
2277 anything to be printed), we have to move on to the end of the match before
2278 proceeding. */
2279
2280 if (multiline && invert && match)
2281 {
2282 int ellength;
2283 char *endmatch = ptr + offsets[1];
2284 t = ptr;
2285 while (t < endmatch)
2286 {
2287 t = end_of_line(t, endptr, &ellength);
2288 if (t <= endmatch) linenumber++; else break;
2289 }
2290 endmatch = end_of_line(endmatch, endptr, &ellength);
2291 linelength = endmatch - ptr - ellength;
2292 }
2293
2294 /* Advance to after the newline and increment the line number. The file
2295 offset to the current line is maintained in filepos. */
2296
2297 END_ONE_MATCH:
2298 ptr += linelength + endlinelength;
2299 filepos += (int)(linelength + endlinelength);
2300 linenumber++;
2301
2302 /* If input is line buffered, and the buffer is not yet full, read another
2303 line and add it into the buffer. */
2304
2305 if (input_line_buffered && bufflength < (size_t)bufsize)
2306 {
2307 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2308 bufflength += add;
2309 endptr += add;
2310 }
2311
2312 /* If we haven't yet reached the end of the file (the buffer is full), and
2313 the current point is in the top 1/3 of the buffer, slide the buffer down by
2314 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2315 about to be lost, print them. */
2316
2317 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2318 {
2319 if (after_context > 0 &&
2320 lastmatchnumber > 0 &&
2321 lastmatchrestart < main_buffer + bufthird)
2322 {
2323 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2324 lastmatchnumber = 0;
2325 }
2326
2327 /* Now do the shuffle */
2328
2329 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2330 ptr -= bufthird;
2331
2332 #ifdef SUPPORT_LIBZ
2333 if (frtype == FR_LIBZ)
2334 bufflength = 2*bufthird +
2335 gzread (ingz, main_buffer + 2*bufthird, bufthird);
2336 else
2337 #endif
2338
2339 #ifdef SUPPORT_LIBBZ2
2340 if (frtype == FR_LIBBZ2)
2341 bufflength = 2*bufthird +
2342 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2343 else
2344 #endif
2345
2346 bufflength = 2*bufthird +
2347 (input_line_buffered?
2348 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2349 fread(main_buffer + 2*bufthird, 1, bufthird, in));
2350 endptr = main_buffer + bufflength;
2351
2352 /* Adjust any last match point */
2353
2354 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2355 }
2356 } /* Loop through the whole file */
2357
2358 /* End of file; print final "after" lines if wanted; do_after_lines sets
2359 hyphenpending if it prints something. */
2360
2361 if (!show_only_matching && !count_only)
2362 {
2363 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2364 hyphenpending |= endhyphenpending;
2365 }
2366
2367 /* Print the file name if we are looking for those without matches and there
2368 were none. If we found a match, we won't have got this far. */
2369
2370 if (filenames == FN_NOMATCH_ONLY)
2371 {
2372 fprintf(stdout, "%s" STDOUT_NL, printname);
2373 return 0;
2374 }
2375
2376 /* Print the match count if wanted */
2377
2378 if (count_only && !quiet)
2379 {
2380 if (count > 0 || !omit_zero_count)
2381 {
2382 if (printname != NULL && filenames != FN_NONE)
2383 fprintf(stdout, "%s:", printname);
2384 fprintf(stdout, "%d" STDOUT_NL, count);
2385 }
2386 }
2387
2388 return rc;
2389 }
2390
2391
2392
2393 /*************************************************
2394 * Grep a file or recurse into a directory *
2395 *************************************************/
2396
2397 /* Given a path name, if it's a directory, scan all the files if we are
2398 recursing; if it's a file, grep it.
2399
2400 Arguments:
2401 pathname the path to investigate
2402 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2403 only_one_at_top TRUE if the path is the only one at toplevel
2404
2405 Returns: -1 the file/directory was skipped
2406 0 if there was at least one match
2407 1 if there were no matches
2408 2 there was some kind of error
2409
2410 However, file opening failures are suppressed if "silent" is set.
2411 */
2412
2413 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)2414 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2415 {
2416 int rc = 1;
2417 int frtype;
2418 void *handle;
2419 char *lastcomp;
2420 FILE *in = NULL; /* Ensure initialized */
2421
2422 #ifdef SUPPORT_LIBZ
2423 gzFile ingz = NULL;
2424 #endif
2425
2426 #ifdef SUPPORT_LIBBZ2
2427 BZFILE *inbz2 = NULL;
2428 #endif
2429
2430 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2431 int pathlen;
2432 #endif
2433
2434 #if defined NATIVE_ZOS
2435 int zos_type;
2436 FILE *zos_test_file;
2437 #endif
2438
2439 /* If the file name is "-" we scan stdin */
2440
2441 if (strcmp(pathname, "-") == 0)
2442 {
2443 return pcre2grep(stdin, FR_PLAIN, stdin_name,
2444 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2445 stdin_name : NULL);
2446 }
2447
2448 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2449 directories, whereas --include and --exclude apply to everything else. The test
2450 is against the final component of the path. */
2451
2452 lastcomp = strrchr(pathname, FILESEP);
2453 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2454
2455 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2456 Otherwise, scan the directory and recurse for each path within it. The scanning
2457 code is localized so it can be made system-specific. */
2458
2459
2460 /* For z/OS, determine the file type. */
2461
2462 #if defined NATIVE_ZOS
2463 zos_test_file = fopen(pathname,"rb");
2464
2465 if (zos_test_file == NULL)
2466 {
2467 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
2468 pathname, strerror(errno));
2469 return -1;
2470 }
2471 zos_type = identifyzosfiletype (zos_test_file);
2472 fclose (zos_test_file);
2473
2474 /* Handle a PDS in separate code */
2475
2476 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2477 {
2478 return travelonpdsdir (pathname, only_one_at_top);
2479 }
2480
2481 /* Deal with regular files in the normal way below. These types are:
2482 zos_type == __ZOS_PDS_MEMBER
2483 zos_type == __ZOS_PS
2484 zos_type == __ZOS_VSAM_KSDS
2485 zos_type == __ZOS_VSAM_ESDS
2486 zos_type == __ZOS_VSAM_RRDS
2487 */
2488
2489 /* Handle a z/OS directory using common code. */
2490
2491 else if (zos_type == __ZOS_HFS)
2492 {
2493 #endif /* NATIVE_ZOS */
2494
2495
2496 /* Handle directories: common code for all OS */
2497
2498 if (isdirectory(pathname))
2499 {
2500 if (dee_action == dee_SKIP ||
2501 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2502 return -1;
2503
2504 if (dee_action == dee_RECURSE)
2505 {
2506 char buffer[1024];
2507 char *nextfile;
2508 directory_type *dir = opendirectory(pathname);
2509
2510 if (dir == NULL)
2511 {
2512 if (!silent)
2513 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
2514 strerror(errno));
2515 return 2;
2516 }
2517
2518 while ((nextfile = readdirectory(dir)) != NULL)
2519 {
2520 int frc;
2521 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2522 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2523 if (frc > 1) rc = frc;
2524 else if (frc == 0 && rc == 1) rc = 0;
2525 }
2526
2527 closedirectory(dir);
2528 return rc;
2529 }
2530 }
2531
2532 #if defined NATIVE_ZOS
2533 }
2534 #endif
2535
2536 /* If the file is not a directory, check for a regular file, and if it is not,
2537 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2538 exclusion. */
2539
2540 else if (
2541 #if defined NATIVE_ZOS
2542 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2543 #else /* all other OS */
2544 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2545 #endif
2546 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2547 return -1; /* File skipped */
2548
2549 /* Control reaches here if we have a regular file, or if we have a directory
2550 and recursion or skipping was not requested, or if we have anything else and
2551 skipping was not requested. The scan proceeds. If this is the first and only
2552 argument at top level, we don't show the file name, unless we are only showing
2553 the file name, or the filename was forced (-H). */
2554
2555 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2556 pathlen = (int)(strlen(pathname));
2557 #endif
2558
2559 /* Open using zlib if it is supported and the file name ends with .gz. */
2560
2561 #ifdef SUPPORT_LIBZ
2562 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2563 {
2564 ingz = gzopen(pathname, "rb");
2565 if (ingz == NULL)
2566 {
2567 if (!silent)
2568 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
2569 strerror(errno));
2570 return 2;
2571 }
2572 handle = (void *)ingz;
2573 frtype = FR_LIBZ;
2574 }
2575 else
2576 #endif
2577
2578 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2579
2580 #ifdef SUPPORT_LIBBZ2
2581 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2582 {
2583 inbz2 = BZ2_bzopen(pathname, "rb");
2584 handle = (void *)inbz2;
2585 frtype = FR_LIBBZ2;
2586 }
2587 else
2588 #endif
2589
2590 /* Otherwise use plain fopen(). The label is so that we can come back here if
2591 an attempt to read a .bz2 file indicates that it really is a plain file. */
2592
2593 #ifdef SUPPORT_LIBBZ2
2594 PLAIN_FILE:
2595 #endif
2596 {
2597 in = fopen(pathname, "rb");
2598 handle = (void *)in;
2599 frtype = FR_PLAIN;
2600 }
2601
2602 /* All the opening methods return errno when they fail. */
2603
2604 if (handle == NULL)
2605 {
2606 if (!silent)
2607 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
2608 strerror(errno));
2609 return 2;
2610 }
2611
2612 /* Now grep the file */
2613
2614 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2615 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2616
2617 /* Close in an appropriate manner. */
2618
2619 #ifdef SUPPORT_LIBZ
2620 if (frtype == FR_LIBZ)
2621 gzclose(ingz);
2622 else
2623 #endif
2624
2625 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2626 read failed. If the error indicates that the file isn't in fact bzipped, try
2627 again as a normal file. */
2628
2629 #ifdef SUPPORT_LIBBZ2
2630 if (frtype == FR_LIBBZ2)
2631 {
2632 if (rc == 3)
2633 {
2634 int errnum;
2635 const char *err = BZ2_bzerror(inbz2, &errnum);
2636 if (errnum == BZ_DATA_ERROR_MAGIC)
2637 {
2638 BZ2_bzclose(inbz2);
2639 goto PLAIN_FILE;
2640 }
2641 else if (!silent)
2642 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
2643 pathname, err);
2644 rc = 2; /* The normal "something went wrong" code */
2645 }
2646 BZ2_bzclose(inbz2);
2647 }
2648 else
2649 #endif
2650
2651 /* Normal file close */
2652
2653 fclose(in);
2654
2655 /* Pass back the yield from pcre2grep(). */
2656
2657 return rc;
2658 }
2659
2660
2661
2662 /*************************************************
2663 * Handle a single-letter, no data option *
2664 *************************************************/
2665
2666 static int
handle_option(int letter,int options)2667 handle_option(int letter, int options)
2668 {
2669 switch(letter)
2670 {
2671 case N_FOFFSETS: file_offsets = TRUE; break;
2672 case N_HELP: help(); pcre2grep_exit(0);
2673 case N_LBUFFER: line_buffered = TRUE; break;
2674 case N_LOFFSETS: line_offsets = number = TRUE; break;
2675 case N_NOJIT: use_jit = FALSE; break;
2676 case 'a': binary_files = BIN_TEXT; break;
2677 case 'c': count_only = TRUE; break;
2678 case 'F': process_options |= PO_FIXED_STRINGS; break;
2679 case 'H': filenames = FN_FORCE; break;
2680 case 'I': binary_files = BIN_NOMATCH; break;
2681 case 'h': filenames = FN_NONE; break;
2682 case 'i': options |= PCRE2_CASELESS; break;
2683 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2684 case 'L': filenames = FN_NOMATCH_ONLY; break;
2685 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
2686 case 'n': number = TRUE; break;
2687
2688 case 'o':
2689 only_matching_last = add_number(0, only_matching_last);
2690 if (only_matching == NULL) only_matching = only_matching_last;
2691 break;
2692
2693 case 'q': quiet = TRUE; break;
2694 case 'r': dee_action = dee_RECURSE; break;
2695 case 's': silent = TRUE; break;
2696 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
2697 case 'v': invert = TRUE; break;
2698 case 'w': process_options |= PO_WORD_MATCH; break;
2699 case 'x': process_options |= PO_LINE_MATCH; break;
2700
2701 case 'V':
2702 {
2703 unsigned char buffer[128];
2704 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
2705 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
2706 }
2707 pcre2grep_exit(0);
2708 break;
2709
2710 default:
2711 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
2712 pcre2grep_exit(usage(2));
2713 }
2714
2715 return options;
2716 }
2717
2718
2719
2720
2721 /*************************************************
2722 * Construct printed ordinal *
2723 *************************************************/
2724
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
and so on. Numbers whose last two digits are 11, 12, or 13 take "th" ("11th",
"112th"), not the suffix suggested by the final digit alone. Zero and negative
numbers get "th".

Argument:   n   the number
Returns:    a pointer to a static buffer holding the text; it is overwritten
            by the next call
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int last_two = n % 100;

/* Only outside the 11..13 "teens" does the final digit choose the suffix. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1:  suffix = "st"; break;
    case 2:  suffix = "nd"; break;
    case 3:  suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
2743
2744
2745
2746 /*************************************************
2747 * Compile a single pattern *
2748 *************************************************/
2749
2750 /* Do nothing if the pattern has already been compiled. This is the case for
2751 include/exclude patterns read from a file.
2752
2753 When the -F option has been used, each "pattern" may be a list of strings,
2754 separated by line breaks. They will be matched literally. We split such a
2755 string and compile the first substring, inserting an additional block into the
2756 pattern chain.
2757
2758 Arguments:
2759 p points to the pattern block
2760 options the PCRE options
2761 popts the processing options
2762 fromfile TRUE if the pattern was read from a file
2763 fromtext file name or identifying text (e.g. "include")
2764 count 0 if this is the only command line pattern, or
2765 number of the command line pattern, or
2766 linenumber for a pattern from a file
2767
2768 Returns: TRUE on success, FALSE after an error
2769 */
2770
2771 static BOOL
compile_pattern(patstr * p,int options,int popts,int fromfile,const char * fromtext,int count)2772 compile_pattern(patstr *p, int options, int popts, int fromfile,
2773 const char *fromtext, int count)
2774 {
2775 unsigned char buffer[PATBUFSIZE];
2776 PCRE2_SIZE erroffset;
2777 char *ps = p->string;
2778 unsigned int patlen = strlen(ps);
2779 int errcode;
2780
2781 if (p->compiled != NULL) return TRUE;
2782
2783 if ((popts & PO_FIXED_STRINGS) != 0)
2784 {
2785 int ellength;
2786 char *eop = ps + patlen;
2787 char *pe = end_of_line(ps, eop, &ellength);
2788
2789 if (ellength != 0)
2790 {
2791 if (add_pattern(pe, p) == NULL) return FALSE;
2792 patlen = (int)(pe - ps - ellength);
2793 }
2794 }
2795
2796 sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2797 p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
2798 &erroffset, compile_context);
2799
2800 /* Handle successful compile */
2801
2802 if (p->compiled != NULL)
2803 {
2804 #ifdef SUPPORT_PCRE2GREP_JIT
2805 if (use_jit)
2806 {
2807 errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
2808 if (errcode == 0) return TRUE;
2809 erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */
2810 }
2811 else
2812 #endif
2813 return TRUE;
2814 }
2815
2816 /* Handle compile and JIT compile errors */
2817
2818 erroffset -= (int)strlen(prefix[popts]);
2819 if (erroffset > patlen) erroffset = patlen;
2820 pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
2821
2822 if (fromfile)
2823 {
2824 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
2825 "at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
2826 }
2827 else
2828 {
2829 if (count == 0)
2830 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
2831 fromtext, (int)erroffset, buffer);
2832 else
2833 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
2834 ordin(count), fromtext, (int)erroffset, buffer);
2835 }
2836
2837 return FALSE;
2838 }
2839
2840
2841
2842 /*************************************************
2843 * Read and compile a file of patterns *
2844 *************************************************/
2845
2846 /* This is used for --filelist, --include-from, and --exclude-from.
2847
2848 Arguments:
2849 name the name of the file; "-" is stdin
2850 patptr pointer to the pattern chain anchor
2851 patlastptr pointer to the last pattern pointer
2852 popts the process options to pass to pattern_compile()
2853
2854 Returns: TRUE if all went well
2855 */
2856
2857 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr,int popts)2858 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2859 {
2860 int linenumber = 0;
2861 FILE *f;
2862 char *filename;
2863 char buffer[PATBUFSIZE];
2864
2865 if (strcmp(name, "-") == 0)
2866 {
2867 f = stdin;
2868 filename = stdin_name;
2869 }
2870 else
2871 {
2872 f = fopen(name, "r");
2873 if (f == NULL)
2874 {
2875 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
2876 return FALSE;
2877 }
2878 filename = name;
2879 }
2880
2881 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2882 {
2883 char *s = buffer + (int)strlen(buffer);
2884 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2885 *s = 0;
2886 linenumber++;
2887 if (buffer[0] == 0) continue; /* Skip blank lines */
2888
2889 /* Note: this call to add_pattern() puts a pointer to the local variable
2890 "buffer" into the pattern chain. However, that pointer is used only when
2891 compiling the pattern, which happens immediately below, so we flatten it
2892 afterwards, as a precaution against any later code trying to use it. */
2893
2894 *patlastptr = add_pattern(buffer, *patlastptr);
2895 if (*patlastptr == NULL)
2896 {
2897 if (f != stdin) fclose(f);
2898 return FALSE;
2899 }
2900 if (*patptr == NULL) *patptr = *patlastptr;
2901
2902 /* This loop is needed because compiling a "pattern" when -F is set may add
2903 on additional literal patterns if the original contains a newline. In the
2904 common case, it never will, because fgets() stops at a newline. However,
2905 the -N option can be used to give pcre2grep a different newline setting. */
2906
2907 for(;;)
2908 {
2909 if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
2910 linenumber))
2911 {
2912 if (f != stdin) fclose(f);
2913 return FALSE;
2914 }
2915 (*patlastptr)->string = NULL; /* Insurance */
2916 if ((*patlastptr)->next == NULL) break;
2917 *patlastptr = (*patlastptr)->next;
2918 }
2919 }
2920
2921 if (f != stdin) fclose(f);
2922 return TRUE;
2923 }
2924
2925
2926
2927 /*************************************************
2928 * Main program *
2929 *************************************************/
2930
2931 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2932
2933 int
main(int argc,char ** argv)2934 main(int argc, char **argv)
2935 {
2936 int i, j;
2937 int rc = 1;
2938 BOOL only_one_at_top;
2939 patstr *cp;
2940 fnstr *fn;
2941 const char *locale_from = "--locale";
2942
2943 #ifdef SUPPORT_PCRE2GREP_JIT
2944 pcre2_jit_stack *jit_stack = NULL;
2945 #endif
2946
2947 /* In Windows, stdout is set up as a text stream, which means that \n is
2948 converted to \r\n. This causes output lines that are copied from the input to
2949 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
2950 that stdout is a binary stream. Note that this means all other output to stdout
2951 must use STDOUT_NL to terminate lines. */
2952
2953 #if defined(_WIN32) || defined(WIN32)
2954 _setmode( _fileno(stdout), _O_BINARY);
2955 #endif
2956
2957 /* Set up a default compile and match contexts and a match data block. */
2958
2959 compile_context = pcre2_compile_context_create(NULL);
2960 match_context = pcre2_match_context_create(NULL);
2961 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
2962 offsets = pcre2_get_ovector_pointer(match_data);
2963
2964 /* If string (script) callouts are supported, set up the callout processing
2965 function. */
2966
2967 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2968 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
2969 #endif
2970
2971 /* Process the options */
2972
2973 for (i = 1; i < argc; i++)
2974 {
2975 option_item *op = NULL;
2976 char *option_data = (char *)""; /* default to keep compiler happy */
2977 BOOL longop;
2978 BOOL longopwasequals = FALSE;
2979
2980 if (argv[i][0] != '-') break;
2981
2982 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2983 but only if we have previously had -e or -f to define the patterns. */
2984
2985 if (argv[i][1] == 0)
2986 {
2987 if (pattern_files != NULL || patterns != NULL) break;
2988 else pcre2grep_exit(usage(2));
2989 }
2990
2991 /* Handle a long name option, or -- to terminate the options */
2992
2993 if (argv[i][1] == '-')
2994 {
2995 char *arg = argv[i] + 2;
2996 char *argequals = strchr(arg, '=');
2997
2998 if (*arg == 0) /* -- terminates options */
2999 {
3000 i++;
3001 break; /* out of the options-handling loop */
3002 }
3003
3004 longop = TRUE;
3005
3006 /* Some long options have data that follows after =, for example file=name.
3007 Some options have variations in the long name spelling: specifically, we
3008 allow "regexp" because GNU grep allows it, though I personally go along
3009 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3010 These options are entered in the table as "regex(p)". Options can be in
3011 both these categories. */
3012
3013 for (op = optionlist; op->one_char != 0; op++)
3014 {
3015 char *opbra = strchr(op->long_name, '(');
3016 char *equals = strchr(op->long_name, '=');
3017
3018 /* Handle options with only one spelling of the name */
3019
3020 if (opbra == NULL) /* Does not contain '(' */
3021 {
3022 if (equals == NULL) /* Not thing=data case */
3023 {
3024 if (strcmp(arg, op->long_name) == 0) break;
3025 }
3026 else /* Special case xxx=data */
3027 {
3028 int oplen = (int)(equals - op->long_name);
3029 int arglen = (argequals == NULL)?
3030 (int)strlen(arg) : (int)(argequals - arg);
3031 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3032 {
3033 option_data = arg + arglen;
3034 if (*option_data == '=')
3035 {
3036 option_data++;
3037 longopwasequals = TRUE;
3038 }
3039 break;
3040 }
3041 }
3042 }
3043
3044 /* Handle options with an alternate spelling of the name */
3045
3046 else
3047 {
3048 char buff1[24];
3049 char buff2[24];
3050
3051 int baselen = (int)(opbra - op->long_name);
3052 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3053 int arglen = (argequals == NULL || equals == NULL)?
3054 (int)strlen(arg) : (int)(argequals - arg);
3055
3056 sprintf(buff1, "%.*s", baselen, op->long_name);
3057 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
3058
3059 if (strncmp(arg, buff1, arglen) == 0 ||
3060 strncmp(arg, buff2, arglen) == 0)
3061 {
3062 if (equals != NULL && argequals != NULL)
3063 {
3064 option_data = argequals;
3065 if (*option_data == '=')
3066 {
3067 option_data++;
3068 longopwasequals = TRUE;
3069 }
3070 }
3071 break;
3072 }
3073 }
3074 }
3075
3076 if (op->one_char == 0)
3077 {
3078 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3079 pcre2grep_exit(usage(2));
3080 }
3081 }
3082
3083 /* Jeffrey Friedl's debugging harness uses these additional options which
3084 are not in the right form for putting in the option table because they use
3085 only one hyphen, yet are more than one character long. By putting them
3086 separately here, they will not get displayed as part of the help() output,
3087 but I don't think Jeffrey will care about that. */
3088
3089 #ifdef JFRIEDL_DEBUG
3090 else if (strcmp(argv[i], "-pre") == 0) {
3091 jfriedl_prefix = argv[++i];
3092 continue;
3093 } else if (strcmp(argv[i], "-post") == 0) {
3094 jfriedl_postfix = argv[++i];
3095 continue;
3096 } else if (strcmp(argv[i], "-XT") == 0) {
3097 sscanf(argv[++i], "%d", &jfriedl_XT);
3098 continue;
3099 } else if (strcmp(argv[i], "-XR") == 0) {
3100 sscanf(argv[++i], "%d", &jfriedl_XR);
3101 continue;
3102 }
3103 #endif
3104
3105
3106 /* One-char options; many that have no data may be in a single argument; we
3107 continue till we hit the last one or one that needs data. */
3108
3109 else
3110 {
3111 char *s = argv[i] + 1;
3112 longop = FALSE;
3113
3114 while (*s != 0)
3115 {
3116 for (op = optionlist; op->one_char != 0; op++)
3117 {
3118 if (*s == op->one_char) break;
3119 }
3120 if (op->one_char == 0)
3121 {
3122 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3123 *s, argv[i]);
3124 pcre2grep_exit(usage(2));
3125 }
3126
3127 option_data = s+1;
3128
3129 /* Break out if this is the last character in the string; it's handled
3130 below like a single multi-char option. */
3131
3132 if (*option_data == 0) break;
3133
3134 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3135 are used for ones that either have a numerical number or defaults, i.e.
3136 the data is optional. If a digit follows, there is data; if not, carry on
3137 with other single-character options in the same string. */
3138
3139 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3140 {
3141 if (isdigit((unsigned char)s[1])) break;
3142 }
3143 else /* Check for an option with data */
3144 {
3145 if (op->type != OP_NODATA) break;
3146 }
3147
3148 /* Handle a single-character option with no data, then loop for the
3149 next character in the string. */
3150
3151 pcre2_options = handle_option(*s++, pcre2_options);
3152 }
3153 }
3154
3155 /* At this point we should have op pointing to a matched option. If the type
3156 is OP_NODATA, it means that there is no data, and the option might set
3157 something in the PCRE2 options. */
3158
3159 if (op->type == OP_NODATA)
3160 {
3161 pcre2_options = handle_option(op->one_char, pcre2_options);
3162 continue;
3163 }
3164
3165 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3166 either has a value or defaults to something. It cannot have data in a
3167 separate item. At the moment, the only such options are "colo(u)r",
3168 "only-matching", and Jeffrey Friedl's special -S debugging option. */
3169
3170 if (*option_data == 0 &&
3171 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3172 op->type == OP_OP_NUMBERS))
3173 {
3174 switch (op->one_char)
3175 {
3176 case N_COLOUR:
3177 colour_option = (char *)"auto";
3178 break;
3179
3180 case 'o':
3181 only_matching_last = add_number(0, only_matching_last);
3182 if (only_matching == NULL) only_matching = only_matching_last;
3183 break;
3184
3185 #ifdef JFRIEDL_DEBUG
3186 case 'S':
3187 S_arg = 0;
3188 break;
3189 #endif
3190 }
3191 continue;
3192 }
3193
3194 /* Otherwise, find the data string for the option. */
3195
3196 if (*option_data == 0)
3197 {
3198 if (i >= argc - 1 || longopwasequals)
3199 {
3200 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3201 pcre2grep_exit(usage(2));
3202 }
3203 option_data = argv[++i];
3204 }
3205
3206 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3207 added to a chain of numbers. */
3208
3209 if (op->type == OP_OP_NUMBERS)
3210 {
3211 unsigned long int n = decode_number(option_data, op, longop);
3212 omdatastr *omd = (omdatastr *)op->dataptr;
3213 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3214 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3215 }
3216
3217 /* If the option type is OP_PATLIST, it's the -e option, or one of the
3218 include/exclude options, which can be called multiple times to create lists
3219 of patterns. */
3220
3221 else if (op->type == OP_PATLIST)
3222 {
3223 patdatastr *pd = (patdatastr *)op->dataptr;
3224 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
3225 if (*(pd->lastptr) == NULL) goto EXIT2;
3226 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3227 }
3228
3229 /* If the option type is OP_FILELIST, it's one of the options that names a
3230 file. */
3231
3232 else if (op->type == OP_FILELIST)
3233 {
3234 fndatastr *fd = (fndatastr *)op->dataptr;
3235 fn = (fnstr *)malloc(sizeof(fnstr));
3236 if (fn == NULL)
3237 {
3238 fprintf(stderr, "pcre2grep: malloc failed\n");
3239 goto EXIT2;
3240 }
3241 fn->next = NULL;
3242 fn->name = option_data;
3243 if (*(fd->anchor) == NULL)
3244 *(fd->anchor) = fn;
3245 else
3246 (*(fd->lastptr))->next = fn;
3247 *(fd->lastptr) = fn;
3248 }
3249
3250 /* Handle OP_BINFILES */
3251
3252 else if (op->type == OP_BINFILES)
3253 {
3254 if (strcmp(option_data, "binary") == 0)
3255 binary_files = BIN_BINARY;
3256 else if (strcmp(option_data, "without-match") == 0)
3257 binary_files = BIN_NOMATCH;
3258 else if (strcmp(option_data, "text") == 0)
3259 binary_files = BIN_TEXT;
3260 else
3261 {
3262 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3263 option_data);
3264 pcre2grep_exit(usage(2));
3265 }
3266 }
3267
3268 /* Otherwise, deal with a single string or numeric data value. */
3269
3270 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3271 op->type != OP_OP_NUMBER)
3272 {
3273 *((char **)op->dataptr) = option_data;
3274 }
3275 else
3276 {
3277 unsigned long int n = decode_number(option_data, op, longop);
3278 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3279 else *((int *)op->dataptr) = n;
3280 }
3281 }
3282
3283 /* Options have been decoded. If -C was used, its value is used as a default
3284 for -A and -B. */
3285
3286 if (both_context > 0)
3287 {
3288 if (after_context == 0) after_context = both_context;
3289 if (before_context == 0) before_context = both_context;
3290 }
3291
3292 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
3293 However, all three set show_only_matching because they display, each in their
3294 own way, only the data that has matched. */
3295
3296 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
3297 (file_offsets && line_offsets))
3298 {
3299 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
3300 "and/or --line-offsets\n");
3301 pcre2grep_exit(usage(2));
3302 }
3303
3304 /* Put limits into the match context. */
3305
3306 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
3307 if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
3308
3309 if (only_matching != NULL || file_offsets || line_offsets)
3310 show_only_matching = TRUE;
3311
3312 /* If a locale has not been provided as an option, see if the LC_ALL or
3313 LC_CTYPE environment variable is set (checked in that order), and if so, use it. */
3314
3315 if (locale == NULL)
3316 {
3317 locale = getenv("LC_ALL");
3318 locale_from = "LCC_ALL";
3319 }
3320
3321 if (locale == NULL)
3322 {
3323 locale = getenv("LC_CTYPE");
3324 locale_from = "LC_CTYPE";
3325 }
3326
3327 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
3328 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
3329
3330 if (locale != NULL)
3331 {
3332 if (setlocale(LC_CTYPE, locale) == NULL)
3333 {
3334 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
3335 locale, locale_from);
3336 goto EXIT2;
3337 }
3338 character_tables = pcre2_maketables(NULL);
3339 pcre2_set_character_tables(compile_context, character_tables);
3340 }
3341
3342 /* Sort out colouring */
3343
3344 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3345 {
3346 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
3347 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3348 else
3349 {
3350 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
3351 colour_option);
3352 goto EXIT2;
3353 }
3354 if (do_colour)
3355 {
3356 char *cs = getenv("PCRE2GREP_COLOUR");
3357 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
3358 if (cs != NULL) colour_string = cs;
3359 }
3360 }
3361
3362 /* Sort out a newline setting. */
3363
3364 if (newline_arg != NULL)
3365 {
3366 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
3367 endlinetype++)
3368 {
3369 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
3370 }
3371 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
3372 pcre2_set_newline(compile_context, endlinetype);
3373 else
3374 {
3375 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
3376 newline_arg);
3377 goto EXIT2;
3378 }
3379 }
3380
3381 /* Find default newline convention */
3382
3383 else
3384 {
3385 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
3386 }
3387
3388 /* Interpret the text values for -d and -D */
3389
3390 if (dee_option != NULL)
3391 {
3392 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3393 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3394 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3395 else
3396 {
3397 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
3398 goto EXIT2;
3399 }
3400 }
3401
3402 if (DEE_option != NULL)
3403 {
3404 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3405 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3406 else
3407 {
3408 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
3409 goto EXIT2;
3410 }
3411 }
3412
3413 /* Check the values for Jeffrey Friedl's debugging options. */
3414
3415 #ifdef JFRIEDL_DEBUG
3416 if (S_arg > 9)
3417 {
3418 fprintf(stderr, "pcre2grep: bad value for -S option\n");
3419 return 2;
3420 }
3421 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3422 {
3423 if (jfriedl_XT == 0) jfriedl_XT = 1;
3424 if (jfriedl_XR == 0) jfriedl_XR = 1;
3425 }
3426 #endif
3427
3428 /* Get memory for the main buffer. */
3429
3430 bufsize = 3*bufthird;
3431 main_buffer = (char *)malloc(bufsize);
3432
3433 if (main_buffer == NULL)
3434 {
3435 fprintf(stderr, "pcre2grep: malloc failed\n");
3436 goto EXIT2;
3437 }
3438
3439 /* If no patterns were provided by -e, and there are no files provided by -f,
3440 the first argument is the one and only pattern, and it must exist. */
3441
3442 if (patterns == NULL && pattern_files == NULL)
3443 {
3444 if (i >= argc) return usage(2);
3445 patterns = patterns_last = add_pattern(argv[i++], NULL);
3446 if (patterns == NULL) goto EXIT2;
3447 }
3448
3449 /* Compile the patterns that were provided on the command line, either by
3450 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3451 after all the command-line options are read so that we know which PCRE options
3452 to use. When -F is used, compile_pattern() may add another block into the
3453 chain, so we must not access the next pointer till after the compile. */
3454
3455 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3456 {
3457 if (!compile_pattern(cp, pcre2_options, process_options, FALSE, "command-line",
3458 (j == 1 && patterns->next == NULL)? 0 : j))
3459 goto EXIT2;
3460 }
3461
3462 /* Read and compile the regular expressions that are provided in files. */
3463
3464 for (fn = pattern_files; fn != NULL; fn = fn->next)
3465 {
3466 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3467 goto EXIT2;
3468 }
3469
3470 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3471
3472 #ifdef SUPPORT_PCRE2GREP_JIT
3473 if (use_jit)
3474 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
3475 #endif
3476
3477 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3478 {
3479 #ifdef SUPPORT_PCRE2GREP_JIT
3480 if (jit_stack != NULL && cp->compiled != NULL)
3481 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
3482 #endif
3483 }
3484
3485 /* If there are include or exclude patterns read from the command line, compile
3486 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3487 0. */
3488
3489 for (j = 0; j < 4; j++)
3490 {
3491 int k;
3492 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3493 {
3494 if (!compile_pattern(cp, pcre2_options, 0, FALSE, incexname[j],
3495 (k == 1 && cp->next == NULL)? 0 : k))
3496 goto EXIT2;
3497 }
3498 }
3499
3500 /* Read and compile include/exclude patterns from files. */
3501
3502 for (fn = include_from; fn != NULL; fn = fn->next)
3503 {
3504 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3505 goto EXIT2;
3506 }
3507
3508 for (fn = exclude_from; fn != NULL; fn = fn->next)
3509 {
3510 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3511 goto EXIT2;
3512 }
3513
3514 /* If there are no files that contain lists of files to search, and there are
3515 no file arguments, search stdin, and then exit. */
3516
3517 if (file_lists == NULL && i >= argc)
3518 {
3519 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
3520 (filenames > FN_DEFAULT)? stdin_name : NULL);
3521 goto EXIT;
3522 }
3523
3524 /* If any files that contain lists of files to search have been specified,
3525 read them line by line and search the given files. */
3526
3527 for (fn = file_lists; fn != NULL; fn = fn->next)
3528 {
3529 char buffer[PATBUFSIZE];
3530 FILE *fl;
3531 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3532 {
3533 fl = fopen(fn->name, "rb");
3534 if (fl == NULL)
3535 {
3536 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
3537 strerror(errno));
3538 goto EXIT2;
3539 }
3540 }
3541 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3542 {
3543 int frc;
3544 char *end = buffer + (int)strlen(buffer);
3545 while (end > buffer && isspace(end[-1])) end--;
3546 *end = 0;
3547 if (*buffer != 0)
3548 {
3549 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3550 if (frc > 1) rc = frc;
3551 else if (frc == 0 && rc == 1) rc = 0;
3552 }
3553 }
3554 if (fl != stdin) fclose(fl);
3555 }
3556
3557 /* After handling file-list, work through remaining arguments. Pass in the fact
3558 that there is only one argument at top level - this suppresses the file name if
3559 the argument is not a directory and filenames are not otherwise forced. */
3560
3561 only_one_at_top = i == argc - 1 && file_lists == NULL;
3562
3563 for (; i < argc; i++)
3564 {
3565 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3566 only_one_at_top);
3567 if (frc > 1) rc = frc;
3568 else if (frc == 0 && rc == 1) rc = 0;
3569 }
3570
3571 EXIT:
3572 #ifdef SUPPORT_PCRE2GREP_JIT
3573 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
3574 #endif
3575
3576 free(main_buffer);
3577 free((void *)character_tables);
3578
3579 pcre2_compile_context_free(compile_context);
3580 pcre2_match_context_free(match_context);
3581 pcre2_match_data_free(match_data);
3582
3583 free_pattern_chain(patterns);
3584 free_pattern_chain(include_patterns);
3585 free_pattern_chain(include_dir_patterns);
3586 free_pattern_chain(exclude_patterns);
3587 free_pattern_chain(exclude_dir_patterns);
3588
3589 free_file_chain(exclude_from);
3590 free_file_chain(include_from);
3591 free_file_chain(pattern_files);
3592 free_file_chain(file_lists);
3593
3594 while (only_matching != NULL)
3595 {
3596 omstr *this = only_matching;
3597 only_matching = this->next;
3598 free(this);
3599 }
3600
3601 pcre2grep_exit(rc);
3602
3603 EXIT2:
3604 rc = 2;
3605 goto EXIT;
3606 }
3607
3608 /* End of pcre2grep */
3609