1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2020 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Debugging code enabler */
82
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102
/* The interactive-input test lives in a single macro so that it can be
adjusted in one place for environments that need a different check. */

#define INTERACTIVE(stream) (isatty(fileno(stream)))
107
108
109 /* ---------------------- System-specific definitions ---------------------- */
110
111 /* A number of things vary for Windows builds. Originally, pcretest opened its
112 input and output without "b"; then I was told that "b" was needed in some
113 environments, so it was added for release 5.0 to both the input and output. (It
114 makes no difference on Unix-like systems.) Later I was told that it is wrong
115 for the input on Windows. I've now abstracted the modes into macros that are
116 set here, to make it easier to fiddle with them, and removed "b" from the input
117 mode under Windows. The BINARY versions are used when saving/restoring compiled
118 patterns. */
119
#if defined(_WIN32) || defined(WIN32)

/* Windows: the ordinary input mode has no "b"; output and the BINARY modes
keep it. */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"

/* Windows is said to use the underscore-prefixed names; they are mapped only
when the environment has not already defined the plain names as macros. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* Fix supplied by a user for Borland Builder 5 under Windows: _setmode() is
routed to setmode(). */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"

#if defined(NATIVE_ZOS)        /* z/OS uses non-binary I/O */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "w"
#else
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#endif

#endif  /* Windows / not Windows */
159
160 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
161 user [2] provided alternative code which worked better for him. I have
162 commented out the original, but kept it around just in case. */
163
164 #ifdef __VMS
165 #include <ssdef.h>
166 /* These two includes came from [2]. */
167 #include descrip
168 #include lib$routines
169 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
170 #endif
171
172 /* VC and older compilers don't support %td or %zu, and even some that claim to
173 be C99 don't support it (hence DISABLE_PERCENT_ZT). There are some non-C99
174 environments where %lu gives a warning with 32-bit pointers. As there doesn't
175 seem to be an easy way round this, just live with it (the cases are rare). */
176
/* Use the C99 length modifiers only when the compiler is not MSVC, reports
C99 or later, and DISABLE_PERCENT_ZT has not been set; otherwise fall back to
%lu with an explicit cast for sizes. */

#if !defined(_MSC_VER) && !defined(DISABLE_PERCENT_ZT) && \
    defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#define SIZ_CAST
#else
#define PTR_FORM "lu"
#define SIZ_FORM "lu"
#define SIZ_CAST (unsigned long int)
#endif
186
187 /* ------------------End of system-specific definitions -------------------- */
188
189 /* Glueing macros that are used in several places below. */
190
/* glue() pastes its two arguments together as written; G() goes through one
extra macro level so that its arguments are macro-expanded before pasting. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
193
194 /* Miscellaneous parameters and manifests */
195
/* Fallback when CLOCKS_PER_SEC has not been defined: use CLK_TCK if it
exists, otherwise 100. */

#if !defined(CLOCKS_PER_SEC) && defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined(CLOCKS_PER_SEC)
#define CLOCKS_PER_SEC 100
#endif
203
/* "Not set" markers */

#define CFORE_UNSET          UINT32_MAX  /* For startend/cfail/cerror fields */
#define CONVERT_UNSET        UINT32_MAX  /* For the convert_type field */

/* Sizes, counts and defaults */

#define DFA_WS_DIMENSION     1000        /* DFA workspace size */
#define DEFAULT_OVECCOUNT    15          /* Ovector count when none is given */
#define JUNK_OFFSET          0xdeadbeef  /* Value used to initialize ovector */
#define LOCALESIZE           32          /* Locale name size */
#define LOOPREPEAT           500000      /* Loop count for timing by default */
#define MALLOCLISTSIZE       20          /* Number of mallocs remembered */
#define PARENS_NEST_DEFAULT  220         /* Parentheses nest limit by default */
#define PATSTACKSIZE         20          /* Pattern stack, save/restore tests */
#define REPLACE_MODSIZE      100         /* Field for an 8-bit replacement */
#define VERSION_SIZE         64          /* Buffer size for version strings */
216
217 /* Default JIT compile options */
218
#define JIT_DEFAULT \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD)
222
223 /* Make sure the buffer into which replacement strings are copied is big enough
224 to hold them as 32-bit code units. */
225
#define REPLACE_BUFFSIZE  1024   /* Counted in bytes */

/* Execution modes; each constant equals the code unit width it names. */

#define PCRE8_MODE    8
#define PCRE16_MODE  16
#define PCRE32_MODE  32

/* Processing returns */

enum {
  PR_OK    = 0,
  PR_SKIP  = 1,
  PR_ABEND = 2
};
237
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK defers to isprint() only when use_tables is non-NULL and the value
is below 256; in every other case it falls back to PRINTABLE. The argument is
parenthesized so that an expression such as (a | b) is compared as a whole.
Note that the argument is evaluated more than once, so it must not have side
effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
250
251 /* We have to include some of the library source files because we need
252 to use some of the macros, internal structure definitions, and other internal
253 values - pcre2test has "inside information" compared to an application program
254 that strictly follows the PCRE2 API.
255
256 Before including pcre2_internal.h we define PRIV so that it does not get
257 defined therein. This ensures that PRIV names in the included files do not
258 clash with those in the libraries. Also, although pcre2_internal.h does itself
259 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
260 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
261 for building the library. */
262
263 #define PRIV(name) name
264 #define PCRE2_CODE_UNIT_WIDTH 0
265 #include "pcre2.h"
266 #include "pcre2posix.h"
267 #include "pcre2_internal.h"
268
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
272
273 #define PCRE2_PCRE2TEST
274 #include "pcre2_tables.c"
275 #include "pcre2_ucd.c"
276
277 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
278 check needed for overflow depends on whether long ints are in fact longer than
279 ints. They are defined not to be shorter. */
280
/* U32OVERFLOW(x) tests an unsigned long value and S32OVERFLOW(x) a long value.
When long is wider than 32 bits a range comparison is used; otherwise only the
extreme values are treated as overflow. The argument is parenthesized so that
expressions containing operators of lower precedence than the comparison are
tested as a whole. It may be evaluated more than once, so it must not have
side effects. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
292
293 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
294 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
295 defined. We can now include it for each supported code unit width. Because
296 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
297 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
298 while including these files, and then restore it to a no-op. Because LINK_SIZE
299 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
300 these inclusions should not be changed. */
301
302 #undef PCRE2_SUFFIX
303 #undef PCRE2_CODE_UNIT_WIDTH
304
305 #ifdef SUPPORT_PCRE2_8
306 #define PCRE2_CODE_UNIT_WIDTH 8
307 #define PCRE2_SUFFIX(a) G(a,8)
308 #include "pcre2_intmodedep.h"
309 #include "pcre2_printint.c"
310 #undef PCRE2_CODE_UNIT_WIDTH
311 #undef PCRE2_SUFFIX
312 #endif /* SUPPORT_PCRE2_8 */
313
314 #ifdef SUPPORT_PCRE2_16
315 #define PCRE2_CODE_UNIT_WIDTH 16
316 #define PCRE2_SUFFIX(a) G(a,16)
317 #include "pcre2_intmodedep.h"
318 #include "pcre2_printint.c"
319 #undef PCRE2_CODE_UNIT_WIDTH
320 #undef PCRE2_SUFFIX
321 #endif /* SUPPORT_PCRE2_16 */
322
323 #ifdef SUPPORT_PCRE2_32
324 #define PCRE2_CODE_UNIT_WIDTH 32
325 #define PCRE2_SUFFIX(a) G(a,32)
326 #include "pcre2_intmodedep.h"
327 #include "pcre2_printint.c"
328 #undef PCRE2_CODE_UNIT_WIDTH
329 #undef PCRE2_SUFFIX
330 #endif /* SUPPORT_PCRE2_32 */
331
332 #define PCRE2_SUFFIX(a) a
333
334 /* We need to be able to check input text for UTF-8 validity, whatever code
335 widths are actually available, because the input to pcre2test is always in
336 8-bit code units. So we include the UTF validity checking function for 8-bit
337 code units. */
338
339 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
340
341 #define PCRE2_CODE_UNIT_WIDTH 8
342 #undef PCRE2_SPTR
343 #define PCRE2_SPTR PCRE2_SPTR8
344 #include "pcre2_valid_utf.c"
345 #undef PCRE2_CODE_UNIT_WIDTH
346 #undef PCRE2_SPTR
347
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
support, there must be 16-bit or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available. */
353
/* Pick the first enabled width, in the order 8, 16, 32, and bind the
width-independent names to that width's symbols. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE           PCRE8_MODE
#define VERSION_TYPE                PCRE2_UCHAR8
#define PCRE2_CONFIG                pcre2_config_8
#define PCRE2_JIT_STACK             pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE           PCRE16_MODE
#define VERSION_TYPE                PCRE2_UCHAR16
#define PCRE2_CONFIG                pcre2_config_16
#define PCRE2_JIT_STACK             pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE           PCRE32_MODE
#define VERSION_TYPE                PCRE2_UCHAR32
#define PCRE2_CONFIG                pcre2_config_32
#define PCRE2_JIT_STACK             pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_32

#endif
384
385 /* ------------- Structure and table for handling #-commands ------------- */
386
/* One table entry: the name of a #-command and its CMD_xxx identifier. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Identifiers for the #-commands. CMD_UNKNOWN comes last and has no entry in
cmdlist[]. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_LOADTABLES,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "loadtables",      CMD_LOADTABLES },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

/* Number of entries in cmdlist[] */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
409
410 /* ------------- Structures and tables for handling modifiers -------------- */
411
412 /* Table of names for newline types. Must be kept in step with the definitions
413 of PCRE2_NEWLINE_xx in pcre2.h. */
414
static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
417
418 /* Structure and table for handling pattern conversion types. */
419
420 typedef struct convertstruct {
421 const char *name;
422 uint32_t option;
423 } convertstruct;
424
425 static convertstruct convertlist[] = {
426 { "glob", PCRE2_CONVERT_GLOB },
427 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
428 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
429 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
430 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
431 { "unset", CONVERT_UNSET }};
432
433 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
434
435 /* Modifier types and applicability */
436
enum {
  /* What a modifier applies to */

  MOD_CTC = 0, /* A compile context */
  MOD_CTM,     /* A match context */
  MOD_PAT,     /* A pattern */
  MOD_PATP,    /* A pattern; also OK for Perl test */
  MOD_DAT,     /* A data line */
  MOD_PD,      /* A pattern or a data line */
  MOD_PDP,     /* Like MOD_PD; also OK for Perl test */
  MOD_PND,     /* Like MOD_PD, but not for a default pattern */
  MOD_PNDP,    /* Like MOD_PND; also OK for Perl test */

  /* What kind of value a modifier has */

  MOD_CHR,     /* A single character */
  MOD_CON,     /* A "convert" type/options list */
  MOD_CTL,     /* A control bit */
  MOD_BSR,     /* A BSR value */
  MOD_IN2,     /* One or two unsigned integers */
  MOD_INS,     /* A signed integer */
  MOD_INT,     /* An unsigned integer */
  MOD_IND,     /* An unsigned integer, but no value => default */
  MOD_NL,      /* A newline value */
  MOD_NN,      /* A number or a name; more than one may occur */
  MOD_OPT,     /* An option bit */
  MOD_SIZ,     /* A PCRE2_SIZE value */
  MOD_STR      /* A string */
};
459
460 /* Control bits. Some apply to compiling, some to matching, but some can be set
461 either on a pattern or a data line, so they must all be distinct. There are now
462 so many of them that they are split into two fields. */
463
/* First control word: one bit per control, bits 0 to 31 in name order. */

#define CTL_AFTERTEXT       (1u << 0)
#define CTL_ALLAFTERTEXT    (1u << 1)
#define CTL_ALLCAPTURES     (1u << 2)
#define CTL_ALLUSEDTEXT     (1u << 3)
#define CTL_ALTGLOBAL       (1u << 4)
#define CTL_BINCODE         (1u << 5)
#define CTL_CALLOUT_CAPTURE (1u << 6)
#define CTL_CALLOUT_INFO    (1u << 7)
#define CTL_CALLOUT_NONE    (1u << 8)
#define CTL_DFA             (1u << 9)
#define CTL_EXPAND          (1u << 10)
#define CTL_FINDLIMITS      (1u << 11)
#define CTL_FRAMESIZE       (1u << 12)
#define CTL_FULLBINCODE     (1u << 13)
#define CTL_GETALL          (1u << 14)
#define CTL_GLOBAL          (1u << 15)
#define CTL_HEXPAT          (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO            (1u << 17)
#define CTL_JITFAST         (1u << 18)
#define CTL_JITVERIFY       (1u << 19)
#define CTL_MARK            (1u << 20)
#define CTL_MEMORY          (1u << 21)
#define CTL_NULLCONTEXT     (1u << 22)
#define CTL_POSIX           (1u << 23)
#define CTL_POSIX_NOSUB     (1u << 24)
#define CTL_PUSH            (1u << 25)  /* PUSH, PUSHCOPY and PUSHTABLESCOPY */
#define CTL_PUSHCOPY        (1u << 26)  /* must all be in the same word. */
#define CTL_PUSHTABLESCOPY  (1u << 27)
#define CTL_STARTCHAR       (1u << 28)
#define CTL_USE_LENGTH      (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT      (1u << 30)
#define CTL_ZERO_TERMINATE  (1u << 31)

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)  /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)
502
503 /* Second control word */
504
#define CTL2_SUBSTITUTE_CALLOUT           (1u << 0)
#define CTL2_SUBSTITUTE_EXTENDED          (1u << 1)
#define CTL2_SUBSTITUTE_LITERAL           (1u << 2)
#define CTL2_SUBSTITUTE_MATCHED           (1u << 3)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH   (1u << 4)
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY  (1u << 5)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET     (1u << 6)
#define CTL2_SUBSTITUTE_UNSET_EMPTY       (1u << 7)
#define CTL2_SUBJECT_LITERAL              (1u << 8)
#define CTL2_CALLOUT_NO_WHERE             (1u << 9)
#define CTL2_CALLOUT_EXTRA                (1u << 10)
#define CTL2_ALLVECTOR                    (1u << 11)

/* The top two bits are informational. */

#define CTL2_NL_SET                       (1u << 30)
#define CTL2_BSR_SET                      (1u << 31)
520
521 /* These are the matching controls that may be set either on a pattern or on a
522 data line. They are copied from the pattern controls as initial settings for
523 data line controls. Note that CTL_MEMORY is not included here, because it does
524 different things in the two cases. */
525
/* First-word controls shared by patterns and data lines */

#define CTL_ALLPD \
  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | CTL_ALLUSEDTEXT | \
   CTL_ALTGLOBAL | CTL_GLOBAL | CTL_MARK | CTL_STARTCHAR | CTL_UTF8_INPUT)

/* Second-word controls shared by patterns and data lines */

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_CALLOUT | CTL2_SUBSTITUTE_EXTENDED | \
   CTL2_SUBSTITUTE_LITERAL | CTL2_SUBSTITUTE_MATCHED | \
   CTL2_SUBSTITUTE_OVERFLOW_LENGTH | CTL2_SUBSTITUTE_REPLACEMENT_ONLY | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET | CTL2_SUBSTITUTE_UNSET_EMPTY | \
   CTL2_ALLVECTOR)
545
546 /* Structures for holding modifier information for patterns and subject strings
547 (data). Fields containing modifiers that can be set either for a pattern or a
548 subject must be at the start and in the same order in both cases so that the
549 same offset in the big table below works for both. */
550
551 typedef struct patctl { /* Structure for pattern modifiers. */
552 uint32_t options; /* Must be in same position as datctl */
553 uint32_t control; /* Must be in same position as datctl */
554 uint32_t control2; /* Must be in same position as datctl */
555 uint32_t jitstack; /* Must be in same position as datctl */
556 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
557 uint32_t substitute_skip; /* Must be in same position as patctl */
558 uint32_t substitute_stop; /* Must be in same position as patctl */
559 uint32_t jit;
560 uint32_t stackguard_test;
561 uint32_t tables_id;
562 uint32_t convert_type;
563 uint32_t convert_length;
564 uint32_t convert_glob_escape;
565 uint32_t convert_glob_separator;
566 uint32_t regerror_buffsize;
567 uint8_t locale[LOCALESIZE];
568 } patctl;
569
570 #define MAXCPYGET 10
571 #define LENCPYGET 64
572
573 typedef struct datctl { /* Structure for data line modifiers. */
574 uint32_t options; /* Must be in same position as patctl */
575 uint32_t control; /* Must be in same position as patctl */
576 uint32_t control2; /* Must be in same position as patctl */
577 uint32_t jitstack; /* Must be in same position as patctl */
578 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
579 uint32_t substitute_skip; /* Must be in same position as patctl */
580 uint32_t substitute_stop; /* Must be in same position as patctl */
581 uint32_t startend[2];
582 uint32_t cerror[2];
583 uint32_t cfail[2];
584 int32_t callout_data;
585 int32_t copy_numbers[MAXCPYGET];
586 int32_t get_numbers[MAXCPYGET];
587 uint32_t oveccount;
588 uint32_t offset;
589 uint8_t copy_names[LENCPYGET];
590 uint8_t get_names[LENCPYGET];
591 } datctl;
592
593 /* Ids for which context to modify. */
594
enum {
  CTX_PAT    = 0,  /* Active pattern context */
  CTX_POPPAT = 1,  /* Active pattern context, for a popped pattern */
  CTX_DEFPAT = 2,  /* Default pattern context */
  CTX_DAT    = 3,  /* Active data (match) context */
  CTX_DEFDAT = 4   /* Default data (match) context */
};
600
601 /* Macros to simplify the big table below. */
602
/* Field offsets within the compile and match contexts */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)

/* Field offsets within patctl and datctl. PD() yields the patctl offset. */

#define PO(field) offsetof(patctl, field)
#define PD(field) offsetof(patctl, field)
#define DO(field) offsetof(datctl, field)
608
609 /* Table of all long-form modifiers. Must be in collating sequence of modifier
610 name because it is searched by binary chop. */
611
612 typedef struct modstruct {
613 const char *name;
614 uint16_t which;
615 uint16_t type;
616 uint32_t value;
617 PCRE2_SIZE offset;
618 } modstruct;
619
620 static modstruct modlist[] = {
621 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
622 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
623 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
624 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
625 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
626 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
627 { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
628 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
629 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
630 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
631 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
632 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
633 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
634 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
635 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
636 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
637 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
638 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
639 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
640 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
641 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
642 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
643 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
644 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
645 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
646 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
647 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
648 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
649 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
650 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
651 { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
652 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
653 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
654 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
655 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
656 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
657 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
658 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
659 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
660 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
661 { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
662 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
663 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
664 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
665 { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
666 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
667 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
668 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
669 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
670 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
671 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
672 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
673 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
674 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
675 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
676 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
677 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
678 { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
679 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
680 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
681 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
682 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
683 { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
684 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
685 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
686 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
687 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
688 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
689 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
690 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
691 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
692 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
693 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
694 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
695 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
696 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
697 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
698 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
699 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
700 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
701 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
702 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
703 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
704 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
705 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
706 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
707 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
708 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
709 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
710 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
711 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
712 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
713 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
714 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
715 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
716 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
717 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
718 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
719 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
720 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
721 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
722 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
723 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
724 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
725 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
726 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
727 { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
728 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
729 { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) },
730 { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) },
731 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
732 { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
733 { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
734 { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
735 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
736 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
737 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
738 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
739 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
740 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
741 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
742 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
743 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
744 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
745 };
746
/* Number of entries in modlist. The whole expansion is parenthesized so that
it acts as a single operand when used inside a larger expression (for example
as the right-hand side of a division). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
748
/* Options and controls that are supported when the POSIX interface is in use.
There is one mask for each option or control word; the masks defined as (0)
currently contain no supported bits. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS | PCRE2_DOTALL | PCRE2_LITERAL | PCRE2_MULTILINE | \
  PCRE2_UCP | PCRE2_UTF | PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_EXPAND | CTL_HEXPAT | \
  CTL_POSIX | CTL_POSIX_NOSUB | CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL | PCRE2_NOTEMPTY | PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
768
/* Control bits that are not ignored with 'push'. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE | CTL_CALLOUT_INFO | CTL_FULLBINCODE | CTL_HEXPAT | CTL_INFO | \
  CTL_JITVERIFY | CTL_MEMORY | CTL_FRAMESIZE | CTL_PUSH | CTL_PUSHCOPY | \
  CTL_PUSHTABLESCOPY | CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET | CTL2_NL_SET)

/* Controls that apply only at compile time with 'push'. Both masks are
parenthesized, like every other mask in this group, so that they combine
safely with neighbouring operators wherever they are expanded. */

#define PUSH_COMPILE_ONLY_CONTROLS  (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)

/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT | CTL_POSIX | CTL_POSIX_NOSUB | CTL_PUSH | \
  CTL_PUSHCOPY | CTL_PUSHTABLESCOPY | CTL_USE_LENGTH)
787
788 /* Pattern controls that are mutually exclusive. At present these are all in
789 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
790 CTL_POSIX, so it doesn't need its own entries. */
791
792 static uint32_t exclusive_pat_controls[] = {
793 CTL_POSIX | CTL_PUSH,
794 CTL_POSIX | CTL_PUSHCOPY,
795 CTL_POSIX | CTL_PUSHTABLESCOPY,
796 CTL_PUSH | CTL_PUSHCOPY,
797 CTL_PUSH | CTL_PUSHTABLESCOPY,
798 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
799 CTL_EXPAND | CTL_HEXPAT };
800
801 /* Data controls that are mutually exclusive. At present these are all in the
802 first control word. */
803
804 static uint32_t exclusive_dat_controls[] = {
805 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
806 CTL_FINDLIMITS | CTL_NULLCONTEXT };
807
808 /* Table of single-character abbreviated modifiers. The index field is
809 initialized to -1, but the first time the modifier is encountered, it is filled
810 in with the index of the full entry in modlist, to save repeated searching when
811 processing multiple test items. This short list is searched serially, so its
812 order does not matter. */
813
814 typedef struct c1modstruct {
815 const char *fullname;
816 uint32_t onechar;
817 int index;
818 } c1modstruct;
819
820 static c1modstruct c1modlist[] = {
821 { "bincode", 'B', -1 },
822 { "info", 'I', -1 },
823 { "global", 'g', -1 },
824 { "caseless", 'i', -1 },
825 { "multiline", 'm', -1 },
826 { "no_auto_capture", 'n', -1 },
827 { "dotall", 's', -1 },
828 { "extended", 'x', -1 }
829 };
830
831 #define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct)
832
/* Values used by the table of arguments for the -C command line option. They
are wrapped in macros so that the table itself is easier to read. */

/* SUPPORT_n becomes 1 when SUPPORT_PCRE2_n is defined; otherwise it is set to
0 unless it already has a definition. */

#if defined SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#if defined SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#if defined SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined SUPPORT_32
#define SUPPORT_32 0
#endif

/* EBCDIC support flag, and the newline value reported with it. */

#if defined EBCDIC
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#else
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#endif

/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#if !defined NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
869
/* One entry in the table of -C arguments: the argument name, one of the CONF_
type codes below, and a value whose interpretation depends on the type. */

typedef struct coptstruct {
  const char *name;
  uint32_t    type;
  uint32_t    value;
} coptstruct;

/* Type codes for coptstruct.type. NOTE(review): what each code means is
decided by the code that processes -C, which is not part of this section. */

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
882
883 static coptstruct coptlist[] = {
884 { "backslash-C", CONF_FIX, BACKSLASH_C },
885 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
886 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
887 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
888 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
889 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
890 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
891 { "pcre2-16", CONF_FIX, SUPPORT_16 },
892 { "pcre2-32", CONF_FIX, SUPPORT_32 },
893 { "pcre2-8", CONF_FIX, SUPPORT_8 },
894 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
895 };
896
/* Number of entries in coptlist. The expansion is parenthesized so that it is
a single operand wherever it is used. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))

/* These helper macros are not needed once the table has been built. */

#undef SUPPORT_8
#undef SUPPORT_16
#undef SUPPORT_32
#undef SUPPORT_EBCDIC
903
904
905 /* ----------------------- Static variables ------------------------ */
906
907 static FILE *infile;
908 static FILE *outfile;
909
910 static const void *last_callout_mark;
911 static PCRE2_JIT_STACK *jit_stack = NULL;
912 static size_t jit_stack_size = 0;
913
914 static BOOL first_callout;
915 static BOOL jit_was_used;
916 static BOOL restrict_for_perl_test = FALSE;
917 static BOOL show_memory = FALSE;
918
919 static int code_unit_size; /* Bytes */
920 static int jitrc; /* Return from JIT compile */
921 static int test_mode = DEFAULT_TEST_MODE;
922 static int timeit = 0;
923 static int timeitm = 0;
924
925 clock_t total_compile_time = 0;
926 clock_t total_jit_compile_time = 0;
927 clock_t total_match_time = 0;
928
929 static uint32_t dfa_matched;
930 static uint32_t forbid_utf = 0;
931 static uint32_t maxlookbehind;
932 static uint32_t max_oveccount;
933 static uint32_t callout_count;
934 static uint32_t maxcapcount;
935
936 static uint16_t local_newline_default = 0;
937
938 static VERSION_TYPE jittarget[VERSION_SIZE];
939 static VERSION_TYPE version[VERSION_SIZE];
940 static VERSION_TYPE uversion[VERSION_SIZE];
941
942 static patctl def_patctl;
943 static patctl pat_patctl;
944 static datctl def_datctl;
945 static datctl dat_datctl;
946
947 static void *patstack[PATSTACKSIZE];
948 static int patstacknext = 0;
949
950 static void *malloclist[MALLOCLISTSIZE];
951 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
952 static uint32_t malloclistptr = 0;
953
954 #ifdef SUPPORT_PCRE2_8
955 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
956 #endif
957
958 static int *dfa_workspace = NULL;
959 static const uint8_t *locale_tables = NULL;
960 static const uint8_t *use_tables = NULL;
961 static uint8_t locale_name[32];
962 static uint8_t *tables3 = NULL; /* For binary-loaded tables */
963 static uint32_t loadtables_length = 0;
964
/* Building 16-bit and 32-bit strings needs separate buffers. 8-bit strings do
not need rebuilding, but the same naming scheme is set up for them so that the
macros can treat all widths alike. Every input line is read into "buffer",
whose size is the same as that of pbuffer8. Pattern lines are always copied to
pbuffer8 for use in callouts, even if they are actually compiled from
pbuffer16 or pbuffer32. */

static size_t   pbuffer8_size = 50000;     /* Initial size, in bytes */
static uint8_t *pbuffer8 = NULL;
static uint8_t *buffer = NULL;

/* All processed data lines are put in dbuffer. In non-8-bit modes it is cast
as needed. For long data lines it grows as necessary. */

static size_t   dbuffer_size = 1u << 14;   /* Initial size, in bytes */
static uint8_t *dbuffer = NULL;
980
981
982 /* ---------------- Mode-dependent variables -------------------*/
983
/* One set of variables per supported code unit width. The names differ only
in their 8/16/32 suffix, which is what the mode-selecting macros rely on. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_convert_context_8 *con_context8;
static pcre2_convert_context_8 *default_con_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
static uint16_t *pbuffer16 = NULL;
#endif

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
static uint32_t *pbuffer32 = NULL;
#endif
1014
1015
1016 /* ---------------- Macros that work in all modes ----------------- */
1017
/* CAST8VAR casts the mode-selected variable to uint8_t *. SET and SETPLUS
apply = and += to the mode-selected variable by way of SETOP. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)

/* strlen() for a string that is not of type char *. The argument is
parenthesized so that the cast applies to the whole expression that was
passed, not just to its first operand. */

#define strlen8(x) strlen((char *)(x))
1022
1023
1024 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1025
1026 /* Define macros for variables and functions that must be selected dynamically
1027 depending on the mode setting (8, 16, 32). These are dependent on which modes
1028 are supported. */
1029
1030 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1031 defined (SUPPORT_PCRE2_32)) >= 2
1032
1033 /* ----- All three modes supported ----- */
1034
1035 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1036
/* Read field b of the structure a8, a16 or a32, whichever test_mode selects,
and cast the value to type t. */

#define CASTFLD(t,a,b) \
  ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
   (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
   (t)(G(a,32)->b))

/* Cast the variable x8, x16 or x32, whichever test_mode selects, to type t. */

#define CASTVAR(t,x) ( \
  (test_mode == PCRE8_MODE)? (t)G(x,8) : \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : \
  (t)G(x,32))

/* Fetch code unit number b from the string at a, treating a as a pointer to
8-bit, 16-bit or 32-bit units according to test_mode. The result has type
uint32_t. */

#define CODE_UNIT(a,b) ( \
  (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
  (uint32_t)(((PCRE2_SPTR32)(a))[b]))
1048
/* Copy the convert context b8/b16/b32 over a8/a16/a32, using the pair that
test_mode selects. */

#define CONCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
1055
/* Copy c code units from b into the buffer a8, a16 or a32 that test_mode
selects; the byte count is c scaled by the code unit size. The chain ends in a
plain "else", as in the neighbouring macros, so that an "else" written after a
use of the macro pairs with the caller's own "if" and not with a test inside
the expansion. */

#define CONVERT_COPY(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),(char *)b,c); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),(char *)b,(c)*2); \
  else \
    memcpy(G(a,32),(char *)b,(c)*4)
1063
/* Copy the match context b8/b16/b32 over a8/a16/a32, using the pair that
test_mode selects. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))

/* Yield field b of the structure a8, a16 or a32 that test_mode selects. */

#define FLD(a,b) \
  ((test_mode == PCRE8_MODE)? G(a,8)->b : \
   (test_mode == PCRE16_MODE)? G(a,16)->b : \
   G(a,32)->b)

/* Copy the compile context b8/b16/b32 over a8/a16/a32, using the pair that
test_mode selects. */

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
1080
/* Call pchars8(), pchars16() or pchars32(), according to test_mode, on the
string that starts offset code units after p, and store the result in lv. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

/* As PCHARS, but the value returned by the pchars function is discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1096
/* Call pcre2_callout_enumerate() for the compiled code of the current mode,
with b cast to the callback type of that mode and c passed through, and store
the result in a. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else \
    a = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1107
/* Set a8/a16/a32 to a copy, made by pcre2_code_copy(), of the code at b. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_code_copy_8(b); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_code_copy_16(b); \
  else \
    G(a,32) = pcre2_code_copy_32(b)

/* Set a to a copy, made by pcre2_code_copy() and cast to void *, of
b8/b16/b32. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_32(G(b,32))

/* As PCRE2_CODE_COPY_TO_VOID, but using pcre2_code_copy_with_tables(). */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
1131
/* Call pcre2_compile() on the pattern in b8/b16/b32 with arguments c to g,
and store the result in a8/a16/a32. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else \
    G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)

/* Call pcre2_converted_pattern_free() on a, cast to the code unit pointer
type of the current mode. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)

/* Call pcre2_dfa_match() with the code in b8/b16/b32, the subject c cast to
the subject pointer type of the current mode, and the match data in
g8/g16/g32; the result is stored in a. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else \
    a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1152
/* Call pcre2_get_error_message() for error a with buffer b8/b16/b32, and
store the result in r. The length passed is the buffer's _size variable
divided by the code unit size in bytes. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
  else \
    r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))

/* Store in a the result of pcre2_get_ovector_count() on b8/b16/b32. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_ovector_count_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_ovector_count_16(G(b,16)); \
  else \
    a = pcre2_get_ovector_count_32(G(b,32))

/* Store in a the result of pcre2_get_startchar() on b8/b16/b32. */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_startchar_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_startchar_16(G(b,16)); \
  else \
    a = pcre2_get_startchar_32(G(b,32))
1176
/* Store in r the result of pcre2_jit_compile() on a8/a16/a32 with options
b. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_jit_compile_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_jit_compile_16(G(a,16),b); \
  else \
    r = pcre2_jit_compile_32(G(a,32),b)

/* Call pcre2_jit_free_unused_memory() with a8/a16/a32. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(a,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(a,32))

/* Call pcre2_jit_match() with the code in b8/b16/b32, the subject c cast to
the subject pointer type of the current mode, and the match data in
g8/g16/g32; the result is stored in a. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1194
/* The three JIT stack macros below expand to an if/else chain without a
terminating semicolon, like the other statement macros in this section; the
semicolon is supplied where the macro is used. A semicolon inside the
expansion would leave an extra empty statement behind, which detaches any
"else" that follows a use of the macro. */

/* Create a JIT stack with pcre2_jit_stack_create(b,c,d) and store it, cast to
PCRE2_JIT_STACK *, in a. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)

/* Call pcre2_jit_stack_assign() on the context a8/a16/a32, with b cast to the
JIT callback type of the current mode and c passed through. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)

/* Call pcre2_jit_stack_free() on a, cast to the JIT stack type of the current
mode. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1218
/* Store in a the result of pcre2_maketables(NULL) for the current mode. */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_maketables_16(NULL); \
  else \
    a = pcre2_maketables_32(NULL)

/* Call pcre2_match() with the code in b8/b16/b32, the subject c cast to the
subject pointer type of the current mode, and the match data in g8/g16/g32;
the result is stored in a. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* Set a8/a16/a32 to the result of pcre2_match_data_create(b,c). */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_8(b,c); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_16(b,c); \
  else \
    G(a,32) = pcre2_match_data_create_32(b,c)

/* Set a8/a16/a32 to the result of pcre2_match_data_create_from_pattern() on
the code in b8/b16/b32. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
  else \
    G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)

/* Call pcre2_match_data_free() with a8/a16/a32. */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(a,16)); \
  else \
    pcre2_match_data_free_32(G(a,32))
1255
/* Call pcre2_pattern_convert() on the pattern in b8/b16/b32, with e cast to a
pointer to a code unit pointer of the current mode and the convert context in
g8/g16/g32; the result is stored in a. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
  else \
    a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))

/* Store in a the result of pcre2_pattern_info() on b8/b16/b32 with arguments
c and d. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_info_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_info_16(G(b,16),c,d); \
  else \
    a = pcre2_pattern_info_32(G(b,32),c,d)

/* Call pcre2_printint() on the compiled code of the current mode, passing
outfile and a. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8,outfile,a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16,outfile,a); \
  else \
    pcre2_printint_32(compiled_code32,outfile,a)
1279
/* Store in r the result of pcre2_serialize_decode(), with a cast to a pointer
to code pointers of the current mode and the general context in d8/d16/d32. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else \
    r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))

/* Store in r the result of pcre2_serialize_encode(), with a cast to a pointer
to const code pointers of the current mode and the general context in
e8/e16/e32. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else \
    r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))

/* Call pcre2_serialize_free() with a. */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(a); \
  else \
    pcre2_serialize_free_32(a)
1303
/* Store in r the result of pcre2_serialize_get_number_of_codes() on a. The
expansion carries no terminating semicolon and no trailing line continuation;
the semicolon is supplied where the macro is used, so an "else" may follow a
use of the macro. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)

/* Call pcre2_set_callout() on the context a8/a16/a32, with b cast to the
callout function type of the current mode and c passed through. As above, the
terminating semicolon is left to the place where the macro is used. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1319
/* Each macro in this group calls the pcre2_set_xxx() function of the current
mode on the context a8/a16/a32. Those that take r store the function's result
in it. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(a,16),b); \
  else \
    pcre2_set_character_tables_32(G(a,32),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_depth_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_depth_limit_16(G(a,16),b); \
  else \
    pcre2_set_depth_limit_32(G(a,32),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_separator_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_separator_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_separator_32(G(a,32),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_escape_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_escape_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_escape_32(G(a,32),b)
1359
/* Each macro in this group calls the pcre2_set_xxx() function of the current
mode on the context a8/a16/a32, passing b as the new value. */

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_heap_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_heap_limit_16(G(a,16),b); \
  else \
    pcre2_set_heap_limit_32(G(a,32),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(a,16),b); \
  else \
    pcre2_set_match_limit_32(G(a,32),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(a,16),b); \
  else \
    pcre2_set_max_pattern_length_32(G(a,32),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(a,16),b); \
  else \
    pcre2_set_offset_limit_32(G(a,32),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else \
    pcre2_set_parens_nest_limit_32(G(a,32),b)
1399
/* Call pcre2_set_substitute_callout() on the context a8/a16/a32, with b cast
to the substitute callout function type of the current mode and c passed
through. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(a,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(a,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_substitute_callout_32(G(a,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)

/* Call pcre2_substitute() with the code in b8/b16/b32 and the match data in
g8/g16/g32. The arguments c, i and k are cast to the string pointer types of
the current mode. The result is stored in a. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1421
/* Substring extraction. In each macro b8/b16/b32 is passed as the first
argument of the pcre2_substring_xxx() function of the current mode and the
result is stored in a; PCRE2_SUBSTRING_FREE takes only the pointer to
release. The _BYNAME forms take the name from c8/c16/c32. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)a)

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1459
/* Substring lengths, lists and name lookup. Except for
PCRE2_SUBSTRING_LIST_FREE, each macro passes b8/b16/b32 as the first argument
of the pcre2_substring_xxx() function of the current mode and stores the
result in a. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
  else \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else \
    a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
  else \
    a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1499
/* Yield the variable x8, x16 or x32 that test_mode selects, cast to void *. */

#define PTR(x) ( \
  (test_mode == PCRE8_MODE)? (void *)G(x,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
  (void *)G(x,32))

/* Assign z to field y of the structure x8/x16/x32. */

#define SETFLD(x,y,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8)->y = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16)->y = z; \
  else \
    G(x,32)->y = z

/* Assign z to element v of the vector field y of the structure x8/x16/x32. */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8)->y[v] = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16)->y[v] = z; \
  else \
    G(x,32)->y[v] = z

/* Apply the operator z, with right-hand operand y, to the variable
x8/x16/x32. */

#define SETOP(x,y,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8) z y; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16) z y; \
  else \
    G(x,32) z y

/* Assign y to the variable x8/x16/x32 after casting it to a pointer to
unsigned code units of the matching width. */

#define SETCASTPTR(x,y) \
  if (test_mode == PCRE8_MODE) \
    G(x,8) = (uint8_t *)(y); \
  else if (test_mode == PCRE16_MODE) \
    G(x,16) = (uint16_t *)(y); \
  else \
    G(x,32) = (uint32_t *)(y)
1527
/* Length of the string at p, cast to int. It is obtained from strlen(),
strlen16() or strlen32() according to test_mode. */

#define STRLEN(p) \
  ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
   (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
   ((int)strlen32((PCRE2_SPTR32)p)))

/* Call the function a8/a16/a32 with the single argument b8/b16/b32. */

#define SUB1(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16)); \
  else \
    G(a,32)(G(b,32))

/* Call the function a8/a16/a32 with the two arguments b8/b16/b32 and
c8/c16/c32. */

#define SUB2(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16),G(c,16)); \
  else \
    G(a,32)(G(b,32),G(c,32))

/* True if the variable x8/x16/x32 of the current mode stands in relation r
to y. */

#define TEST(x,r,y) ( \
  (test_mode == PCRE8_MODE && G(x,8) r (y)) || \
  (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32) r (y)))

/* True if field f of the structure x8/x16/x32 of the current mode stands in
relation r to y. */

#define TESTFLD(x,f,r,y) ( \
  (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \
  (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1551
1552
1553 /* ----- Two out of three modes are supported ----- */
1554
1555 #else
1556
1557 /* We can use some macro trickery to make a single set of definitions work in
1558 the three different cases. */
1559
1560 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1561
/* BITONE and BITTWO name the two code unit widths in use. BITONE is 32
whenever 32-bit support is paired with one of the others; otherwise the pair is
16 and 8. */

#if defined SUPPORT_PCRE2_32 && \
    (defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_8)
#define BITONE 32

/* ----- 32-bit and 16-bit but not 8-bit supported ----- */
#ifdef SUPPORT_PCRE2_16
#define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */
#else
#define BITTWO 8
#endif

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

#else
#define BITONE 16
#define BITTWO 8
#endif
1578
1579
1580 /* ----- Common macros for two-mode cases ----- */
1581
/* Code unit sizes in bytes for the two widths. */

#define BYTEONE (BITONE/8)
#define BYTETWO (BITTWO/8)

/* Read member field of obj<width> and cast it to type. */

#define CASTFLD(type,obj,field) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(obj,BITONE)->field) : (type)(G(obj,BITTWO)->field))

/* Cast the variable var<width> to type. */

#define CASTVAR(type,var) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : (type)G(var,BITTWO))

/* Fetch code unit number idx from base, viewed as a string of the current
width, widened to uint32_t. */

#define CODE_UNIT(base,idx) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(base))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(base))[idx]))
1597
/* Copy one convert context over another: memcpy of a whole
pcre2_convert_context_<width> from src<width> to dst<width>. Statement macro;
the caller supplies the final semicolon. */

#define CONCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))
1603
/* Copy c code units from b into a<width>; the byte count is c scaled by the
code unit size of the current mode. The value is memcpy's return value.

The whole conditional is wrapped in parentheses so that an operator next to the
macro at the call site (for example a leading '!') applies to the result rather
than being absorbed into the test_mode comparison. The cast of b is deliberately
left unparenthesized, as before, so existing argument expressions keep their
meaning. */

#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
    memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))
1608
/* Copy one match context over another: memcpy of a whole
pcre2_match_context_<width> from src<width> to dst<width>. Statement macro. */

#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

/* Expression giving member field of obj<width>. */

#define FLD(obj,field) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(obj,BITONE)->field : G(obj,BITTWO)->field)

/* As DATCTXCPY, but for a pcre2_compile_context_<width>. */

#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1623
/* Call pchars<width> on str, viewed as a string of the current width and
advanced by off code units. PCHARS stores the return value in res; PCHARSV
discards it. Statement macros. */

#define PCHARS(res, str, off, count, is_utf, out) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    res = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(str)+off, \
      count, is_utf, out); \
  else \
    res = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(str)+off, \
      count, is_utf, out)

#define PCHARSV(str, off, count, is_utf, out) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(str)+off, \
      count, is_utf, out); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(str)+off, \
      count, is_utf, out)
1635
/* Enumerate the callouts of compiled_code<width>: a receives the return
value, b is the callback (cast to the width-specific callback type) and c is
passed through unchanged. Statement macro; the caller supplies the final
semicolon.

The function stem carries its trailing underscore, like every other wrapper in
this table, so the pasted name is pcre2_callout_enumerate_<width> - the same
function the single-mode definitions call - without depending on how the bare
name pcre2_callout_enumerate happens to be defined elsewhere. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1643
/* Statement wrappers that pick the pcre2_..._<width> function for the
current test_mode. The caller supplies the final semicolon. */

/* dst<width> = copy of the compiled code passed directly as src. */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(dst,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

/* dst = copy of src<width>, stored as a void pointer. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(src,BITTWO))

/* As above, using pcre2_code_copy_with_tables_<width>. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(src,BITTWO))

/* code<width> = pcre2_compile_<width>(pat<width>, ...remaining arguments). */

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,cctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pat,BITONE), \
      len,opts,errcode,erroff,cctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pat,BITTWO), \
      len,opts,errcode,erroff,cctx)

/* Free pat after casting it to a PCRE2_UCHAR<width> pointer. */

#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) *)pat); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) *)pat)
1673
/* Matching, error-text and JIT wrappers for the two-mode build. Each is an
if/else statement on test_mode; the caller supplies the final semicolon. In the
match wrappers the subject is cast to PCRE2_SPTR<width>, and the code and match
data arguments are name stems that receive the width suffix. */

#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,wspace,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx,wspace,wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx,wspace,wscount)

/* The buffer length passed is buf<width>_size divided by the code unit size
in bytes. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errnum,G(buf,BITONE), \
      G(G(buf,BITONE),_size/BYTEONE)); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errnum,G(buf,BITTWO), \
      G(G(buf,BITTWO),_size/BYTETWO))

#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    count = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    count = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_GET_STARTCHAR(pos,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    pos = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    pos = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),opts); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),opts)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(gctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(gctx,BITTWO))

#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx)
1719
/* JIT stack wrappers for the two-mode build. Like the other statement macros
in this table they expand to an if/else WITHOUT a trailing semicolon; the
caller writes the semicolon. A semicolon baked into the macro produces an extra
empty statement, which breaks "if (x) MACRO(...); else ..." at the call site.
PCRE2_JIT_STACK_CREATE also used to end in a line continuation that swallowed
the following blank line. */

/* a = new JIT stack from pcre2_jit_stack_create_<width>(b,c,d), cast to
PCRE2_JIT_STACK *. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)

/* Call pcre2_jit_stack_assign_<width> on a<width>; b is cast to the
width-specific callback type and c is passed through. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c)

/* Free a after casting it to the width-specific JIT stack pointer type. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1737
/* Table, match, match-data, conversion, info, debug-print and serialization
wrappers for the two-mode build. All are if/else statements on test_mode; the
caller supplies the final semicolon. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx)

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(ovecsize,gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(ovecsize,gctx)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = \
      G(pcre2_match_data_create_from_pattern_,BITONE)(G(code,BITONE),gctx); \
  else \
    G(mdata,BITTWO) = \
      G(pcre2_match_data_create_from_pattern_,BITTWO)(G(code,BITTWO),gctx)

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))

/* pat and cvctx are name stems; bufptr is cast to PCRE2_UCHAR<width> **. */

#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,bufptr,buflenptr,cvctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_convert_,BITONE)(G(pat,BITONE),len,opts, \
      (G(PCRE2_UCHAR,BITONE) **)bufptr,buflenptr,G(cvctx,BITONE)); \
  else \
    rc = G(pcre2_pattern_convert_,BITTWO)(G(pat,BITTWO),len,opts, \
      (G(PCRE2_UCHAR,BITTWO) **)bufptr,buflenptr,G(cvctx,BITTWO))

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)

/* Always operates on compiled_code<width> and writes to outfile. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (G(const pcre2_code_,BITONE) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (G(const pcre2_code_,BITTWO) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITTWO))

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1811
/* Call pcre2_set_callout_<width> on a<width>; b is cast to the width-specific
callout function type and c is passed through. Statement macro: it ends without
a semicolon, like its neighbours, so the caller's own semicolon completes it
and "if (x) PCRE2_SET_CALLOUT(...); else ..." parses correctly. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1819
/* Context-setting wrappers for the two-mode build. The first argument (the
second for the glob setters, which also store a return value) is a name stem
that receives the width suffix; the rest are passed through. All are if/else
statements; the caller supplies the final semicolon. */

#define PCRE2_SET_CHARACTER_TABLES(cctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(cctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(cctx,BITTWO),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(cctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(cctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(cctx,BITTWO),guard,data)

#define PCRE2_SET_DEPTH_LIMIT(mctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(mctx,BITONE),limit); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(mctx,BITTWO),limit)

#define PCRE2_SET_GLOB_ESCAPE(rc,cvctx,ch) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_escape_,BITONE)(G(cvctx,BITONE),ch); \
  else \
    rc = G(pcre2_set_glob_escape_,BITTWO)(G(cvctx,BITTWO),ch)

#define PCRE2_SET_GLOB_SEPARATOR(rc,cvctx,ch) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_separator_,BITONE)(G(cvctx,BITONE),ch); \
  else \
    rc = G(pcre2_set_glob_separator_,BITTWO)(G(cvctx,BITTWO),ch)

#define PCRE2_SET_HEAP_LIMIT(mctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(mctx,BITONE),limit); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(mctx,BITTWO),limit)

#define PCRE2_SET_MATCH_LIMIT(mctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(mctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(mctx,BITTWO),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(cctx,maxlen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(cctx,BITONE),maxlen); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(cctx,BITTWO),maxlen)

#define PCRE2_SET_OFFSET_LIMIT(mctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(mctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(mctx,BITTWO),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(cctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(cctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(cctx,BITTWO),limit)

/* fn is cast to the width-specific substitute callout function type. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(mctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)(G(mctx,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))fn, \
      data); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)(G(mctx,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))fn, \
      data)
1887
/* Substitution and substring-extraction wrappers for the two-mode build.
String arguments are cast to the PCRE2_SPTR / PCRE2_UCHAR type of the current
width; code, match data and name arguments are name stems that receive the
width suffix. All are if/else statements; the caller supplies the final
semicolon. */

#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx,(G(PCRE2_SPTR,BITONE))repl,rlen, \
      (G(PCRE2_UCHAR,BITONE) *)outbuf,outlenptr); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx,(G(PCRE2_SPTR,BITTWO))repl,rlen, \
      (G(PCRE2_UCHAR,BITTWO) *)outbuf,outlenptr)

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

#define PCRE2_SUBSTRING_FREE(str) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)str); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)str)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(mdata,BITONE), \
      num,lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,lenptr)

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)(G(mdata,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)listptr,lensptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)(G(mdata,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)listptr,lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(num,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    num = G(pcre2_substring_number_from_name_,BITONE)(G(code,BITONE), \
      G(name,BITONE)); \
  else \
    num = G(pcre2_substring_number_from_name_,BITTWO)(G(code,BITTWO), \
      G(name,BITTWO))
1966
/* Two-mode pointer and assignment helpers. PTR is an expression; the SET*
macros are if/else statements and the caller supplies the final semicolon. */

/* The variable var<width> as a void pointer. */

#define PTR(var) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(var,BITONE) : (void *)G(var,BITTWO))

/* obj<width>->field = value */

#define SETFLD(obj,field,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(obj,BITONE)->field = value; \
  else \
    G(obj,BITTWO)->field = value

/* obj<width>->field[idx] = value */

#define SETFLDVEC(obj,field,idx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(obj,BITONE)->field[idx] = value; \
  else \
    G(obj,BITTWO)->field[idx] = value

/* Apply the operator token op to var<width> with operand on the right. */

#define SETOP(var,operand,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op operand; \
  else \
    G(var,BITTWO) op operand

/* var<width> = ptr, cast to a pointer to uint<width>_t. */

#define SETCASTPTR(var,ptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) = (G(G(uint,BITONE),_t) *)(ptr); \
  else \
    G(var,BITTWO) = (G(G(uint,BITTWO),_t) *)(ptr)
1988
/* Length of the string p, in code units of the current width, as an int.

The result of the width-specific length function is cast to int so that this
definition has the same type as the three-mode and single-mode definitions of
STRLEN, which all cast; without the cast the expression's type would be
whatever the underlying function returns, and could differ between build
configurations. The argument is cast without extra parentheses, as before.

NOTE(review): when BITTWO is 8 this pastes the name strlen8, which must be
provided elsewhere in the file - confirm. */

#define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  ((int)G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p)) : \
  ((int)G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)))
1992
/* Call fn<width>(arg<width>) for the width named by test_mode. Statement
macro; the caller supplies the final semicolon. */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(fn,BITONE)(G(arg,BITONE)); \
  else G(fn,BITTWO)(G(arg,BITTWO))
1998
/* Call a<width>(b<width>, c<width>) for the width named by test_mode.
Statement macro; the caller supplies the final semicolon.

Each function name is followed directly by its argument list. A stray ')'
after G(a,BITONE) / G(a,BITTWO) used to unbalance the parentheses, so any use
of this macro in a two-mode build failed to compile. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2004
/* Boolean expressions for the two-mode build: true when relation rel holds
between var<width> (TEST) or obj<width>->field (TESTFLD) and rhs. Both modes
are tested explicitly, so any other test_mode value yields false. */

#define TEST(var,rel,rhs) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE) rel (rhs)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO) rel (rhs)))

#define TESTFLD(obj,field,rel,rhs) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && \
      G(obj,BITONE)->field rel (rhs)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && \
      G(obj,BITTWO)->field rel (rhs)))
2012
2013
2014 #endif /* Two out of three modes */
2015
2016 /* ----- End of cases where more than one mode is supported ----- */
2017
2018
2019 /* ----- Only 8-bit mode is supported ----- */
2020
2021 #elif defined SUPPORT_PCRE2_8
/* 8-bit only: no run-time selection is needed, so each macro maps straight
onto the _8 function and onto the 8-suffixed form of each name stem. */

#define CASTFLD(type,obj,field) (type)(G(obj,8)->field)
#define CASTVAR(type,var) (type)G(var,8)
#define CODE_UNIT(base,idx) (uint32_t)(((PCRE2_SPTR8)(base))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(dst,src,n) memcpy(G(dst,8),(char *)src, n)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
#define FLD(obj,field) G(obj,8)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))
#define PCHARS(res, str, off, count, is_utf, out) \
  res = pchars8((PCRE2_SPTR8)(str)+off, count, is_utf, out)
#define PCHARSV(str, off, count, is_utf, out) \
  (void)pchars8((PCRE2_SPTR8)(str)+off, count, is_utf, out)
#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_8(G(src,8))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,cctx) \
  G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errcode,erroff,cctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  rc = pcre2_get_error_message_8(errnum,G(buf,8),G(G(buf,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(pos,mdata) \
  pos = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_8(G(code,8),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_8(G(gctx,8))
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx)
/* 8-bit JIT stack wrappers. They expand to a bare expression with no trailing
semicolon, like the rest of this table; the caller writes the semicolon. A
semicolon inside the macro adds an empty statement that breaks
"if (x) MACRO(...); else ..." at the call site. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a)
/* 8-bit only: table, match, match-data, conversion, serialization, context
setting, substitution and substring wrappers, each a direct call to the _8
function. */

#define PCRE2_MAKETABLES(tables) tables = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,8) = pcre2_match_data_create_8(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,bufptr,buflenptr,cvctx) \
  rc = pcre2_pattern_convert_8(G(pat,8),len,opts, \
    (PCRE2_UCHAR8 **)bufptr,buflenptr,G(cvctx,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8),what,where)
#define PCRE2_PRINTINT(arg) pcre2_printint_8(compiled_code8,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes, \
    G(gctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count, \
    bytesptr,sizeptr,G(gctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
#define PCRE2_SET_CALLOUT(mctx,fn,data) \
  pcre2_set_callout_8(G(mctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))fn,data)
#define PCRE2_SET_CHARACTER_TABLES(cctx,tables) \
  pcre2_set_character_tables_8(G(cctx,8),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(cctx,guard,data) \
  pcre2_set_compile_recursion_guard_8(G(cctx,8),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(mctx,limit) \
  pcre2_set_depth_limit_8(G(mctx,8),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,cvctx,ch) \
  rc = pcre2_set_glob_escape_8(G(cvctx,8),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,cvctx,ch) \
  rc = pcre2_set_glob_separator_8(G(cvctx,8),ch)
#define PCRE2_SET_HEAP_LIMIT(mctx,limit) \
  pcre2_set_heap_limit_8(G(mctx,8),limit)
#define PCRE2_SET_MATCH_LIMIT(mctx,limit) \
  pcre2_set_match_limit_8(G(mctx,8),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(cctx,maxlen) \
  pcre2_set_max_pattern_length_8(G(cctx,8),maxlen)
#define PCRE2_SET_OFFSET_LIMIT(mctx,limit) \
  pcre2_set_offset_limit_8(G(mctx,8),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(cctx,limit) \
  pcre2_set_parens_nest_limit_8(G(cctx,8),limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(mctx,fn,data) \
  pcre2_set_substitute_callout_8(G(mctx,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))fn,data)
#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenptr) \
  rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx,(PCRE2_SPTR8)repl,rlen,(PCRE2_UCHAR8 *)outbuf,outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_FREE(str) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)str)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_8(G(mdata,8), \
    (PCRE2_UCHAR8 ***)listptr,lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)list)
/* a = pcre2_substring_number_from_name_8(b8, c8). No trailing semicolon: the
caller supplies it, so the macro is safe as the body of an unbraced if/else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* 8-bit only: pointer, assignment, length, call and comparison helpers, all
acting on the 8-suffixed form of the given name stem. */

#define PTR(var) (void *)G(var,8)
#define SETFLD(obj,field,value) G(obj,8)->field = value
#define SETFLDVEC(obj,field,idx,value) G(obj,8)->field[idx] = value
#define SETOP(var,operand,op) G(var,8) op operand
#define SETCASTPTR(var,ptr) G(var,8) = (uint8_t *)(ptr)
#define STRLEN(str) (int)strlen((char *)str)
#define SUB1(fn,arg) G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(var,rel,rhs) (G(var,8) rel (rhs))
#define TESTFLD(obj,field,rel,rhs) (G(obj,8)->field rel (rhs))
2124
2125
2126 /* ----- Only 16-bit mode is supported ----- */
2127
2128 #elif defined SUPPORT_PCRE2_16
/* 16-bit only: no run-time selection is needed, so each macro maps straight
onto the _16 function and onto the 16-suffixed form of each name stem. Counts
handed to memcpy are scaled by 2, and the error buffer size is divided by 2. */

#define CASTFLD(type,obj,field) (type)(G(obj,16)->field)
#define CASTVAR(type,var) (type)G(var,16)
#define CODE_UNIT(base,idx) (uint32_t)(((PCRE2_SPTR16)(base))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(dst,src,n) memcpy(G(dst,16),(char *)src, (n)*2)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16))
#define FLD(obj,field) G(obj,16)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16))
#define PCHARS(res, str, off, count, is_utf, out) \
  res = pchars16((PCRE2_SPTR16)(str)+off, count, is_utf, out)
#define PCHARSV(str, off, count, is_utf, out) \
  (void)pchars16((PCRE2_SPTR16)(str)+off, count, is_utf, out)
#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,16) = pcre2_code_copy_16(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_16(G(src,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_16(G(src,16))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,cctx) \
  G(code,16) = pcre2_compile_16(G(pat,16),len,opts,errcode,erroff,cctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  rc = pcre2_get_error_message_16(errnum,G(buf,16),G(G(buf,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_16(G(mdata,16))
#define PCRE2_GET_STARTCHAR(pos,mdata) \
  pos = pcre2_get_startchar_16(G(mdata,16))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_16(G(code,16),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_16(G(gctx,16))
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),mctx)
/* 16-bit JIT stack wrappers. They expand to a bare expression with no
trailing semicolon, like the rest of this table; the caller writes the
semicolon. A semicolon inside the macro adds an empty statement that breaks
"if (x) MACRO(...); else ..." at the call site. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a)
/* 16-bit only: table, match, match-data, conversion, info, debug-print and
serialization wrappers, each a direct call to the _16 function. */

#define PCRE2_MAKETABLES(tables) tables = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,16) = pcre2_match_data_create_16(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) pcre2_match_data_free_16(G(mdata,16))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,bufptr,buflenptr,cvctx) \
  rc = pcre2_pattern_convert_16(G(pat,16),len,opts, \
    (PCRE2_UCHAR16 **)bufptr,buflenptr,G(cvctx,16))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_16(G(code,16),what,where)
#define PCRE2_PRINTINT(arg) pcre2_printint_16(compiled_code16,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes, \
    G(gctx,16))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count, \
    bytesptr,sizeptr,G(gctx,16))
#define PCRE2_SERIALIZE_FREE(bytes) pcre2_serialize_free_16(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_16(bytes)
/* Call pcre2_set_callout_16 on a16, with b cast to the 16-bit callout function
type. No trailing semicolon (matching the 8-bit definition): the caller
supplies it, so the macro is safe as the body of an unbraced if/else. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
/* 16-bit only: context-setting, substitution and substring wrappers, each a
direct call to the _16 function. */

#define PCRE2_SET_CHARACTER_TABLES(cctx,tables) \
  pcre2_set_character_tables_16(G(cctx,16),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(cctx,guard,data) \
  pcre2_set_compile_recursion_guard_16(G(cctx,16),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(mctx,limit) \
  pcre2_set_depth_limit_16(G(mctx,16),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,cvctx,ch) \
  rc = pcre2_set_glob_escape_16(G(cvctx,16),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,cvctx,ch) \
  rc = pcre2_set_glob_separator_16(G(cvctx,16),ch)
#define PCRE2_SET_HEAP_LIMIT(mctx,limit) \
  pcre2_set_heap_limit_16(G(mctx,16),limit)
#define PCRE2_SET_MATCH_LIMIT(mctx,limit) \
  pcre2_set_match_limit_16(G(mctx,16),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(cctx,maxlen) \
  pcre2_set_max_pattern_length_16(G(cctx,16),maxlen)
#define PCRE2_SET_OFFSET_LIMIT(mctx,limit) \
  pcre2_set_offset_limit_16(G(mctx,16),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(cctx,limit) \
  pcre2_set_parens_nest_limit_16(G(cctx,16),limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(mctx,fn,data) \
  pcre2_set_substitute_callout_16(G(mctx,16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))fn,data)
#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenptr) \
  rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),mctx,(PCRE2_SPTR16)repl,rlen,(PCRE2_UCHAR16 *)outbuf, \
    outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 *)buf,lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
    (PCRE2_UCHAR16 *)buf,lenptr)
#define PCRE2_SUBSTRING_FREE(str) \
  pcre2_substring_free_16((PCRE2_UCHAR16 *)str)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
    (PCRE2_UCHAR16 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_16(G(mdata,16), \
    (PCRE2_UCHAR16 ***)listptr,lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)list)
/* a = pcre2_substring_number_from_name_16(b16, c16). No trailing semicolon:
the caller supplies it, so the macro is safe as the body of an unbraced
if/else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
/* 16-bit only: pointer, assignment, length, call and comparison helpers, all
acting on the 16-suffixed form of the given name stem. */

#define PTR(var) (void *)G(var,16)
#define SETFLD(obj,field,value) G(obj,16)->field = value
#define SETFLDVEC(obj,field,idx,value) G(obj,16)->field[idx] = value
#define SETOP(var,operand,op) G(var,16) op operand
#define SETCASTPTR(var,ptr) G(var,16) = (uint16_t *)(ptr)
#define STRLEN(str) (int)strlen16((PCRE2_SPTR16)str)
#define SUB1(fn,arg) G(fn,16)(G(arg,16))
#define SUB2(fn,arg1,arg2) G(fn,16)(G(arg1,16),G(arg2,16))
#define TEST(var,rel,rhs) (G(var,16) rel (rhs))
#define TESTFLD(obj,field,rel,rhs) (G(obj,16)->field rel (rhs))
2231
2232
2233 /* ----- Only 32-bit mode is supported ----- */
2234
2235 #elif defined SUPPORT_PCRE2_32
/* 32-bit only: no run-time selection is needed, so each macro maps straight
onto the _32 function and onto the 32-suffixed form of each name stem. Counts
handed to memcpy are scaled by 4, and the error buffer size is divided by 4. */

#define CASTFLD(type,obj,field) (type)(G(obj,32)->field)
#define CASTVAR(type,var) (type)G(var,32)
#define CODE_UNIT(base,idx) (uint32_t)(((PCRE2_SPTR32)(base))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(dst,src,n) memcpy(G(dst,32),(char *)src, (n)*4)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_match_context_32))
#define FLD(obj,field) G(obj,32)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_compile_context_32))
#define PCHARS(res, str, off, count, is_utf, out) \
  res = pchars32((PCRE2_SPTR32)(str)+off, count, is_utf, out)
#define PCHARSV(str, off, count, is_utf, out) \
  (void)pchars32((PCRE2_SPTR32)(str)+off, count, is_utf, out)
#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,32) = pcre2_code_copy_32(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_32(G(src,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_32(G(src,32))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,cctx) \
  G(code,32) = pcre2_compile_32(G(pat,32),len,opts,errcode,erroff,cctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
    G(mdata,32),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  rc = pcre2_get_error_message_32(errnum,G(buf,32),G(G(buf,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_32(G(mdata,32))
#define PCRE2_GET_STARTCHAR(pos,mdata) \
  pos = pcre2_get_startchar_32(G(mdata,32))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_32(G(code,32),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_32(G(gctx,32))
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
    G(mdata,32),mctx)
2268 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2269 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
2270 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2271 pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
2272 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
2273 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
2274 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2275 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2276 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
2277 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2278 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
2279 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
2280 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
2281 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
2282 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
2283 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2284 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
2285 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2286 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
2287 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
2288 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2289 r = pcre2_serialize_get_number_of_codes_32(a)
2290 #define PCRE2_SET_CALLOUT(a,b,c) \
2291 pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
2292 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
2293 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2294 pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
2295 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
2296 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
2297 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
2298 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
2299 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
2300 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
2301 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
2302 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
2303 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2304 pcre2_set_substitute_callout_32(G(a,32), \
2305 (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
2306 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2307 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
2308 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
2309 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2310 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
2311 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2312 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
2313 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2314 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2315 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2316 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2317 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2318 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2319 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2320 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2321 a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2322 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2323 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2324 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2325 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
2326 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2327 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
2328 #define PTR(x) (void *)G(x,32)
2329 #define SETFLD(x,y,z) G(x,32)->y = z
2330 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2331 #define SETOP(x,y,z) G(x,32) z y
2332 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
2333 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
2334 #define SUB1(a,b) G(a,32)(G(b,32))
2335 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2336 #define TEST(x,r,y) (G(x,32) r (y))
2337 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2338
2339 #endif
2340
2341 /* ----- End of mode-specific function call macros ----- */
2342
2343
2344
2345
2346 /*************************************************
2347 * Alternate character tables *
2348 *************************************************/
2349
2350 /* By default, the "tables" pointer in the compile context when calling
2351 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2352 library. However, the tables modifier can be used to select alternate sets of
2353 tables, for different kinds of testing. Note that the locale modifier also
2354 adjusts the tables. */
2355
2356 /* This is the set of tables distributed as default with PCRE2. It recognizes
2357 only ASCII characters. */
2358
static const uint8_t tables1[] = {

/* Section 1 (256 bytes): lower casing table, indexed by character value. */

    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,

/* Section 2 (256 bytes): case flipping table, indexed by character value. */

    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,

/* Section 3 (320 bytes): bit maps for character classes. Each map is 32 bytes
long and the bits run from the least significant end of each byte. The classes
with their own maps are, in order: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* Section 4 (256 bytes): character type flags, indexed by character value.
The individual bits mean:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 248-255 */
};
2527
2528 /* This is a set of tables that came originally from a Windows user. It seems
2529 to be at least an approximation of ISO 8859. In particular, there are
2530 characters greater than 128 that are marked as spaces, letters, etc. */
2531
/* The byte counts below (256 + 256 + 320 + 256) match the four sections of
tables1, so the section and class labels follow that layout. NOTE(review): the
labels are inferred from the matching layout - confirm against the origin of
this data. */

static const uint8_t tables2[] = {

/* First 256 bytes: presumably the lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,

/* Next 256 bytes: presumably the case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255,

/* Next 320 bytes: presumably ten 32-byte class bit maps, in the same order as
in tables1. */

  /* space */
  0,62,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,
  32,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,

  /* xdigit */
  0,0,0,0,0,0,255,3,
  126,0,0,0,126,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* digit */
  0,0,0,0,0,0,255,3,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,12,2,
  0,0,0,0,0,0,0,0,

  /* upper */
  0,0,0,0,0,0,0,0,
  254,255,255,7,0,0,0,0,
  0,0,0,0,0,0,0,0,
  255,255,127,127,0,0,0,0,

  /* lower */
  0,0,0,0,0,0,0,0,
  0,0,0,0,254,255,255,7,
  0,0,0,0,0,4,32,4,
  0,0,0,128,255,255,127,255,

  /* word */
  0,0,0,0,0,0,255,3,
  254,255,255,135,254,255,255,7,
  0,0,0,0,0,4,44,6,
  255,255,127,255,255,255,127,255,

  /* graph */
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,255,

  /* print */
  0,2,0,0,255,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,255,255,255,255,
  255,255,255,255,255,255,255,255,

  /* punct */
  0,0,0,0,254,255,0,252,
  1,0,0,248,1,0,0,120,
  0,0,0,0,254,255,255,255,
  0,0,128,0,0,0,128,0,

  /* cntrl */
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,128,
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,0,

/* Final 256 bytes: presumably the character type flags, one byte per
character value, using the same bit meanings as in tables1. */

  128,0,0,0,0,0,0,0,                /*   0-  7 */
  0,1,1,0,1,1,0,0,                  /*   8- 15 */
  0,0,0,0,0,0,0,0,                  /*  16- 23 */
  0,0,0,0,0,0,0,0,                  /*  24- 31 */
  1,0,0,0,128,0,0,0,                /*  32- 39 */
  128,128,128,128,0,0,128,0,        /*  40- 47 */
  28,28,28,28,28,28,28,28,          /*  48- 55 */
  28,28,0,0,0,0,0,128,              /*  56- 63 */
  0,26,26,26,26,26,26,18,           /*  64- 71 */
  18,18,18,18,18,18,18,18,          /*  72- 79 */
  18,18,18,18,18,18,18,18,          /*  80- 87 */
  18,18,18,128,128,0,128,16,        /*  88- 95 */
  0,26,26,26,26,26,26,18,           /*  96-103 */
  18,18,18,18,18,18,18,18,          /* 104-111 */
  18,18,18,18,18,18,18,18,          /* 112-119 */
  18,18,18,128,128,0,0,0,           /* 120-127 */
  0,0,0,0,0,1,0,0,                  /* 128-135 */
  0,0,0,0,0,0,0,0,                  /* 136-143 */
  0,0,0,0,0,0,0,0,                  /* 144-151 */
  0,0,0,0,0,0,0,0,                  /* 152-159 */
  1,0,0,0,0,0,0,0,                  /* 160-167 */
  0,0,18,0,0,0,0,0,                 /* 168-175 */
  0,0,20,20,0,18,0,0,               /* 176-183 */
  0,20,18,0,0,0,0,0,                /* 184-191 */
  18,18,18,18,18,18,18,18,          /* 192-199 */
  18,18,18,18,18,18,18,18,          /* 200-207 */
  18,18,18,18,18,18,18,0,           /* 208-215 */
  18,18,18,18,18,18,18,18,          /* 216-223 */
  18,18,18,18,18,18,18,18,          /* 224-231 */
  18,18,18,18,18,18,18,18,          /* 232-239 */
  18,18,18,18,18,18,18,0,           /* 240-247 */
  18,18,18,18,18,18,18,18           /* 248-255 */
};
2670
2671
2672
2673 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2674 /*************************************************
2675 * Emulated memmove() for systems without it *
2676 *************************************************/
2677
/* Stand-in for memmove(). If bcopy() is available it does the work. Otherwise
the bytes are copied one at a time, because some non-Unix environments lack
both memmove() and bcopy(). The copy direction is chosen from the relative
position of the two pointers so that overlapping regions are copied correctly.

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    the destination pointer
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
  bcopy(s, d, n);
  return d;
#else
  unsigned char *to = (unsigned char *)d;
  const unsigned char *from = (const unsigned char *)s;
  size_t k;

  if (to <= from)
    {
    /* Destination is not above the source: copy from the front. */
    for (k = 0; k < n; k++) to[k] = from[k];
    }
  else
    {
    /* Destination is above the source: copy from the back. */
    for (k = n; k > 0; k--) to[k-1] = from[k-1];
    }

  return d;
#endif   /* not HAVE_BCOPY */
}
2706 #undef memmove
2707 #define memmove(d,s,n) emulated_memmove(d,s,n)
2708 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
2709
2710
2711
2712 #ifndef HAVE_STRERROR
2713 /*************************************************
2714 * Provide strerror() for non-ANSI libraries *
2715 *************************************************/
2716
/* Some old systems (SunOS4, for example) had no strerror() in their libraries.
They may no longer exist, but in case one is still in use this simple
replacement looks the message up in the system error list.

Argument:   n   the error number
Returns:    the message for n, or a fixed text when n is outside the list
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
2730 #endif /* HAVE_STRERROR */
2731
2732
2733
2734 /*************************************************
2735 * Local memory functions *
2736 *************************************************/
2737
2738 /* Alternative memory functions, to test functionality. */
2739
/* Allocation wrapper around malloc(). When show_memory is set, each request is
reported on outfile, and successful blocks are recorded in malloclist (with
their sizes in malloclistlength) while there is room for them.

Arguments:
  size      number of bytes wanted
  data      not used

Returns:    the new block, or NULL if malloc() failed
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
  void *block = malloc(size);
  (void)data;

  if (!show_memory) return block;

  if (block == NULL)
    {
    fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
    return block;
    }

  fprintf(outfile, "malloc %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
  fprintf(outfile, " %p", block);   /* Not portable */
#endif

  if (malloclistptr >= MALLOCLISTSIZE)
    {
    fprintf(outfile, " (not remembered)");
    }
  else
    {
    malloclist[malloclistptr] = block;
    malloclistlength[malloclistptr] = size;
    malloclistptr++;
    }

  fprintf(outfile, "\n");
  return block;
}
2768
my_free(void * block,void * data)2769 static void my_free(void *block, void *data)
2770 {
2771 (void)data;
2772 if (show_memory)
2773 {
2774 uint32_t i, j;
2775 BOOL found = FALSE;
2776
2777 fprintf(outfile, "free");
2778 for (i = 0; i < malloclistptr; i++)
2779 {
2780 if (block == malloclist[i])
2781 {
2782 fprintf(outfile, " %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
2783 malloclistptr--;
2784 for (j = i; j < malloclistptr; j++)
2785 {
2786 malloclist[j] = malloclist[j+1];
2787 malloclistlength[j] = malloclistlength[j+1];
2788 }
2789 found = TRUE;
2790 break;
2791 }
2792 }
2793 if (!found) fprintf(outfile, " unremembered block");
2794 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2795 fprintf(outfile, " %p", block); /* Not portable */
2796 #endif
2797 fprintf(outfile, "\n");
2798 }
2799 free(block);
2800 }
2801
2802
2803
2804 /*************************************************
2805 * Callback function for stack guard *
2806 *************************************************/
2807
2808 /* This is set up to be called from pcre2_compile() when the stackguard=n
2809 modifier sets a value greater than zero. The test we do is whether the
2810 parenthesis nesting depth is greater than the value set by the modifier.
2811
2812 Argument: the current parenthesis nesting depth
2813 Returns: non-zero to kill the compilation
2814 */
2815
2816 static int
stack_guard(uint32_t depth,void * user_data)2817 stack_guard(uint32_t depth, void *user_data)
2818 {
2819 (void)user_data;
2820 return depth > pat_patctl.stackguard_test;
2821 }
2822
2823
2824 /*************************************************
2825 * JIT memory callback *
2826 *************************************************/
2827
2828 static PCRE2_JIT_STACK*
jit_callback(void * arg)2829 jit_callback(void *arg)
2830 {
2831 jit_was_used = TRUE;
2832 return (PCRE2_JIT_STACK *)arg;
2833 }
2834
2835
2836 /*************************************************
2837 * Convert UTF-8 character to code point *
2838 *************************************************/
2839
2840 /* This function reads one or more bytes that represent a UTF-8 character,
2841 and returns the codepoint of that character. Note that the function supports
2842 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2843 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2844 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2845 checking, and also for generating 32-bit non-UTF data values above the UTF
2846 limit.
2847
2848 Argument:
2849 utf8bytes a pointer to the byte vector
2850 vptr a pointer to an int to receive the value
2851
2852 Returns: > 0 => the number of bytes consumed
2853 -6 to 0 => malformed UTF-8 character at offset = (-return)
2854 */
2855
2856 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2857 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2858 {
2859 uint32_t c = *utf8bytes++;
2860 uint32_t d = c;
2861 int i, j, s;
2862
2863 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2864 {
2865 if ((d & 0x80) == 0) break;
2866 d <<= 1;
2867 }
2868
2869 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2870 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2871
2872 /* i now has a value in the range 1-5 */
2873
2874 s = 6*i;
2875 d = (c & utf8_table3[i]) << s;
2876
2877 for (j = 0; j < i; j++)
2878 {
2879 c = *utf8bytes++;
2880 if ((c & 0xc0) != 0x80) return -(j+1);
2881 s -= 6;
2882 d |= (c & 0x3f) << s;
2883 }
2884
2885 /* Check that encoding was the correct unique one */
2886
2887 for (j = 0; j < utf8_table1_size; j++)
2888 if (d <= (uint32_t)utf8_table1[j]) break;
2889 if (j != i) return -(i+1);
2890
2891 /* Valid value */
2892
2893 *vptr = d;
2894 return i+1;
2895 }
2896
2897
2898
2899 /*************************************************
2900 * Print one character *
2901 *************************************************/
2902
2903 /* Print a single character either literally, or as a hex escape, and count how
2904 many printed characters are used.
2905
2906 Arguments:
2907 c the character
2908 utf TRUE in UTF mode
2909 f the FILE to print to, or NULL just to count characters
2910
2911 Returns: number of characters written
2912 */
2913
2914 static int
pchar(uint32_t c,BOOL utf,FILE * f)2915 pchar(uint32_t c, BOOL utf, FILE *f)
2916 {
2917 int n = 0;
2918 char tempbuffer[16];
2919
2920 if (PRINTOK(c))
2921 {
2922 if (f != NULL) fprintf(f, "%c", c);
2923 return 1;
2924 }
2925
2926 if (c < 0x100)
2927 {
2928 if (utf)
2929 {
2930 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2931 return 6;
2932 }
2933 else
2934 {
2935 if (f != NULL) fprintf(f, "\\x%02x", c);
2936 return 4;
2937 }
2938 }
2939
2940 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2941 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2942
2943 return n >= 0 ? n : 0;
2944 }
2945
2946
2947
2948 #ifdef SUPPORT_PCRE2_16
2949 /*************************************************
2950 * Find length of 0-terminated 16-bit string *
2951 *************************************************/
2952
/* Count the 16-bit code units before the terminating zero.

Argument:   p   points to a zero-terminated 16-bit string
Returns:    the number of code units, excluding the terminator
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
  PCRE2_SPTR16 end = p;
  while (*end != 0) end++;

  /* Convert straight to the return type. Going through int, as was done
  before, would corrupt lengths greater than INT_MAX. */

  return (size_t)(end - p);
}
2959 #endif /* SUPPORT_PCRE2_16 */
2960
2961
2962
2963 #ifdef SUPPORT_PCRE2_32
2964 /*************************************************
2965 * Find length of 0-terminated 32-bit string *
2966 *************************************************/
2967
/* Count the 32-bit code units before the terminating zero.

Argument:   p   points to a zero-terminated 32-bit string
Returns:    the number of code units, excluding the terminator
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
  PCRE2_SPTR32 end = p;
  while (*end != 0) end++;

  /* Convert straight to the return type. Going through int, as was done
  before, would corrupt lengths greater than INT_MAX. */

  return (size_t)(end - p);
}
2974 #endif /* SUPPORT_PCRE2_32 */
2975
2976
2977 #ifdef SUPPORT_PCRE2_8
2978 /*************************************************
2979 * Print 8-bit character string *
2980 *************************************************/
2981
2982 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2983 For printing *MARK strings, a negative length is given, indicating that the
2984 length is in the first code unit. If handed a NULL file, this function just
2985 counts chars without printing (because pchar() does that). */
2986
pchars8(PCRE2_SPTR8 p,int length,BOOL utf,FILE * f)2987 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2988 {
2989 uint32_t c = 0;
2990 int yield = 0;
2991 if (length < 0) length = *p++;
2992 while (length-- > 0)
2993 {
2994 if (utf)
2995 {
2996 int rc = utf82ord(p, &c);
2997 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2998 {
2999 length -= rc - 1;
3000 p += rc;
3001 yield += pchar(c, utf, f);
3002 continue;
3003 }
3004 }
3005 c = *p++;
3006 yield += pchar(c, utf, f);
3007 }
3008
3009 return yield;
3010 }
3011 #endif
3012
3013
3014 #ifdef SUPPORT_PCRE2_16
3015 /*************************************************
3016 * Print 16-bit character string *
3017 *************************************************/
3018
3019 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3020 For printing *MARK strings, a negative length is given, indicating that the
3021 length is in the first code unit. If handed a NULL file, just counts chars
3022 without printing. */
3023
pchars16(PCRE2_SPTR16 p,int length,BOOL utf,FILE * f)3024 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
3025 {
3026 int yield = 0;
3027 if (length < 0) length = *p++;
3028 while (length-- > 0)
3029 {
3030 uint32_t c = *p++ & 0xffff;
3031 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
3032 {
3033 int d = *p & 0xffff;
3034 if (d >= 0xDC00 && d <= 0xDFFF)
3035 {
3036 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
3037 length--;
3038 p++;
3039 }
3040 }
3041 yield += pchar(c, utf, f);
3042 }
3043 return yield;
3044 }
3045 #endif /* SUPPORT_PCRE2_16 */
3046
3047
3048
3049 #ifdef SUPPORT_PCRE2_32
3050 /*************************************************
3051 * Print 32-bit character string *
3052 *************************************************/
3053
3054 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3055 For printing *MARK strings, a negative length is given, indicating that the
3056 length is in the first code unit. If handed a NULL file, just counts chars
3057 without printing. */
3058
pchars32(PCRE2_SPTR32 p,int length,BOOL utf,FILE * f)3059 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
3060 {
3061 int yield = 0;
3062 (void)(utf); /* Avoid compiler warning */
3063 if (length < 0) length = *p++;
3064 while (length-- > 0)
3065 {
3066 uint32_t c = *p++;
3067 yield += pchar(c, utf, f);
3068 }
3069 return yield;
3070 }
3071 #endif /* SUPPORT_PCRE2_32 */
3072
3073
3074
3075
3076 /*************************************************
3077 * Convert character value to UTF-8 *
3078 *************************************************/
3079
3080 /* This function takes an integer value in the range 0 - 0x7fffffff
3081 and encodes it as a UTF-8 character in 0 to 6 bytes. It is needed even when the
3082 8-bit library is not supported, to generate UTF-8 output for non-ASCII
3083 characters.
3084
3085 Arguments:
3086 cvalue the character value
3087 utf8bytes pointer to buffer for result - at least 6 bytes long
3088
3089 Returns: number of characters placed in the buffer
3090 */
3091
3092 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3093 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3094 {
3095 int i, j;
3096 if (cvalue > 0x7fffffffu)
3097 return -1;
3098 for (i = 0; i < utf8_table1_size; i++)
3099 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3100 utf8bytes += i;
3101 for (j = i; j > 0; j--)
3102 {
3103 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3104 cvalue >>= 6;
3105 }
3106 *utf8bytes = utf8_table2[i] | cvalue;
3107 return i + 1;
3108 }
3109
3110
3111
3112 #ifdef SUPPORT_PCRE2_16
3113 /*************************************************
3114 * Convert string to 16-bit *
3115 *************************************************/
3116
3117 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3118 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3119 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3120 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3121 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3122 greater than 0xffff.
3123
3124 If all the input bytes are ASCII, the space needed for a 16-bit string is
3125 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3126 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3127 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3128 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3129 save repeated re-sizing.
3130
3131 Note that this function does not object to surrogate values. This is
3132 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3133 for the purpose of testing that they are correctly faulted.
3134
3135 Arguments:
3136 p points to a byte string
3137 utf true in UTF mode
3138 lenptr points to number of bytes in the string (excluding trailing zero)
3139
3140 Returns: 0 on success, with the length updated to the number of 16-bit
3141 data items used (excluding the trailing zero)
3142 OR -1 if a UTF-8 string is malformed
3143 OR -2 if a value > 0x10ffff is encountered in UTF mode
3144 OR -3 if a value > 0xffff is encountered when not in UTF mode
3145 */
3146
3147 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3148 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3149 {
3150 uint16_t *pp;
3151 PCRE2_SIZE len = *lenptr;
3152
3153 if (pbuffer16_size < 2*len + 2)
3154 {
3155 if (pbuffer16 != NULL) free(pbuffer16);
3156 pbuffer16_size = 2*len + 2;
3157 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3158 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3159 if (pbuffer16 == NULL)
3160 {
3161 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3162 SIZ_CAST pbuffer16_size);
3163 exit(1);
3164 }
3165 }
3166
3167 pp = pbuffer16;
3168 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3169 {
3170 for (; len > 0; len--) *pp++ = *p++;
3171 }
3172 else while (len > 0)
3173 {
3174 uint32_t c;
3175 int chlen = utf82ord(p, &c);
3176 if (chlen <= 0) return -1;
3177 if (!utf && c > 0xffff) return -3;
3178 if (c > 0x10ffff) return -2;
3179 p += chlen;
3180 len -= chlen;
3181 if (c < 0x10000) *pp++ = c; else
3182 {
3183 c -= 0x10000;
3184 *pp++ = 0xD800 | (c >> 10);
3185 *pp++ = 0xDC00 | (c & 0x3ff);
3186 }
3187 }
3188
3189 *pp = 0;
3190 *lenptr = pp - pbuffer16;
3191 return 0;
3192 }
3193 #endif
3194
3195
3196
3197 #ifdef SUPPORT_PCRE2_32
3198 /*************************************************
3199 * Convert string to 32-bit *
3200 *************************************************/
3201
3202 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3203 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3204 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3205 limit of 0x10ffff cause an error.
3206
3207 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3208 is set, and no limit is imposed. There is special interpretation of the 0xff
3209 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3210 next character to be set. This provides a way of generating 32-bit characters
3211 greater than 0x7fffffff.
3212
3213 If all the input bytes are ASCII, the space needed for a 32-bit string is
3214 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3215 string is no more than four times, because the number of characters must be
3216 less than the number of bytes. The result is always left in pbuffer32. Impose a
3217 minimum size to save repeated re-sizing.
3218
3219 Note that this function does not object to surrogate values. This is
3220 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3221 for the purpose of testing that they are correctly faulted.
3222
3223 Arguments:
3224 p points to a byte string
3225 utf true in UTF mode
3226 lenptr points to number of bytes in the string (excluding trailing zero)
3227
3228 Returns: 0 on success, with the length updated to the number of 32-bit
3229 data items used (excluding the trailing zero)
3230 OR -1 if a UTF-8 string is malformed
3231 OR -2 if a value > 0x10ffff is encountered in UTF mode
3232 */
3233
3234 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3235 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3236 {
3237 uint32_t *pp;
3238 PCRE2_SIZE len = *lenptr;
3239
3240 if (pbuffer32_size < 4*len + 4)
3241 {
3242 if (pbuffer32 != NULL) free(pbuffer32);
3243 pbuffer32_size = 4*len + 4;
3244 if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3245 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3246 if (pbuffer32 == NULL)
3247 {
3248 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3249 SIZ_CAST pbuffer32_size);
3250 exit(1);
3251 }
3252 }
3253
3254 pp = pbuffer32;
3255
3256 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3257 {
3258 for (; len > 0; len--) *pp++ = *p++;
3259 }
3260
3261 else while (len > 0)
3262 {
3263 int chlen;
3264 uint32_t c;
3265 uint32_t topbit = 0;
3266 if (!utf && *p == 0xff && len > 1)
3267 {
3268 topbit = 0x80000000u;
3269 p++;
3270 len--;
3271 }
3272 chlen = utf82ord(p, &c);
3273 if (chlen <= 0) return -1;
3274 if (utf && c > 0x10ffff) return -2;
3275 p += chlen;
3276 len -= chlen;
3277 *pp++ = c | topbit;
3278 }
3279
3280 *pp = 0;
3281 *lenptr = pp - pbuffer32;
3282 return 0;
3283 }
3284 #endif /* SUPPORT_PCRE2_32 */
3285
3286
3287
3288 /* This function is no longer used. Keep it around for a while, just in case it
3289 needs to be re-instated. */
3290
3291 #ifdef NEVERNEVERNEVER
3292
3293 /*************************************************
3294 * Move back by so many characters *
3295 *************************************************/
3296
3297 /* Given a code unit offset in a subject string, move backwards by a number of
3298 characters, and return the resulting offset.
3299
3300 Arguments:
3301 subject pointer to the string
3302 offset start offset
3303 count count to move back by
3304 utf TRUE if in UTF mode
3305
3306 Returns: a possibly changed offset
3307 */
3308
3309 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3310 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3311 {
3312 if (!utf || test_mode == PCRE32_MODE)
3313 return (count >= offset)? 0 : (offset - count);
3314
3315 else if (test_mode == PCRE8_MODE)
3316 {
3317 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3318 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3319 {
3320 pp--;
3321 while ((*pp & 0xc0) == 0x80) pp--;
3322 }
3323 return pp - (PCRE2_SPTR8)subject;
3324 }
3325
3326 else /* 16-bit mode */
3327 {
3328 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3329 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3330 {
3331 pp--;
3332 if ((*pp & 0xfc00) == 0xdc00) pp--;
3333 }
3334 return pp - (PCRE2_SPTR16)subject;
3335 }
3336 }
3337 #endif /* NEVERNEVERNEVER */
3338
3339
3340
3341 /*************************************************
3342 * Expand input buffers *
3343 *************************************************/
3344
3345 /* This function doubles the size of the input buffer and the buffer for
3346 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3347 the new ones.
3348
3349 Arguments: none
3350 Returns: nothing (aborts if malloc() fails)
3351 */
3352
3353 static void
expand_input_buffers(void)3354 expand_input_buffers(void)
3355 {
3356 int new_pbuffer8_size = 2*pbuffer8_size;
3357 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3358 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3359
3360 if (new_buffer == NULL || new_pbuffer8 == NULL)
3361 {
3362 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3363 exit(1);
3364 }
3365
3366 memcpy(new_buffer, buffer, pbuffer8_size);
3367 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3368
3369 pbuffer8_size = new_pbuffer8_size;
3370
3371 free(buffer);
3372 free(pbuffer8);
3373
3374 buffer = new_buffer;
3375 pbuffer8 = new_pbuffer8;
3376 }
3377
3378
3379
3380 /*************************************************
3381 * Read or extend an input line *
3382 *************************************************/
3383
3384 /* Input lines are read into buffer, but both patterns and data lines can be
3385 continued over multiple input lines. In addition, if the buffer fills up, we
3386 want to automatically expand it so as to be able to handle extremely large
3387 lines that are needed for certain stress tests, although this is less likely
3388 now that there are repetition features for both patterns and data. When the
3389 input buffer is expanded, the other two buffers must also be expanded likewise,
3390 and the contents of pbuffer, which are a copy of the input for callouts, must
3391 be preserved (for when expansion happens for a data line). This is not the most
3392 optimal way of handling this, but hey, this is just a test program!
3393
3394 Arguments:
3395 f the file to read
3396 start where in buffer to start (this *must* be within buffer)
3397 prompt for stdin or readline()
3398
3399 Returns: pointer to the start of new data
3400 could be a copy of start, or could be moved
3401 NULL if no data read and EOF reached
3402 */
3403
3404 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3405 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3406 {
3407 uint8_t *here = start;
3408
3409 for (;;)
3410 {
3411 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3412
3413 if (rlen > 1000)
3414 {
3415 size_t dlen;
3416
3417 /* If libreadline or libedit support is required, use readline() to read a
3418 line if the input is a terminal. Note that readline() removes the trailing
3419 newline, so we must put it back again, to be compatible with fgets(). */
3420
3421 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3422 if (INTERACTIVE(f))
3423 {
3424 size_t len;
3425 char *s = readline(prompt);
3426 if (s == NULL) return (here == start)? NULL : start;
3427 len = strlen(s);
3428 if (len > 0) add_history(s);
3429 if (len > rlen - 1) len = rlen - 1;
3430 memcpy(here, s, len);
3431 here[len] = '\n';
3432 here[len+1] = 0;
3433 free(s);
3434 }
3435 else
3436 #endif
3437
3438 /* Read the next line by normal means, prompting if the file is a tty. */
3439
3440 {
3441 if (INTERACTIVE(f)) printf("%s", prompt);
3442 if (fgets((char *)here, rlen, f) == NULL)
3443 return (here == start)? NULL : start;
3444 }
3445
3446 dlen = strlen((char *)here);
3447 here += dlen;
3448
3449 /* Check for end of line reached. Take care not to read data from before
3450 start (dlen will be zero for a file starting with a binary zero). */
3451
3452 if (here > start && here[-1] == '\n') return start;
3453
3454 /* If we have not read a newline when reading a file, we have either filled
3455 the buffer or reached the end of the file. We can detect the former by
3456 checking that the string fills the buffer, and the latter by feof(). If
3457 neither of these is true, it means we read a binary zero which has caused
3458 strlen() to give a short length. This is a hard error because pcre2test
3459 expects to work with C strings. */
3460
3461 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3462 {
3463 fprintf(outfile, "** Binary zero encountered in input\n");
3464 fprintf(outfile, "** pcre2test run abandoned\n");
3465 exit(1);
3466 }
3467 }
3468
3469 else
3470 {
3471 size_t start_offset = start - buffer;
3472 size_t here_offset = here - buffer;
3473 expand_input_buffers();
3474 start = buffer + start_offset;
3475 here = buffer + here_offset;
3476 }
3477 }
3478
3479 /* Control never gets here */
3480 }
3481
3482
3483
/*************************************************
*    Case-independent strncmp() function         *
*************************************************/

/* Compare at most n characters of two strings, ignoring case as defined by
tolower(). As with strncmp(), the comparison stops at a terminating zero: if
both strings end together before n characters have been examined they are
equal, and nothing beyond the terminators is read. A count that is zero or
negative compares nothing and yields equality.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
for (; n > 0; n--, s++, t++)
  {
  int diff = tolower(*s) - tolower(*t);
  if (diff != 0) return diff;
  if (*s == 0) break;     /* Equal here, so both strings have ended */
  }
return 0;
}
3507
3508
3509
3510 /*************************************************
3511 * Scan the main modifier list *
3512 *************************************************/
3513
3514 /* This function searches the modifier list for a long modifier name.
3515
3516 Argument:
3517 p start of the name
3518 lenp length of the name
3519
3520 Returns: an index in the modifier list, or -1 on failure
3521 */
3522
3523 static int
scan_modifiers(const uint8_t * p,unsigned int len)3524 scan_modifiers(const uint8_t *p, unsigned int len)
3525 {
3526 int bot = 0;
3527 int top = MODLISTCOUNT;
3528
3529 while (top > bot)
3530 {
3531 int mid = (bot + top)/2;
3532 unsigned int mlen = strlen(modlist[mid].name);
3533 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3534 if (c == 0)
3535 {
3536 if (len == mlen) return mid;
3537 c = (int)len - (int)mlen;
3538 }
3539 if (c > 0) bot = mid + 1; else top = mid;
3540 }
3541
3542 return -1;
3543
3544 }
3545
3546
3547
3548 /*************************************************
3549 * Check a modifer and find its field *
3550 *************************************************/
3551
3552 /* This function is called when a modifier has been identified. We check that
3553 it is allowed here and find the field that is to be changed.
3554
3555 Arguments:
3556 m the modifier list entry
3557 ctx CTX_PAT => pattern context
3558 CTX_POPPAT => pattern context for popped pattern
3559 CTX_DEFPAT => default pattern context
3560 CTX_DAT => data context
3561 CTX_DEFDAT => default data context
3562 pctl point to pattern control block
3563 dctl point to data control block
3564 c a single character or 0
3565
3566 Returns: a field pointer or NULL
3567 */
3568
3569 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3570 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3571 {
3572 void *field = NULL;
3573 PCRE2_SIZE offset = m->offset;
3574
3575 if (restrict_for_perl_test) switch(m->which)
3576 {
3577 case MOD_PNDP:
3578 case MOD_PATP:
3579 case MOD_PDP:
3580 break;
3581
3582 default:
3583 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3584 m->name);
3585 return NULL;
3586 }
3587
3588 switch (m->which)
3589 {
3590 case MOD_CTC: /* Compile context modifier */
3591 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3592 else if (ctx == CTX_PAT) field = PTR(pat_context);
3593 break;
3594
3595 case MOD_CTM: /* Match context modifier */
3596 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3597 else if (ctx == CTX_DAT) field = PTR(dat_context);
3598 break;
3599
3600 case MOD_DAT: /* Data line modifier */
3601 if (dctl != NULL) field = dctl;
3602 break;
3603
3604 case MOD_PAT: /* Pattern modifier */
3605 case MOD_PATP: /* Allowed for Perl test */
3606 if (pctl != NULL) field = pctl;
3607 break;
3608
3609 case MOD_PD: /* Pattern or data line modifier */
3610 case MOD_PDP: /* Ditto, allowed for Perl test */
3611 case MOD_PND: /* Ditto, but not default pattern */
3612 case MOD_PNDP: /* Ditto, allowed for Perl test */
3613 if (dctl != NULL) field = dctl;
3614 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3615 ctx != CTX_DEFPAT))
3616 field = pctl;
3617 break;
3618 }
3619
3620 if (field == NULL)
3621 {
3622 if (c == 0)
3623 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3624 else
3625 fprintf(outfile, "** /%c is not valid here\n", c);
3626 return NULL;
3627 }
3628
3629 return (char *)field + offset;
3630 }
3631
3632
3633
/*************************************************
*           Decode a modifier list               *
*************************************************/

/* Decode a comma-separated list of modifiers and store their settings in the
appropriate context or control block. Each item is either a full-name modifier
(optionally preceded by '-' to turn it off, and optionally followed by '=' and
a value), or, before any full-name modifier has been processed, a run of
single-character abbreviations. The input string is modified: trailing white
space at the end of the line is cut off by writing a zero.

A pointer to a control block is NULL when called in cases when that block is
not relevant. They are never all relevant in one call. At least one of pctl
and dctl is NULL. The second argument specifies which context to use for
modifiers that apply to contexts.

Arguments:
  p          point to modifier string
  ctx        CTX_PAT     => pattern context
             CTX_POPPAT  => pattern context for popped pattern
             CTX_DEFPAT  => default pattern context
             CTX_DAT     => data context
             CTX_DEFDAT  => default data context
  pctl       point to pattern control block
  dctl       point to data control block

Returns: TRUE if successful decode, FALSE otherwise (a message has been
         written to outfile)
*/

static BOOL
decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
{
uint8_t *ep, *pp;       /* ep: end of current item; pp: scan pointer in it */
long li;
unsigned long uli;
BOOL first = TRUE;      /* Cleared once a full-name modifier is processed */

for (;;)
  {
  void *field;
  modstruct *m;
  BOOL off = FALSE;
  unsigned int i, len;
  int index;
  char *endptr;

  /* Skip white space and commas. */

  while (isspace(*p) || *p == ',') p++;
  if (*p == 0) break;

  /* Find the end of the item; lose trailing whitespace at end of line. */

  for (ep = p; *ep != 0 && *ep != ','; ep++);
  if (*ep == 0)
    {
    while (ep > p && isspace(ep[-1])) ep--;
    *ep = 0;
    }

  /* Remember if the first character is '-'. */

  if (*p == '-')
    {
    off = TRUE;
    p++;
    }

  /* Find the length of a full-length modifier name, and scan for it. */

  pp = p;
  while (pp < ep && *pp != '=') pp++;
  index = scan_modifiers(p, pp - p);

  /* If the first modifier is unrecognized, try to interpret it as a sequence
  of single-character abbreviated modifiers. None of these modifiers have any
  associated data. They just set options or control bits. Note that 'off' is
  not consulted in this branch: a leading '-' has already been skipped and the
  bits are always set. */

  if (index < 0)
    {
    uint32_t cc;
    uint8_t *mp = p;    /* Start of the run, for error messages */

    if (!first)
      {
      fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
      if (ep - p == 1)
        fprintf(outfile, "** Single-character modifiers must come first\n");
      return FALSE;
      }

    /* This loop advances p itself, so that on exit it points at the comma,
    newline, or zero that ended the run. */

    for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
      {
      for (i = 0; i < C1MODLISTCOUNT; i++)
        if (cc == c1modlist[i].onechar) break;

      if (i >= C1MODLISTCOUNT)
        {
        fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
          *p, (int)(ep-mp), mp);
        return FALSE;
        }

      /* A non-negative index is one that was cached by an earlier lookup. */

      if (c1modlist[i].index >= 0)
        {
        index = c1modlist[i].index;
        }

      else
        {
        index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
          strlen(c1modlist[i].fullname));
        if (index < 0)
          {
          fprintf(outfile, "** Internal error: single-character equivalent "
            "modifier '%s' not found\n", c1modlist[i].fullname);
          return FALSE;
          }
        c1modlist[i].index = index;  /* Cache for next time */
        }

      field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
      if (field == NULL) return FALSE;

      /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
      PCRE2_EXTENDED_MORE. */

      if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
        {
        *((uint32_t *)field) &= ~PCRE2_EXTENDED;
        *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
        }
      else
        *((uint32_t *)field) |= modlist[index].value;
      }

    continue;    /* With the next (fullname) modifier */
    }

  /* We have a match on a full-name modifier. Check for the existence of data
  when needed. Types other than MOD_CTL and MOD_OPT require "=value", except
  that for MOD_IND the value is optional. A leading '-' is not allowed when
  there is a value. */

  m = modlist + index;      /* Save typing */
  if (m->type != MOD_CTL && m->type != MOD_OPT &&
      (m->type != MOD_IND || *pp == '='))
    {
    if (*pp++ != '=')
      {
      fprintf(outfile, "** '=' expected after '%s'\n", m->name);
      return FALSE;
      }
    if (off)
      {
      fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
      return FALSE;
      }
    }

  /* These on/off types have no data. */

  else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
    {
    fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
    return FALSE;
    }

  /* Set the data length for those types that have data. Then find the field
  that is to be set. If check_modifier() returns NULL, it has already output an
  error message. */

  len = ep - pp;
  field = check_modifier(m, ctx, pctl, dctl, 0);
  if (field == NULL) return FALSE;

  /* Process according to data type. Each case that consumes a value leaves pp
  pointing just after what it has used, so that the check for a following
  comma after the switch can detect trailing junk. */

  switch (m->type)
    {
    case MOD_CTL:
    case MOD_OPT:
    if (off) *((uint32_t *)field) &= ~m->value;
      else *((uint32_t *)field) |= m->value;
    break;

    /* "default" resets the value to the build-time default and clears the
    "has been set" control bit; "anycrlf" and "unicode" set the value and the
    bit. */

    case MOD_BSR:
    if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
      {
#ifdef BSR_ANYCRLF
      *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
#else
      *((uint16_t *)field) = PCRE2_BSR_UNICODE;
#endif
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
        else dctl->control2 &= ~CTL2_BSR_SET;
      }
    else
      {
      if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
        *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
      else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
        *((uint16_t *)field) = PCRE2_BSR_UNICODE;
      else goto INVALID_VALUE;
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
        else dctl->control2 |= CTL2_BSR_SET;
      }
    pp = ep;
    break;

    /* NOTE(review): pp is advanced unconditionally. If nothing follows the
    '=', this appears to store the item terminator and step past it - confirm
    whether an empty value can reach here. */

    case MOD_CHR:  /* A single character */
    *((uint32_t *)field) = *pp++;
    break;

    /* The value is a list of names separated by colons. The first name
    replaces CONVERT_UNSET; later ones are ORed in. NOTE(review): the
    comparison covers only the 'len' characters given, so an abbreviation (or
    an empty name) appears to match the first list entry that it is a prefix
    of - confirm that this is intended. */

    case MOD_CON:  /* A convert type/options list */
    for (;; pp++)
      {
      uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
      len = ((colon != NULL && colon < ep)? colon:ep) - pp;
      for (i = 0; i < convertlistcount; i++)
        {
        if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
          {
          if (*((uint32_t *)field) == CONVERT_UNSET)
            *((uint32_t *)field) = convertlist[i].option;
          else
            *((uint32_t *)field) |= convertlist[i].option;
          break;
          }
        }
      if (i >= convertlistcount) goto INVALID_VALUE;
      pp += len;
      if (*pp != ':') break;
      }
    break;

    /* The second number, which follows a colon, defaults to zero. */

    case MOD_IN2:    /* One or two unsigned integers */
    if (!isdigit(*pp)) goto INVALID_VALUE;
    uli = strtoul((const char *)pp, &endptr, 10);
    if (U32OVERFLOW(uli)) goto INVALID_VALUE;
    ((uint32_t *)field)[0] = (uint32_t)uli;
    if (*endptr == ':')
      {
      uli = strtoul((const char *)endptr+1, &endptr, 10);
      if (U32OVERFLOW(uli)) goto INVALID_VALUE;
      ((uint32_t *)field)[1] = (uint32_t)uli;
      }
    else ((uint32_t *)field)[1] = 0;
    pp = (uint8_t *)endptr;
    break;

    /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
    less than ULONG_MAX. So first test for overflowing the long int, and then
    test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */

    case MOD_SIZ:    /* PCRE2_SIZE value */
    if (!isdigit(*pp)) goto INVALID_VALUE;
    uli = strtoul((const char *)pp, &endptr, 10);
    if (uli == ULONG_MAX) goto INVALID_VALUE;
#if ULONG_MAX > PCRE2_SIZE_MAX
    if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
#endif
    *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
    pp = (uint8_t *)endptr;
    break;

    /* With no value at all, m->value supplies the default. */

    case MOD_IND:    /* Unsigned integer with default */
    if (len == 0)
      {
      *((uint32_t *)field) = (uint32_t)(m->value);
      break;
      }
    /* Fall through */

    case MOD_INT:    /* Unsigned integer */
    if (!isdigit(*pp)) goto INVALID_VALUE;
    uli = strtoul((const char *)pp, &endptr, 10);
    if (U32OVERFLOW(uli)) goto INVALID_VALUE;
    *((uint32_t *)field) = (uint32_t)uli;
    pp = (uint8_t *)endptr;
    break;

    case MOD_INS:    /* Signed integer */
    if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
    li = strtol((const char *)pp, &endptr, 10);
    if (S32OVERFLOW(li)) goto INVALID_VALUE;
    *((int32_t *)field) = (int32_t)li;
    pp = (uint8_t *)endptr;
    break;

    /* The value must exactly match one of the names in newlines[]. Entry 0
    selects NEWLINE_DEFAULT and clears the "has been set" control bit; any
    other entry stores its index and sets the bit. */

    case MOD_NL:
    for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
      if (len == strlen(newlines[i]) &&
        strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
    if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
    if (i == 0)
      {
      *((uint16_t *)field) = NEWLINE_DEFAULT;
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
        else dctl->control2 &= ~CTL2_NL_SET;
      }
    else
      {
      *((uint16_t *)field) = i;
      if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
        else dctl->control2 |= CTL2_NL_SET;
      }
    pp = ep;
    break;

    /* For a number, the field pointer is re-based from m->offset to m->value
    (presumably the offset of the list of numbers within the same block -
    TODO confirm against modlist). The list is terminated by a negative
    entry. A non-negative number is appended after the existing entries; a
    negative number is stored at the start of the list. */

    case MOD_NN:              /* Name or (signed) number; may be several */
    if (isdigit(*pp) || *pp == '-')
      {
      int ct = MAXCPYGET - 1;
      int32_t value;
      li = strtol((const char *)pp, &endptr, 10);
      if (S32OVERFLOW(li)) goto INVALID_VALUE;
      value = (int32_t)li;
      field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
      if (value >= 0)                                    /* Add new number */
        {
        while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
          field = (char *)field + sizeof(int32_t);
        if (ct <= 0)
          {
          fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
          return FALSE;
          }
        }
      *((int32_t *)field) = value;
      if (ct > 0) ((int32_t *)field)[1] = -1;
      pp = (uint8_t *)endptr;
      }

    /* Multiple strings are put end to end. Each one is followed by a zero,
    and an extra zero marks the end of the whole list. */

    else
      {
      char *nn = (char *)field;
      if (len > 0)                    /* Add new name */
        {
        if (len > MAX_NAME_SIZE)
          {
          fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
          return FALSE;
          }
        while (*nn != 0) nn += strlen(nn) + 1;
        if (nn + len + 2 - (char *)field > LENCPYGET)
          {
          fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
            m->name);
          return FALSE;
          }
        memcpy(nn, pp, len);
        }
      nn[len] = 0 ;
      nn[len+1] = 0;
      pp = ep;
      }
    break;

    /* Here m->value is the size of the destination, including the
    terminating zero. */

    case MOD_STR:
    if (len + 1 > m->value)
      {
      fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
        m->name, m->value - 1);
      return FALSE;
      }
    memcpy(field, pp, len);
    ((uint8_t *)field)[len] = 0;
    pp = ep;
    break;
    }

  /* Anything left over after the value is an error. */

  if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
    {
    fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
    return FALSE;
    }

  p = pp;
  first = FALSE;

  /* For a popped pattern, reject the modifier just processed if any option,
  tables id, locale, or control in NOTPOP_CONTROLS is now set. */

  if (ctx == CTX_POPPAT &&
     (pctl->options != 0 ||
      pctl->tables_id != 0 ||
      pctl->locale[0] != 0 ||
      (pctl->control & NOTPOP_CONTROLS) != 0))
    {
    fprintf(outfile, "** '%s' is not valid here\n", m->name);
    return FALSE;
    }
  }

return TRUE;

/* Common exit for a value that cannot be decoded; p still points to the start
of the offending item. */

INVALID_VALUE:
fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
return FALSE;
}
4025
4026
4027 /*************************************************
4028 * Get info from a pattern *
4029 *************************************************/
4030
4031 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4032 pattern.
4033
4034 Arguments:
4035 what code for the required information
4036 where where to put the answer
4037 unsetok PCRE2_ERROR_UNSET is an "expected" result
4038
4039 Returns: the return from pcre2_pattern_info()
4040 */
4041
4042 static int
pattern_info(int what,void * where,BOOL unsetok)4043 pattern_info(int what, void *where, BOOL unsetok)
4044 {
4045 int rc;
4046 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
4047 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4048 if (rc >= 0) return 0;
4049 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4050 {
4051 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4052 what);
4053 if (rc == PCRE2_ERROR_BADMODE)
4054 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4055 "%d-bit mode\n", test_mode,
4056 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4057 }
4058 return rc;
4059 }
4060
4061
4062
4063 #ifdef SUPPORT_PCRE2_8
4064 /*************************************************
4065 * Show something in a list *
4066 *************************************************/
4067
4068 /* This function just helps to keep the code that uses it tidier. It's used for
4069 various lists of things where there needs to be introductory text before the
4070 first item. As these calls are all in the POSIX-support code, they happen only
4071 when 8-bit mode is supported. */
4072
4073 static void
prmsg(const char ** msg,const char * s)4074 prmsg(const char **msg, const char *s)
4075 {
4076 fprintf(outfile, "%s %s", *msg, s);
4077 *msg = "";
4078 }
4079 #endif /* SUPPORT_PCRE2_8 */
4080
4081
4082
4083 /*************************************************
4084 * Show control bits *
4085 *************************************************/
4086
4087 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4088 Because the bits are unique, this can be used for both pattern and data control
4089 words.
4090
4091 Arguments:
4092 controls control bits
4093 controls2 more control bits
4094 before text to print before
4095
4096 Returns: nothing
4097 */
4098
4099 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4100 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4101 {
4102 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4103 before,
4104 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4105 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4106 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4107 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4108 ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4109 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4110 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4111 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4112 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4113 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4114 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4115 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4116 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4117 ((controls & CTL_DFA) != 0)? " dfa" : "",
4118 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4119 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4120 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4121 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4122 ((controls & CTL_GETALL) != 0)? " getall" : "",
4123 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4124 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4125 ((controls & CTL_INFO) != 0)? " info" : "",
4126 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4127 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4128 ((controls & CTL_MARK) != 0)? " mark" : "",
4129 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4130 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4131 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4132 ((controls & CTL_POSIX) != 0)? " posix" : "",
4133 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4134 ((controls & CTL_PUSH) != 0)? " push" : "",
4135 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4136 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4137 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4138 ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4139 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4140 ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4141 ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4142 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4143 ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4144 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4145 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4146 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4147 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4148 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4149 }
4150
4151
4152
4153 /*************************************************
4154 * Show compile options *
4155 *************************************************/
4156
4157 /* Called from show_pattern_info() and for unsupported POSIX options.
4158
4159 Arguments:
4160 options an options word
4161 before text to print before
4162 after text to print after
4163
4164 Returns: nothing
4165 */
4166
4167 static void
show_compile_options(uint32_t options,const char * before,const char * after)4168 show_compile_options(uint32_t options, const char *before, const char *after)
4169 {
4170 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4171 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4172 before,
4173 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4174 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4175 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4176 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4177 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4178 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4179 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4180 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4181 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4182 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4183 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4184 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4185 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4186 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4187 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4188 ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4189 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4190 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4191 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4192 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4193 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4194 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4195 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4196 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4197 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4198 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4199 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4200 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4201 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4202 ((options & PCRE2_UTF) != 0)? " utf" : "",
4203 after);
4204 }
4205
4206
4207 /*************************************************
4208 * Show compile extra options *
4209 *************************************************/
4210
4211 /* Called from show_pattern_info() and for unsupported POSIX options.
4212
4213 Arguments:
4214 options an options word
4215 before text to print before
4216 after text to print after
4217
4218 Returns: nothing
4219 */
4220
4221 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4222 show_compile_extra_options(uint32_t options, const char *before,
4223 const char *after)
4224 {
4225 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4226 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4227 before,
4228 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4229 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4230 ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4231 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4232 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4233 ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4234 after);
4235 }
4236
4237
4238
#ifdef SUPPORT_PCRE2_8
/*************************************************
*                Show match options              *
*************************************************/

/* Output the names of the match option bits that are set in an options word.
Called for unsupported POSIX options. Each name is preceded by a space; nothing
at all is output when none of the bits listed below is set. This function is
compiled only when 8-bit support is present.

Argument:     an options word
Returns:      nothing
*/

static void
show_match_options(uint32_t options)
{
/* The table order is the order in which the names are output. */

static const struct {
  uint32_t bit;
  const char *text;
} matchnames[] = {
  { PCRE2_ANCHORED,             " anchored" },
  { PCRE2_COPY_MATCHED_SUBJECT, " copy_matched_subject" },
  { PCRE2_DFA_RESTART,          " dfa_restart" },
  { PCRE2_DFA_SHORTEST,         " dfa_shortest" },
  { PCRE2_ENDANCHORED,          " endanchored" },
  { PCRE2_NO_JIT,               " no_jit" },
  { PCRE2_NO_UTF_CHECK,         " no_utf_check" },
  { PCRE2_NOTBOL,               " notbol" },
  { PCRE2_NOTEMPTY,             " notempty" },
  { PCRE2_NOTEMPTY_ATSTART,     " notempty_atstart" },
  { PCRE2_NOTEOL,               " noteol" },
  { PCRE2_PARTIAL_HARD,         " partial_hard" },
  { PCRE2_PARTIAL_SOFT,         " partial_soft" }
};
size_t n;

for (n = 0; n < sizeof(matchnames)/sizeof(matchnames[0]); n++)
  {
  if ((options & matchnames[n].bit) != 0)
    fprintf(outfile, "%s", matchnames[n].text);
  }
}
#endif  /* SUPPORT_PCRE2_8 */
4265
4266
4267
4268 /*************************************************
4269 * Show memory usage info for a pattern *
4270 *************************************************/
4271
4272 static void
show_memory_info(void)4273 show_memory_info(void)
4274 {
4275 uint32_t name_count, name_entry_size;
4276 size_t size, cblock_size;
4277
4278 /* One of the test_mode values will always be true, but to stop a compiler
4279 warning we must initialize cblock_size. */
4280
4281 cblock_size = 0;
4282 #ifdef SUPPORT_PCRE2_8
4283 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4284 #endif
4285 #ifdef SUPPORT_PCRE2_16
4286 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4287 #endif
4288 #ifdef SUPPORT_PCRE2_32
4289 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4290 #endif
4291
4292 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4293 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4294 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4295 fprintf(outfile, "Memory allocation (code space): %d\n",
4296 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4297 if (pat_patctl.jit != 0)
4298 {
4299 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4300 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4301 }
4302 }
4303
4304
4305
4306 /*************************************************
4307 * Show frame size info for a pattern *
4308 *************************************************/
4309
4310 static void
show_framesize(void)4311 show_framesize(void)
4312 {
4313 size_t frame_size;
4314 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4315 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4316 }
4317
4318
4319
4320 /*************************************************
4321 * Get and output an error message *
4322 *************************************************/
4323
4324 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4325 print_error_message(int errorcode, const char *before, const char *after)
4326 {
4327 int len;
4328 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4329 if (len < 0)
4330 {
4331 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4332 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4333 }
4334 else
4335 {
4336 fprintf(outfile, "%s", before);
4337 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4338 fprintf(outfile, "%s", after);
4339 }
4340 return len >= 0;
4341 }
4342
4343
4344 /*************************************************
4345 * Callback function for callout enumeration *
4346 *************************************************/
4347
4348 /* The only differences in the callout emumeration block for different code
4349 unit widths are that the pointers to the subject, the most recent MARK, and a
4350 callout argument string point to strings of the appropriate width. Casts can be
4351 used to deal with this.
4352
4353 Argument:
4354 cb pointer to enumerate block
4355 callout_data user data
4356
4357 Returns: 0
4358 */
4359
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4360 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4361 void *callout_data)
4362 {
4363 uint32_t i;
4364 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4365
4366 (void)callout_data; /* Not currently displayed */
4367
4368 fprintf(outfile, "Callout ");
4369 if (cb->callout_string != NULL)
4370 {
4371 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4372 fprintf(outfile, "%c", delimiter);
4373 PCHARSV(cb->callout_string, 0,
4374 cb->callout_string_length, utf, outfile);
4375 for (i = 0; callout_start_delims[i] != 0; i++)
4376 if (delimiter == callout_start_delims[i])
4377 {
4378 delimiter = callout_end_delims[i];
4379 break;
4380 }
4381 fprintf(outfile, "%c ", delimiter);
4382 }
4383 else fprintf(outfile, "%d ", cb->callout_number);
4384
4385 fprintf(outfile, "%.*s\n",
4386 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4387 pbuffer8 + cb->pattern_position);
4388
4389 return 0;
4390 }
4391
4392
4393
/*************************************************
*        Show information about a pattern        *
*************************************************/

/* This function is called after a pattern has been compiled if any of the
information-requesting controls have been set. Depending on the bits in
pat_patctl.control it outputs, in this order: the compiled code (bincode or
fullbincode), the general information block (info), and the list of callouts
(callout_info).

Arguments:  none

Returns:    PR_OK     continue processing next line
            PR_SKIP   skip to a blank line (callout enumeration failed)
            PR_ABEND  abort the pcre2test run (an information request failed,
                        or an error message could not be obtained)
*/

static int
show_pattern_info(void)
{
uint32_t compile_options, overall_options, extra_options;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;

/* Show the compiled code, preceded by a line of hyphens. */

if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
  {
  fprintf(outfile, "------------------------------------------------------------------\n");
  PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
  }

if ((pat_patctl.control & CTL_INFO) != 0)
  {
  int rc;
  void *nametable;
  uint8_t *start_bits;
  BOOL heap_limit_set, match_limit_set, depth_limit_set;
  uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
    hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
    depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
    newline_convention;

  /* Exercise the error route. The result is deliberately discarded.
  NOTE(review): 999 is presumably not a valid information request - confirm. */

  PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
  (void)rc;

  /* These info requests may return PCRE2_ERROR_UNSET. Zero means that a limit
  was set and its value is available; any other return is fatal. */

  switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
    {
    case 0:
    heap_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    heap_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
    {
    case 0:
    match_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    match_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
    {
    case 0:
    depth_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    depth_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  /* These info requests should always succeed. The return codes are added
  together, so a non-zero sum is treated as a failure of at least one of
  them. */

  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
      pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
      pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
      pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
      pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
      pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
      pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
      pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
      != 0)
    return PR_ABEND;

  fprintf(outfile, "Capture group count = %d\n", capture_count);

  if (backrefmax > 0)
    fprintf(outfile, "Max back reference = %d\n", backrefmax);

  /* maxlookbehind is not fetched here; it is a variable set outside this
  function. */

  if (maxlookbehind > 0)
    fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);

  if (heap_limit_set)
    fprintf(outfile, "Heap limit = %u\n", heap_limit);

  if (match_limit_set)
    fprintf(outfile, "Match limit = %u\n", match_limit);

  if (depth_limit_set)
    fprintf(outfile, "Depth limit = %u\n", depth_limit);

  /* Show the name table, one entry per line. As read by the code below, each
  entry starts with the group number - two bytes, most significant first, in
  8-bit mode, and a single code unit in the other modes - followed by the
  zero-terminated name. Entries are nameentrysize code units apart. */

  if (namecount > 0)
    {
    fprintf(outfile, "Named capture groups:\n");
    for (; namecount > 0; namecount--)
      {
      /* Number of code units occupied by the group number. */
      int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;

      /* NOTE(review): nametable is a void pointer and the argument is passed
      to the STRLEN macro unparenthesized; this presumably relies on the macro
      applying a mode-specific cast to nametable before imm2_size is added -
      confirm against the macro definition before changing this line. */

      uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
      fprintf(outfile, " ");

      /* In UTF mode the name may be a UTF string containing non-ASCII
      letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
      use the normal string printing functions, which use escapes for all
      non-ASCII characters. */

      if (utf)
        {
#ifdef SUPPORT_PCRE2_32
        if (test_mode == PCRE32_MODE)
          {
          PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
          while (*nameptr != 0)
            {
            uint8_t u8buff[6];
            int len = ord2utf8(*nameptr++, u8buff);
            fprintf(outfile, "%.*s", len, u8buff);
            }
          }
#endif
#ifdef SUPPORT_PCRE2_16
        if (test_mode == PCRE16_MODE)
          {
          PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
          while (*nameptr != 0)
            {
            int len;
            uint8_t u8buff[6];
            uint32_t c = *nameptr++ & 0xffff;

            /* A high surrogate is combined with the code unit that follows
            it to form one code point above 0xffff. */

            if (c >= 0xD800 && c < 0xDC00)
              c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
            len = ord2utf8(c, u8buff);
            fprintf(outfile, "%.*s", len, u8buff);
            }
          }
#endif
#ifdef SUPPORT_PCRE2_8
        if (test_mode == PCRE8_MODE)
          fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
#endif
        }
      else  /* Not UTF mode */
        {
        PCHARSV(nametable, imm2_size, length, FALSE, outfile);
        }

      /* Add spaces until the count, which starts at the name's length in code
      units, reaches nameentrysize - imm2_size. */

      while (length++ < nameentrysize - imm2_size) putc(' ', outfile);

      /* Output the group number that is held at the start of the entry. */

#ifdef SUPPORT_PCRE2_32
      if (test_mode == PCRE32_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_16
      if (test_mode == PCRE16_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_8
      if (test_mode == PCRE8_MODE)
        fprintf(outfile, "%3d\n", (int)(
        ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
#endif

      /* Move on to the next entry; the step is computed in bytes. */

      nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
      }
    }

  if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
  if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
  if (match_empty)   fprintf(outfile, "May match empty string\n");

  /* The return values of these three requests are not checked. */

  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
  pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);

  /* Remove NEVER_UTF/NEVER_UCP if they were there only because of forbid_utf,
  that is, when they were not given as options for this pattern. This saves
  cluttering up the verification output of non-UTF test files. */

  if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UTF;
    overall_options &= ~PCRE2_NEVER_UTF;
    }

  if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UCP;
    overall_options &= ~PCRE2_NEVER_UCP;
    }

  /* When the two option words are the same, a single "Options:" line is
  output; otherwise both are shown. */

  if ((compile_options|overall_options) != 0)
    {
    if (compile_options == overall_options)
      show_compile_options(compile_options, "Options:", "\n");
    else
      {
      show_compile_options(compile_options, "Compile options:", "\n");
      show_compile_options(overall_options, "Overall options:", "\n");
      }
    }

  if (extra_options != 0)
    show_compile_extra_options(extra_options, "Extra options:", "\n");

  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");

  /* The \R convention is shown if it was set either by a modifier or in the
  compiled pattern's flags. */

  if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
      (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");

  if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
    {
    switch (newline_convention)
      {
      case PCRE2_NEWLINE_CR:
      fprintf(outfile, "Forced newline is CR\n");
      break;

      case PCRE2_NEWLINE_LF:
      fprintf(outfile, "Forced newline is LF\n");
      break;

      case PCRE2_NEWLINE_CRLF:
      fprintf(outfile, "Forced newline is CRLF\n");
      break;

      case PCRE2_NEWLINE_ANYCRLF:
      fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
      break;

      case PCRE2_NEWLINE_ANY:
      fprintf(outfile, "Forced newline is any Unicode newline\n");
      break;

      case PCRE2_NEWLINE_NUL:
      fprintf(outfile, "Forced newline is NUL\n");
      break;

      default:
      break;
      }
    }

  /* Show what is known about the start of a match. A first code type of 2 is
  reported as "at start or follows newline", and a type of 1 as a specific
  first code unit. Failing those, the bitmap of possible starting code units
  is listed if there is one. */

  if (first_ctype == 2)
    {
    fprintf(outfile, "First code unit at start or follows newline\n");
    }
  else if (first_ctype == 1)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(first_cunit))
      fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
    else
      {
      fprintf(outfile, "First code unit = ");
      pchar(first_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }
  else if (start_bits != NULL)
    {
    int i;
    int c = 24;    /* Running count used for wrapping the output line */
    fprintf(outfile, "Starting code units: ");
    for (i = 0; i < 256; i++)
      {
      /* Bit (i & 7) of byte i/8 is set if code unit i can start a match. */

      if ((start_bits[i/8] & (1u << (i&7))) != 0)
        {
        if (c > 75)
          {
          fprintf(outfile, "\n ");
          c = 2;
          }

        /* A space is shown in hexadecimal even though it is printable. */

        if (PRINTOK(i) && i != ' ')
          {
          fprintf(outfile, "%c ", i);
          c += 2;
          }
        else
          {
          fprintf(outfile, "\\x%02x ", i);
          c += 5;
          }
        }
      }
    fprintf(outfile, "\n");
    }

  if (last_ctype != 0)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(last_cunit))
      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
    else
      {
      fprintf(outfile, "Last code unit = ");
      pchar(last_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }

  /* The minimum subject length is not shown when start optimization was
  disabled. */

  if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
    fprintf(outfile, "Subject length lower bound = %d\n", minlength);

  /* For jitverify, report whether the compiled pattern has JIT code. */

  if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
    {
    if (FLD(compiled_code, executable_jit) != NULL)
      fprintf(outfile, "JIT compilation was successful\n");
    else
      {
#ifdef SUPPORT_JIT
      fprintf(outfile, "JIT compilation was not successful");
      if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
        return PR_ABEND;
      fprintf(outfile, "\n");
#else
      fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
#endif
      }
    }
  }

/* List the callouts in the pattern; callout_callback() is passed to the
enumeration macro and does the output. */

if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
  {
  int errorcode;
  PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
  if (errorcode != 0)
    {
    fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
    if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
      return PR_ABEND;
    return PR_SKIP;
    }
  }

return PR_OK;
}
4768
4769
4770
4771 /*************************************************
4772 * Handle serialization error *
4773 *************************************************/
4774
4775 /* Print an error message after a serialization failure.
4776
4777 Arguments:
4778 rc the error code
4779 msg an initial message for what failed
4780
4781 Returns: FALSE if print_error_message() fails
4782 */
4783
4784 static BOOL
serial_error(int rc,const char * msg)4785 serial_error(int rc, const char *msg)
4786 {
4787 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4788 return print_error_message(rc, "", "\n");
4789 }
4790
4791
4792
4793 /*************************************************
4794 * Open file for save/load commands *
4795 *************************************************/
4796
4797 /* This function decodes the file name and opens the file.
4798
4799 Arguments:
4800 buffptr point after the #command
4801 mode open mode
4802 fptr points to the FILE variable
4803 name name of # command
4804
4805 Returns: PR_OK or PR_ABEND
4806 */
4807
4808 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4809 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4810 {
4811 char *endf;
4812 char *filename = (char *)buffptr;
4813 while (isspace(*filename)) filename++;
4814 endf = filename + strlen8(filename);
4815 while (endf > filename && isspace(endf[-1])) endf--;
4816
4817 if (endf == filename)
4818 {
4819 fprintf(outfile, "** File name expected after %s\n", name);
4820 return PR_ABEND;
4821 }
4822
4823 *endf = 0;
4824 *fptr = fopen((const char *)filename, mode);
4825 if (*fptr == NULL)
4826 {
4827 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4828 return PR_ABEND;
4829 }
4830
4831 return PR_OK;
4832 }
4833
4834
4835
4836 /*************************************************
4837 * Process command line *
4838 *************************************************/
4839
4840 /* This function is called for lines beginning with # and a character that is
4841 not ! or whitespace, when encountered between tests, which means that there is
4842 no compiled pattern (compiled_code is NULL). The line is in buffer.
4843
4844 Arguments: none
4845
4846 Returns: PR_OK continue processing next line
4847 PR_SKIP skip to a blank line
4848 PR_ABEND abort the pcre2test run
4849 */
4850
4851 static int
process_command(void)4852 process_command(void)
4853 {
4854 FILE *f;
4855 PCRE2_SIZE serial_size;
4856 size_t i;
4857 int rc, cmd, cmdlen, yield;
4858 uint16_t first_listed_newline;
4859 const char *cmdname;
4860 uint8_t *argptr, *serial;
4861
4862 yield = PR_OK;
4863 cmd = CMD_UNKNOWN;
4864 cmdlen = 0;
4865
4866 for (i = 0; i < cmdlistcount; i++)
4867 {
4868 cmdname = cmdlist[i].name;
4869 cmdlen = strlen(cmdname);
4870 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4871 isspace(buffer[cmdlen+1]))
4872 {
4873 cmd = cmdlist[i].value;
4874 break;
4875 }
4876 }
4877
4878 argptr = buffer + cmdlen + 1;
4879
4880 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4881 {
4882 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4883 return PR_ABEND;
4884 }
4885
4886 switch(cmd)
4887 {
4888 case CMD_UNKNOWN:
4889 fprintf(outfile, "** Unknown command: %s", buffer);
4890 break;
4891
4892 case CMD_FORBID_UTF:
4893 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4894 break;
4895
4896 case CMD_PERLTEST:
4897 restrict_for_perl_test = TRUE;
4898 break;
4899
4900 /* Set default pattern modifiers */
4901
4902 case CMD_PATTERN:
4903 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4904 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4905 def_patctl.jit = JIT_DEFAULT;
4906 break;
4907
4908 /* Set default subject modifiers */
4909
4910 case CMD_SUBJECT:
4911 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4912 break;
4913
4914 /* Check the default newline, and if not one of those listed, set up the
4915 first one to be forced. An empty list unsets. */
4916
4917 case CMD_NEWLINE_DEFAULT:
4918 local_newline_default = 0; /* Unset */
4919 first_listed_newline = 0;
4920 for (;;)
4921 {
4922 while (isspace(*argptr)) argptr++;
4923 if (*argptr == 0) break;
4924 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4925 {
4926 size_t nlen = strlen(newlines[i]);
4927 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4928 isspace(argptr[nlen]))
4929 {
4930 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4931 if (first_listed_newline == 0) first_listed_newline = i;
4932 }
4933 }
4934 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4935 }
4936 local_newline_default = first_listed_newline;
4937 break;
4938
4939 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4940 the compiled pattern (e.g. to give information) are permitted. The default
4941 pattern modifiers are ignored. */
4942
4943 case CMD_POP:
4944 case CMD_POPCOPY:
4945 if (patstacknext <= 0)
4946 {
4947 fprintf(outfile, "** Can't pop off an empty stack\n");
4948 return PR_SKIP;
4949 }
4950 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4951 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4952 return PR_SKIP;
4953
4954 if (cmd == CMD_POP)
4955 {
4956 SET(compiled_code, patstack[--patstacknext]);
4957 }
4958 else
4959 {
4960 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4961 }
4962
4963 if (pat_patctl.jit != 0)
4964 {
4965 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4966 }
4967 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4968 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4969 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4970 {
4971 rc = show_pattern_info();
4972 if (rc != PR_OK) return rc;
4973 }
4974 break;
4975
4976 /* Save the stack of compiled patterns to a file, then empty the stack. */
4977
4978 case CMD_SAVE:
4979 if (patstacknext <= 0)
4980 {
4981 fprintf(outfile, "** No stacked patterns to save\n");
4982 return PR_OK;
4983 }
4984
4985 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
4986 if (rc != PR_OK) return rc;
4987
4988 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4989 general_context);
4990 if (rc < 0)
4991 {
4992 fclose(f);
4993 if (!serial_error(rc, "Serialization")) return PR_ABEND;
4994 break;
4995 }
4996
4997 /* Write the length at the start of the file to make it straightforward to
4998 get the right memory when re-loading. This saves having to read the file size
4999 in different operating systems. To allow for different endianness (even
5000 though reloading with the opposite endianness does not work), write the
5001 length byte-by-byte. */
5002
5003 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5004 if (fwrite(serial, 1, serial_size, f) != serial_size)
5005 {
5006 fprintf(outfile, "** Wrong return from fwrite()\n");
5007 fclose(f);
5008 return PR_ABEND;
5009 }
5010
5011 fclose(f);
5012 PCRE2_SERIALIZE_FREE(serial);
5013 while(patstacknext > 0)
5014 {
5015 SET(compiled_code, patstack[--patstacknext]);
5016 SUB1(pcre2_code_free, compiled_code);
5017 }
5018 SET(compiled_code, NULL);
5019 break;
5020
5021 /* Load a set of compiled patterns from a file onto the stack */
5022
5023 case CMD_LOAD:
5024 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5025 if (rc != PR_OK) return rc;
5026
5027 serial_size = 0;
5028 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5029
5030 serial = malloc(serial_size);
5031 if (serial == NULL)
5032 {
5033 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5034 SIZ_CAST serial_size);
5035 fclose(f);
5036 return PR_ABEND;
5037 }
5038
5039 i = fread(serial, 1, serial_size, f);
5040 fclose(f);
5041
5042 if (i != serial_size)
5043 {
5044 fprintf(outfile, "** Wrong return from fread()\n");
5045 yield = PR_ABEND;
5046 }
5047 else
5048 {
5049 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5050 if (rc < 0)
5051 {
5052 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5053 }
5054 else
5055 {
5056 if (rc + patstacknext > PATSTACKSIZE)
5057 {
5058 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5059 rc, (rc == 1)? "" : "s");
5060 rc = PATSTACKSIZE - patstacknext;
5061 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5062 (rc == 1)? "" : "s");
5063 }
5064 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5065 general_context);
5066 if (rc < 0)
5067 {
5068 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5069 }
5070 else patstacknext += rc;
5071 }
5072 }
5073
5074 free(serial);
5075 break;
5076
5077 /* Load a set of binary tables into tables3. */
5078
5079 case CMD_LOADTABLES:
5080 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5081 if (rc != PR_OK) return rc;
5082
5083 if (tables3 == NULL)
5084 {
5085 (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5086 tables3 = malloc(loadtables_length);
5087 }
5088
5089 if (tables3 == NULL)
5090 {
5091 fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5092 yield = PR_ABEND;
5093 }
5094 else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5095 {
5096 fprintf(outfile, "** Wrong return from fread()\n");
5097 yield = PR_ABEND;
5098 }
5099
5100 fclose(f);
5101 break;
5102 }
5103
5104 return yield;
5105 }
5106
5107
5108
5109 /*************************************************
5110 * Process pattern line *
5111 *************************************************/
5112
5113 /* This function is called when the input buffer contains the start of a
5114 pattern. The first character is known to be a valid delimiter. The pattern is
5115 read, modifiers are interpreted, and a suitable local context is set up for
5116 this test. The pattern is then compiled.
5117
5118 Arguments: none
5119
5120 Returns: PR_OK continue processing next line
5121 PR_SKIP skip to a blank line
5122 PR_ABEND abort the pcre2test run
5123 */
5124
5125 static int
process_pattern(void)5126 process_pattern(void)
5127 {
5128 BOOL utf;
5129 uint32_t k;
5130 uint8_t *p = buffer;
5131 unsigned int delimiter = *p++;
5132 int errorcode;
5133 void *use_pat_context;
5134 uint32_t use_forbid_utf = forbid_utf;
5135 PCRE2_SIZE patlen;
5136 PCRE2_SIZE valgrind_access_length;
5137 PCRE2_SIZE erroroffset;
5138
5139 /* The perltest.sh script supports only / as a delimiter. */
5140
5141 if (restrict_for_perl_test && delimiter != '/')
5142 {
5143 fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5144 return PR_ABEND;
5145 }
5146
5147 /* Initialize the context and pattern/data controls for this test from the
5148 defaults. */
5149
5150 PATCTXCPY(pat_context, default_pat_context);
5151 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5152
5153 /* Find the end of the pattern, reading more lines if necessary. */
5154
5155 for(;;)
5156 {
5157 while (*p != 0)
5158 {
5159 if (*p == '\\' && p[1] != 0) p++;
5160 else if (*p == delimiter) break;
5161 p++;
5162 }
5163 if (*p != 0) break;
5164 if ((p = extend_inputline(infile, p, " > ")) == NULL)
5165 {
5166 fprintf(outfile, "** Unexpected EOF\n");
5167 return PR_ABEND;
5168 }
5169 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5170 }
5171
5172 /* If the first character after the delimiter is backslash, make the pattern
5173 end with backslash. This is purely to provide a way of testing for the error
5174 message when a pattern ends with backslash. */
5175
5176 if (p[1] == '\\') *p++ = '\\';
5177
5178 /* Terminate the pattern at the delimiter, and compute the length. */
5179
5180 *p++ = 0;
5181 patlen = p - buffer - 2;
5182
5183 /* Look for modifiers and options after the final delimiter. */
5184
5185 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5186
5187 /* Note that the match_invalid_utf option also sets utf when passed to
5188 pcre2_compile(). */
5189
5190 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5191
5192 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5193 exclusive with the utf modifier. */
5194
5195 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5196 {
5197 if (test_mode == PCRE8_MODE)
5198 {
5199 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5200 return PR_SKIP;
5201 }
5202 if (utf)
5203 {
5204 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5205 return PR_SKIP;
5206 }
5207 }
5208
5209 /* The convert and posix modifiers are mutually exclusive. */
5210
5211 if (pat_patctl.convert_type != CONVERT_UNSET &&
5212 (pat_patctl.control & CTL_POSIX) != 0)
5213 {
5214 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5215 return PR_SKIP;
5216 }
5217
5218 /* Check for mutually exclusive control modifiers. At present, these are all in
5219 the first control word. */
5220
5221 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5222 {
5223 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5224 if (c != 0 && c != (c & (~c+1)))
5225 {
5226 show_controls(c, 0, "** Not allowed together:");
5227 fprintf(outfile, "\n");
5228 return PR_SKIP;
5229 }
5230 }
5231
5232 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5233 specified. */
5234
5235 if (pat_patctl.jit == 0 &&
5236 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5237 pat_patctl.jit = JIT_DEFAULT;
5238
5239 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5240 in callouts. Convert from hex if requested (literal strings in quotes may be
5241 present within the hexadecimal pairs). The result must necessarily be fewer
5242 characters so will always fit in pbuffer8. */
5243
5244 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5245 {
5246 uint8_t *pp, *pt;
5247 uint32_t c, d;
5248
5249 pt = pbuffer8;
5250 for (pp = buffer + 1; *pp != 0; pp++)
5251 {
5252 if (isspace(*pp)) continue;
5253 c = *pp++;
5254
5255 /* Handle a literal substring */
5256
5257 if (c == '\'' || c == '"')
5258 {
5259 uint8_t *pq = pp;
5260 for (;; pp++)
5261 {
5262 d = *pp;
5263 if (d == 0)
5264 {
5265 fprintf(outfile, "** Missing closing quote in hex pattern: "
5266 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5267 return PR_SKIP;
5268 }
5269 if (d == c) break;
5270 *pt++ = d;
5271 }
5272 }
5273
5274 /* Expect a hex pair */
5275
5276 else
5277 {
5278 if (!isxdigit(c))
5279 {
5280 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5281 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5282 return PR_SKIP;
5283 }
5284 if (*pp == 0)
5285 {
5286 fprintf(outfile, "** Odd number of digits in hex pattern\n");
5287 return PR_SKIP;
5288 }
5289 d = *pp;
5290 if (!isxdigit(d))
5291 {
5292 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5293 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5294 return PR_SKIP;
5295 }
5296 c = toupper(c);
5297 d = toupper(d);
5298 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5299 (isdigit(d)? (d - '0') : (d - 'A' + 10));
5300 }
5301 }
5302 *pt = 0;
5303 patlen = pt - pbuffer8;
5304 }
5305
5306 /* If not a hex string, process for repetition expansion if requested. */
5307
5308 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5309 {
5310 uint8_t *pp, *pt;
5311
5312 pt = pbuffer8;
5313 for (pp = buffer + 1; *pp != 0; pp++)
5314 {
5315 uint8_t *pc = pp;
5316 uint32_t count = 1;
5317 size_t length = 1;
5318
5319 /* Check for replication syntax; if not found, the defaults just set will
5320 prevail and one character will be copied. */
5321
5322 if (pp[0] == '\\' && pp[1] == '[')
5323 {
5324 uint8_t *pe;
5325 for (pe = pp + 2; *pe != 0; pe++)
5326 {
5327 if (pe[0] == ']' && pe[1] == '{')
5328 {
5329 uint32_t clen = pe - pc - 2;
5330 uint32_t i = 0;
5331 unsigned long uli;
5332 char *endptr;
5333
5334 pe += 2;
5335 uli = strtoul((const char *)pe, &endptr, 10);
5336 if (U32OVERFLOW(uli))
5337 {
5338 fprintf(outfile, "** Pattern repeat count too large\n");
5339 return PR_SKIP;
5340 }
5341
5342 i = (uint32_t)uli;
5343 pe = (uint8_t *)endptr;
5344 if (*pe == '}')
5345 {
5346 if (i == 0)
5347 {
5348 fprintf(outfile, "** Zero repeat not allowed\n");
5349 return PR_SKIP;
5350 }
5351 pc += 2;
5352 count = i;
5353 length = clen;
5354 pp = pe;
5355 break;
5356 }
5357 }
5358 }
5359 }
5360
5361 /* Add to output. If the buffer is too small expand it. The function for
5362 expanding buffers always keeps buffer and pbuffer8 in step as far as their
5363 size goes. */
5364
5365 while (pt + count * length > pbuffer8 + pbuffer8_size)
5366 {
5367 size_t pc_offset = pc - buffer;
5368 size_t pp_offset = pp - buffer;
5369 size_t pt_offset = pt - pbuffer8;
5370 expand_input_buffers();
5371 pc = buffer + pc_offset;
5372 pp = buffer + pp_offset;
5373 pt = pbuffer8 + pt_offset;
5374 }
5375
5376 for (; count > 0; count--)
5377 {
5378 memcpy(pt, pc, length);
5379 pt += length;
5380 }
5381 }
5382
5383 *pt = 0;
5384 patlen = pt - pbuffer8;
5385
5386 if ((pat_patctl.control & CTL_INFO) != 0)
5387 fprintf(outfile, "Expanded: %s\n", pbuffer8);
5388 }
5389
5390 /* Neither hex nor expanded, just copy the input verbatim. */
5391
5392 else
5393 {
5394 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5395 }
5396
5397 /* Sort out character tables */
5398
5399 if (pat_patctl.locale[0] != 0)
5400 {
5401 if (pat_patctl.tables_id != 0)
5402 {
5403 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5404 return PR_SKIP;
5405 }
5406 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5407 {
5408 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5409 return PR_SKIP;
5410 }
5411 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5412 {
5413 strcpy((char *)locale_name, (char *)pat_patctl.locale);
5414 if (locale_tables != NULL) free((void *)locale_tables);
5415 PCRE2_MAKETABLES(locale_tables);
5416 }
5417 use_tables = locale_tables;
5418 }
5419
5420 else switch (pat_patctl.tables_id)
5421 {
5422 case 0: use_tables = NULL; break;
5423 case 1: use_tables = tables1; break;
5424 case 2: use_tables = tables2; break;
5425
5426 case 3:
5427 if (tables3 == NULL)
5428 {
5429 fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5430 "been loaded\n");
5431 return PR_SKIP;
5432 }
5433 use_tables = tables3;
5434 break;
5435
5436 default:
5437 fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5438 return PR_SKIP;
5439 }
5440
5441 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5442
5443 /* Set up for the stackguard test. */
5444
5445 if (pat_patctl.stackguard_test != 0)
5446 {
5447 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5448 }
5449
5450 /* Handle compiling via the POSIX interface, which doesn't support the
5451 timing, showing, or debugging options, nor the ability to pass over
5452 local character tables. Neither does it have 16-bit or 32-bit support. */
5453
5454 if ((pat_patctl.control & CTL_POSIX) != 0)
5455 {
5456 #ifdef SUPPORT_PCRE2_8
5457 int rc;
5458 int cflags = 0;
5459 const char *msg = "** Ignored with POSIX interface:";
5460 #endif
5461
5462 if (test_mode != PCRE8_MODE)
5463 {
5464 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5465 return PR_SKIP;
5466 }
5467
5468 #ifdef SUPPORT_PCRE2_8
5469 /* Check for features that the POSIX interface does not support. */
5470
5471 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5472 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5473 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5474 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5475 if (timeit > 0) prmsg(&msg, "timing");
5476 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5477
5478 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5479 {
5480 show_compile_options(
5481 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5482 msg = "";
5483 }
5484
5485 if ((FLD(pat_context, extra_options) &
5486 ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5487 {
5488 show_compile_extra_options(
5489 FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5490 msg, "");
5491 msg = "";
5492 }
5493
5494 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5495 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5496 {
5497 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5498 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5499 msg = "";
5500 }
5501
5502 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5503 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5504 prmsg(&msg, "max_pattern_length");
5505 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5506 prmsg(&msg, "parens_nest_limit");
5507
5508 if (msg[0] == 0) fprintf(outfile, "\n");
5509
5510 /* Translate PCRE2 options to POSIX options and then compile. */
5511
5512 if (utf) cflags |= REG_UTF;
5513 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5514 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5515 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5516 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5517 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5518 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5519 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5520
5521 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5522 {
5523 preg.re_endp = (char *)pbuffer8 + patlen;
5524 cflags |= REG_PEND;
5525 }
5526
5527 rc = regcomp(&preg, (char *)pbuffer8, cflags);
5528
5529 /* Compiling failed */
5530
5531 if (rc != 0)
5532 {
5533 size_t bsize, usize;
5534 int psize;
5535
5536 preg.re_pcre2_code = NULL; /* In case something was left in there */
5537 preg.re_match_data = NULL;
5538
5539 bsize = (pat_patctl.regerror_buffsize != 0)?
5540 pat_patctl.regerror_buffsize : pbuffer8_size;
5541 if (bsize + 8 < pbuffer8_size)
5542 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5543 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5544
5545 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5546 versions of snprintf() put a zero byte at the end, but others do not.
5547 Therefore, we print a maximum of one less than the size of the buffer. */
5548
5549 psize = (int)bsize - 1;
5550 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5551 if (usize > bsize)
5552 {
5553 fprintf(outfile, "** regerror() message truncated\n");
5554 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5555 fprintf(outfile, "** regerror() buffer overflow\n");
5556 }
5557 return PR_SKIP;
5558 }
5559
5560 /* Compiling succeeded. Check that the values in the preg block are sensible.
5561 It can happen that pcre2test is accidentally linked with a different POSIX
5562 library which succeeds, but of course puts different things into preg. In
5563 this situation, calling regfree() may cause a segfault (or invalid free() in
5564 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5565 calling of regfree() on exit. */
5566
5567 if (preg.re_pcre2_code == NULL ||
5568 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5569 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5570 preg.re_match_data == NULL ||
5571 preg.re_cflags != cflags)
5572 {
5573 fprintf(outfile,
5574 "** The regcomp() function returned zero (success), but the values set\n"
5575 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5576 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5577 "** some other POSIX regex library.\n**\n");
5578 preg.re_pcre2_code = NULL;
5579 return PR_ABEND;
5580 }
5581
5582 return PR_OK;
5583 #endif /* SUPPORT_PCRE2_8 */
5584 }
5585
5586 /* Handle compiling via the native interface. Controls that act later are
5587 ignored with "push". Replacements are locked out. */
5588
5589 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5590 {
5591 if (pat_patctl.replacement[0] != 0)
5592 {
5593 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5594 return PR_OK;
5595 }
5596 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5597 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5598 {
5599 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5600 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5601 "** Ignored when compiled pattern is stacked with 'push':");
5602 fprintf(outfile, "\n");
5603 }
5604 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5605 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5606 {
5607 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5608 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5609 "** Applies only to compile when pattern is stacked with 'push':");
5610 fprintf(outfile, "\n");
5611 }
5612 }
5613
5614 /* Convert the input in non-8-bit modes. */
5615
5616 errorcode = 0;
5617
5618 #ifdef SUPPORT_PCRE2_16
5619 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5620 #endif
5621
5622 #ifdef SUPPORT_PCRE2_32
5623 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5624 #endif
5625
5626 switch(errorcode)
5627 {
5628 case -1:
5629 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5630 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5631 return PR_SKIP;
5632
5633 case -2:
5634 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5635 "cannot be converted to UTF\n");
5636 return PR_SKIP;
5637
5638 case -3:
5639 fprintf(outfile, "** Failed: character value greater than 0xffff "
5640 "cannot be converted to 16-bit in non-UTF mode\n");
5641 return PR_SKIP;
5642
5643 default:
5644 break;
5645 }
5646
5647 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5648 patlen. If it is to be converted, copy the result back afterwards so that it
5649 ends up back in the usual place. */
5650
5651 if (pat_patctl.convert_type != CONVERT_UNSET)
5652 {
5653 int rc;
5654 int convert_return = PR_OK;
5655 uint32_t convert_options = pat_patctl.convert_type;
5656 void *converted_pattern;
5657 PCRE2_SIZE converted_length;
5658
5659 if (pat_patctl.convert_length != 0)
5660 {
5661 converted_length = pat_patctl.convert_length;
5662 converted_pattern = malloc(converted_length * code_unit_size);
5663 if (converted_pattern == NULL)
5664 {
5665 fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5666 return PR_SKIP;
5667 }
5668 }
5669 else converted_pattern = NULL; /* Let the library allocate */
5670
5671 if (utf) convert_options |= PCRE2_CONVERT_UTF;
5672 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5673 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5674
5675 CONCTXCPY(con_context, default_con_context);
5676
5677 if (pat_patctl.convert_glob_escape != 0)
5678 {
5679 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5680 pat_patctl.convert_glob_escape;
5681 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5682 if (rc != 0)
5683 {
5684 fprintf(outfile, "** Invalid glob escape '%c'\n",
5685 pat_patctl.convert_glob_escape);
5686 convert_return = PR_SKIP;
5687 goto CONVERT_FINISH;
5688 }
5689 }
5690
5691 if (pat_patctl.convert_glob_separator != 0)
5692 {
5693 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5694 if (rc != 0)
5695 {
5696 fprintf(outfile, "** Invalid glob separator '%c'\n",
5697 pat_patctl.convert_glob_separator);
5698 convert_return = PR_SKIP;
5699 goto CONVERT_FINISH;
5700 }
5701 }
5702
5703 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5704 &converted_pattern, &converted_length, con_context);
5705
5706 if (rc != 0)
5707 {
5708 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5709 SIZ_CAST converted_length);
5710 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5711 }
5712
5713 /* Output the converted pattern, then copy it. */
5714
5715 else
5716 {
5717 PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5718 fprintf(outfile, "\n");
5719 patlen = converted_length;
5720 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5721 }
5722
5723 /* Free the converted pattern. */
5724
5725 CONVERT_FINISH:
5726 if (pat_patctl.convert_length != 0)
5727 free(converted_pattern);
5728 else
5729 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5730
5731 /* Return if conversion was unsuccessful. */
5732
5733 if (convert_return != PR_OK) return convert_return;
5734 }
5735
5736 /* By default we pass a zero-terminated pattern, but a length is passed if
5737 "use_length" was specified or this is a hex pattern (which might contain binary
5738 zeros). When valgrind is supported, arrange for the unused part of the buffer
5739 to be marked as no access. */
5740
5741 valgrind_access_length = patlen;
5742 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5743 {
5744 patlen = PCRE2_ZERO_TERMINATED;
5745 valgrind_access_length += 1; /* For the terminating zero */
5746 }
5747
5748 #ifdef SUPPORT_VALGRIND
5749 #ifdef SUPPORT_PCRE2_8
5750 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5751 {
5752 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5753 pbuffer8_size - valgrind_access_length);
5754 }
5755 #endif
5756 #ifdef SUPPORT_PCRE2_16
5757 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5758 {
5759 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5760 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5761 }
5762 #endif
5763 #ifdef SUPPORT_PCRE2_32
5764 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5765 {
5766 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5767 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5768 }
5769 #endif
5770 #else /* Valgrind not supported */
5771 (void)valgrind_access_length; /* Avoid compiler warning */
5772 #endif
5773
5774 /* If #newline_default has been used and the library was not compiled with an
5775 appropriate default newline setting, local_newline_default will be non-zero. We
5776 use this if there is no explicit newline modifier. */
5777
5778 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5779 {
5780 SETFLD(pat_context, newline_convention, local_newline_default);
5781 }
5782
5783 /* The null_context modifier is used to test calling pcre2_compile() with a
5784 NULL context. */
5785
5786 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5787 NULL : PTR(pat_context);
5788
5789 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5790 and PCRE2_NEVER_UCP are invalid with it. */
5791
5792 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5793
5794 /* Compile many times when timing. */
5795
5796 if (timeit > 0)
5797 {
5798 int i;
5799 clock_t time_taken = 0;
5800 for (i = 0; i < timeit; i++)
5801 {
5802 clock_t start_time = clock();
5803 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5804 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5805 use_pat_context);
5806 time_taken += clock() - start_time;
5807 if (TEST(compiled_code, !=, NULL))
5808 { SUB1(pcre2_code_free, compiled_code); }
5809 }
5810 total_compile_time += time_taken;
5811 fprintf(outfile, "Compile time %.4f milliseconds\n",
5812 (((double)time_taken * 1000.0) / (double)timeit) /
5813 (double)CLOCKS_PER_SEC);
5814 }
5815
5816 /* A final compile that is used "for real". */
5817
5818 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5819 &errorcode, &erroroffset, use_pat_context);
5820
5821 /* Call the JIT compiler if requested. When timing, we must free and recompile
5822 the pattern each time because that is the only way to free the JIT compiled
5823 code. We know that compilation will always succeed. */
5824
5825 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5826 {
5827 if (timeit > 0)
5828 {
5829 int i;
5830 clock_t time_taken = 0;
5831
5832 for (i = 0; i < timeit; i++)
5833 {
5834 clock_t start_time;
5835 SUB1(pcre2_code_free, compiled_code);
5836 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5837 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5838 use_pat_context);
5839 start_time = clock();
5840 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5841 time_taken += clock() - start_time;
5842 }
5843 total_jit_compile_time += time_taken;
5844 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5845 (((double)time_taken * 1000.0) / (double)timeit) /
5846 (double)CLOCKS_PER_SEC);
5847 }
5848 else
5849 {
5850 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5851 }
5852 }
5853
5854 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5855 and 32-bit buffers can be marked completely undefined, but we must leave the
5856 pattern in the 8-bit buffer defined because it may be read from a callout
5857 during matching. */
5858
5859 #ifdef SUPPORT_VALGRIND
5860 #ifdef SUPPORT_PCRE2_8
5861 if (test_mode == PCRE8_MODE)
5862 {
5863 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5864 pbuffer8_size - valgrind_access_length);
5865 }
5866 #endif
5867 #ifdef SUPPORT_PCRE2_16
5868 if (test_mode == PCRE16_MODE)
5869 {
5870 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5871 }
5872 #endif
5873 #ifdef SUPPORT_PCRE2_32
5874 if (test_mode == PCRE32_MODE)
5875 {
5876 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5877 }
5878 #endif
5879 #endif
5880
5881 /* Compilation failed; go back for another re, skipping to blank line
5882 if non-interactive. */
5883
5884 if (TEST(compiled_code, ==, NULL))
5885 {
5886 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5887 (int)erroroffset);
5888 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5889 return PR_SKIP;
5890 }
5891
5892 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5893 locked out at compile time, but we must also check for occurrences of \P, \p,
5894 and \X, which are only supported when Unicode is supported. */
5895
5896 if (forbid_utf != 0)
5897 {
5898 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5899 {
5900 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5901 "#forbid_utf command\n");
5902 return PR_SKIP;
5903 }
5904 }
5905
5906 /* Remember the maximum lookbehind, for partial matching. */
5907
5908 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5909 return PR_ABEND;
5910
5911 /* Remember the number of captures. */
5912
5913 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5914 return PR_ABEND;
5915
5916 /* If an explicit newline modifier was given, set the information flag in the
5917 pattern so that it is preserved over push/pop. */
5918
5919 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5920 {
5921 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5922 }
5923
5924 /* Output code size and other information if requested. */
5925
5926 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5927 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5928 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5929 {
5930 int rc = show_pattern_info();
5931 if (rc != PR_OK) return rc;
5932 }
5933
5934 /* The "push" control requests that the compiled pattern be remembered on a
5935 stack. This is mainly for testing the serialization functionality. */
5936
5937 if ((pat_patctl.control & CTL_PUSH) != 0)
5938 {
5939 if (patstacknext >= PATSTACKSIZE)
5940 {
5941 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5942 return PR_ABEND;
5943 }
5944 patstack[patstacknext++] = PTR(compiled_code);
5945 SET(compiled_code, NULL);
5946 }
5947
5948 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5949 copy of the pattern, the latter with a copy of its character tables. This tests
5950 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5951
5952 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5953 {
5954 if (patstacknext >= PATSTACKSIZE)
5955 {
5956 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5957 return PR_ABEND;
5958 }
5959 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5960 {
5961 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5962 }
5963 else
5964 {
5965 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5966 compiled_code); }
5967 }
5968
5969 return PR_OK;
5970 }
5971
5972
5973
5974 /*************************************************
5975 * Check heap, match or depth limit *
5976 *************************************************/
5977
5978 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5979 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5980
5981 Arguments:
5982 pp the subject string
5983 ulen length of subject or PCRE2_ZERO_TERMINATED
5984 errnumber defines which limit to test
5985 msg string to include in final message
5986
5987 Returns: the return from the final match function call
5988 */
5989
5990 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5991 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5992 {
5993 int capcount;
5994 uint32_t min = 0;
5995 uint32_t mid = 64;
5996 uint32_t max = UINT32_MAX;
5997
5998 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5999 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6000 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6001
6002 for (;;)
6003 {
6004 uint32_t stack_start = 0;
6005
6006 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6007 {
6008 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6009 }
6010 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6011 {
6012 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6013 }
6014 else
6015 {
6016 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6017 }
6018
6019 if ((dat_datctl.control & CTL_DFA) != 0)
6020 {
6021 stack_start = DFA_START_RWS_SIZE/1024;
6022 if (dfa_workspace == NULL)
6023 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6024 if (dfa_matched++ == 0)
6025 dfa_workspace[0] = -1; /* To catch bad restart */
6026 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6027 dat_datctl.options, match_data,
6028 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6029 }
6030
6031 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6032 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6033 dat_datctl.options, match_data, PTR(dat_context));
6034
6035 else
6036 {
6037 stack_start = START_FRAMES_SIZE/1024;
6038 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6039 dat_datctl.options, match_data, PTR(dat_context));
6040 }
6041
6042 if (capcount == errnumber)
6043 {
6044 if ((mid & 0x80000000u) != 0)
6045 {
6046 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6047 "restriction\n", msg);
6048 break;
6049 }
6050
6051 min = mid;
6052 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6053 }
6054 else if (capcount >= 0 ||
6055 capcount == PCRE2_ERROR_NOMATCH ||
6056 capcount == PCRE2_ERROR_PARTIAL)
6057 {
6058 /* If we've not hit the error with a heap limit less than the size of the
6059 initial stack frame vector (for pcre2_match()) or the initial stack
6060 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6061 the minimum limit is zero; there's no need to go on. The other limits are
6062 always greater than zero. */
6063
6064 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6065 {
6066 fprintf(outfile, "Minimum %s limit = 0\n", msg);
6067 break;
6068 }
6069 if (mid == min + 1)
6070 {
6071 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6072 break;
6073 }
6074 max = mid;
6075 mid = (min + max)/2;
6076 }
6077 else break; /* Some other error */
6078 }
6079
6080 return capcount;
6081 }
6082
6083
6084
6085 /*************************************************
6086 * Substitute callout function *
6087 *************************************************/
6088
6089 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6090 Print out the data that is passed back. The substitute callout block is
6091 identical for all code unit widths, so we just pick one.
6092
6093 Arguments:
6094 scb pointer to substitute callout block
6095 data_ptr callout data
6096
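Returns:     0 normally; -1 (reported as STOPPED) when the substitution count
               equals the substitute_stop setting; +1 (reported as SKIPPED)
               when it equals the substitute_skip setting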
6098 */
6099
6100 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6101 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6102 void *data_ptr)
6103 {
6104 int yield = 0;
6105 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6106 (void)data_ptr; /* Not used */
6107
6108 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6109 scb->subscount, scb->oveccount,
6110 SIZ_CAST scb->ovector[0], SIZ_CAST scb->ovector[1]);
6111
6112 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6113 utf, outfile);
6114
6115 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6116 SIZ_CAST scb->output_offsets[0], SIZ_CAST scb->output_offsets[1]);
6117
6118 PCHARSV(scb->output, scb->output_offsets[0],
6119 scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6120
6121 if (scb->subscount == dat_datctl.substitute_stop)
6122 {
6123 yield = -1;
6124 fprintf(outfile, " STOPPED");
6125 }
6126 else if (scb->subscount == dat_datctl.substitute_skip)
6127 {
6128 yield = +1;
6129 fprintf(outfile, " SKIPPED");
6130 }
6131
6132 fprintf(outfile, "\"\n");
6133 return yield;
6134 }
6135
6136
6137 /*************************************************
6138 * Callout function *
6139 *************************************************/
6140
6141 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6142 we are in the match (unless suppressed). Yield zero unless more callouts than
6143 the fail count, or the callout data is not zero. The only differences in the
6144 callout block for different code unit widths are that the pointers to the
6145 subject, the most recent MARK, and a callout argument string point to strings
6146 of the appropriate width. Casts can be used to deal with this.
6147
6148 Arguments:
6149 cb a pointer to a callout block
6150 callout_data_ptr the provided callout data
6151
6152 Returns: 0 or 1 or an error, as determined by settings
6153 */
6154
6155 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6156 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6157 {
6158 FILE *f, *fdefault;
6159 uint32_t i, pre_start, post_start, subject_length;
6160 PCRE2_SIZE current_position;
6161 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6162 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6163 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6164
6165 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6166 happens only once in simple cases, but we want to repeat after any additional
6167 output caused by CALLOUT_EXTRA. */
6168
6169 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6170 NULL : outfile;
6171
6172 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6173 {
6174 f = outfile;
6175 switch (cb->callout_flags)
6176 {
6177 case PCRE2_CALLOUT_BACKTRACK:
6178 fprintf(f, "Backtrack\n");
6179 break;
6180
6181 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6182 fprintf(f, "Backtrack\nNo other matching paths\n");
6183 /* Fall through */
6184
6185 case PCRE2_CALLOUT_STARTMATCH:
6186 fprintf(f, "New match attempt\n");
6187 break;
6188
6189 default:
6190 f = fdefault;
6191 break;
6192 }
6193 }
6194 else f = fdefault;
6195
6196 /* For a callout with a string argument, show the string first because there
6197 isn't a tidy way to fit it in the rest of the data. */
6198
6199 if (cb->callout_string != NULL)
6200 {
6201 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6202 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6203 SIZ_CAST cb->callout_string_offset, delimiter);
6204 PCHARSV(cb->callout_string, 0,
6205 cb->callout_string_length, utf, outfile);
6206 for (i = 0; callout_start_delims[i] != 0; i++)
6207 if (delimiter == callout_start_delims[i])
6208 {
6209 delimiter = callout_end_delims[i];
6210 break;
6211 }
6212 fprintf(outfile, "%c", delimiter);
6213 if (!callout_capture) fprintf(outfile, "\n");
6214 }
6215
6216 /* Show captured strings if required */
6217
6218 if (callout_capture)
6219 {
6220 if (cb->callout_string == NULL)
6221 fprintf(outfile, "Callout %d:", cb->callout_number);
6222 fprintf(outfile, " last capture = %d\n", cb->capture_last);
6223 for (i = 2; i < cb->capture_top * 2; i += 2)
6224 {
6225 fprintf(outfile, "%2d: ", i/2);
6226 if (cb->offset_vector[i] == PCRE2_UNSET)
6227 fprintf(outfile, "<unset>");
6228 else
6229 {
6230 PCHARSV(cb->subject, cb->offset_vector[i],
6231 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6232 }
6233 fprintf(outfile, "\n");
6234 }
6235 }
6236
6237 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6238 non-printing characters), the first time, or if giving full details. On
6239 subsequent calls in the same match, we use PCHARS() just to find the printed
6240 lengths of the substrings. */
6241
6242 if (callout_where)
6243 {
6244 if (f != NULL) fprintf(f, "--->");
6245
6246 /* The subject before the match start. */
6247
6248 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6249
6250 /* If a lookbehind is involved, the current position may be earlier than the
6251 match start. If so, use the match start instead. */
6252
6253 current_position = (cb->current_position >= cb->start_match)?
6254 cb->current_position : cb->start_match;
6255
6256 /* The subject between the match start and the current position. */
6257
6258 PCHARS(post_start, cb->subject, cb->start_match,
6259 current_position - cb->start_match, utf, f);
6260
6261 /* Print from the current position to the end. */
6262
6263 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6264 utf, f);
6265
6266 /* Calculate the total subject printed length (no print). */
6267
6268 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6269
6270 if (f != NULL) fprintf(f, "\n");
6271
6272 /* For automatic callouts, show the pattern offset. Otherwise, for a
6273 numerical callout whose number has not already been shown with captured
6274 strings, show the number here. A callout with a string argument has been
6275 displayed above. */
6276
6277 if (cb->callout_number == 255)
6278 {
6279 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6280 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6281 }
6282 else
6283 {
6284 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6285 else fprintf(outfile, "%3d ", cb->callout_number);
6286 }
6287
6288 /* Now show position indicators */
6289
6290 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6291 fprintf(outfile, "^");
6292
6293 if (post_start > 0)
6294 {
6295 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6296 fprintf(outfile, "^");
6297 }
6298
6299 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6300 fprintf(outfile, " ");
6301
6302 if (cb->next_item_length != 0)
6303 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6304 pbuffer8 + cb->pattern_position);
6305 else
6306 fprintf(outfile, "End of pattern");
6307
6308 fprintf(outfile, "\n");
6309 }
6310
6311 first_callout = FALSE;
6312
6313 /* Show any mark info */
6314
6315 if (cb->mark != last_callout_mark)
6316 {
6317 if (cb->mark == NULL)
6318 fprintf(outfile, "Latest Mark: <unset>\n");
6319 else
6320 {
6321 fprintf(outfile, "Latest Mark: ");
6322 PCHARSV(cb->mark, -1, -1, utf, outfile);
6323 putc('\n', outfile);
6324 }
6325 last_callout_mark = cb->mark;
6326 }
6327
6328 /* Show callout data */
6329
6330 if (callout_data_ptr != NULL)
6331 {
6332 int callout_data = *((int32_t *)callout_data_ptr);
6333 if (callout_data != 0)
6334 {
6335 fprintf(outfile, "Callout data = %d\n", callout_data);
6336 return callout_data;
6337 }
6338 }
6339
6340 /* Keep count and give the appropriate return code */
6341
6342 callout_count++;
6343
6344 if (cb->callout_number == dat_datctl.cerror[0] &&
6345 callout_count >= dat_datctl.cerror[1])
6346 return PCRE2_ERROR_CALLOUT;
6347
6348 if (cb->callout_number == dat_datctl.cfail[0] &&
6349 callout_count >= dat_datctl.cfail[1])
6350 return 1;
6351
6352 return 0;
6353 }
6354
6355
6356
6357 /*************************************************
6358 * Handle *MARK and copy/get tests *
6359 *************************************************/
6360
6361 /* This function is called after complete and partial matches. It runs the
6362 tests for substring extraction.
6363
6364 Arguments:
6365 utf TRUE for utf
6366 capcount return from pcre2_match()
6367
6368 Returns: FALSE if print_error_message() fails
6369 */
6370
6371 static BOOL
copy_and_get(BOOL utf,int capcount)6372 copy_and_get(BOOL utf, int capcount)
6373 {
6374 int i;
6375 uint8_t *nptr;
6376
6377 /* Test copy strings by number */
6378
6379 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6380 {
6381 int rc;
6382 PCRE2_SIZE length, length2;
6383 uint32_t copybuffer[256];
6384 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6385 length = sizeof(copybuffer)/code_unit_size;
6386 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6387 if (rc < 0)
6388 {
6389 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6390 if (!print_error_message(rc, "", "\n")) return FALSE;
6391 }
6392 else
6393 {
6394 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6395 if (rc < 0)
6396 {
6397 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6398 if (!print_error_message(rc, "", "\n")) return FALSE;
6399 }
6400 else if (length2 != length)
6401 {
6402 fprintf(outfile, "Mismatched substring lengths: %"
6403 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6404 }
6405 fprintf(outfile, "%2dC ", n);
6406 PCHARSV(copybuffer, 0, length, utf, outfile);
6407 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6408 }
6409 }
6410
6411 /* Test copy strings by name */
6412
6413 nptr = dat_datctl.copy_names;
6414 for (;;)
6415 {
6416 int rc;
6417 int groupnumber;
6418 PCRE2_SIZE length, length2;
6419 uint32_t copybuffer[256];
6420 int namelen = strlen((const char *)nptr);
6421 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6422 PCRE2_SIZE cnl = namelen;
6423 #endif
6424 if (namelen == 0) break;
6425
6426 #ifdef SUPPORT_PCRE2_8
6427 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6428 #endif
6429 #ifdef SUPPORT_PCRE2_16
6430 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6431 #endif
6432 #ifdef SUPPORT_PCRE2_32
6433 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6434 #endif
6435
6436 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6437 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6438 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6439
6440 length = sizeof(copybuffer)/code_unit_size;
6441 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6442 if (rc < 0)
6443 {
6444 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6445 if (!print_error_message(rc, "", "\n")) return FALSE;
6446 }
6447 else
6448 {
6449 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6450 if (rc < 0)
6451 {
6452 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6453 if (!print_error_message(rc, "", "\n")) return FALSE;
6454 }
6455 else if (length2 != length)
6456 {
6457 fprintf(outfile, "Mismatched substring lengths: %"
6458 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6459 }
6460 fprintf(outfile, " C ");
6461 PCHARSV(copybuffer, 0, length, utf, outfile);
6462 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6463 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6464 else fprintf(outfile, " (non-unique)\n");
6465 }
6466 nptr += namelen + 1;
6467 }
6468
6469 /* Test get strings by number */
6470
6471 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6472 {
6473 int rc;
6474 PCRE2_SIZE length;
6475 void *gotbuffer;
6476 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6477 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6478 if (rc < 0)
6479 {
6480 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6481 if (!print_error_message(rc, "", "\n")) return FALSE;
6482 }
6483 else
6484 {
6485 fprintf(outfile, "%2dG ", n);
6486 PCHARSV(gotbuffer, 0, length, utf, outfile);
6487 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6488 PCRE2_SUBSTRING_FREE(gotbuffer);
6489 }
6490 }
6491
6492 /* Test get strings by name */
6493
6494 nptr = dat_datctl.get_names;
6495 for (;;)
6496 {
6497 PCRE2_SIZE length;
6498 void *gotbuffer;
6499 int rc;
6500 int groupnumber;
6501 int namelen = strlen((const char *)nptr);
6502 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6503 PCRE2_SIZE cnl = namelen;
6504 #endif
6505 if (namelen == 0) break;
6506
6507 #ifdef SUPPORT_PCRE2_8
6508 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6509 #endif
6510 #ifdef SUPPORT_PCRE2_16
6511 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6512 #endif
6513 #ifdef SUPPORT_PCRE2_32
6514 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6515 #endif
6516
6517 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6518 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6519 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6520
6521 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6522 if (rc < 0)
6523 {
6524 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6525 if (!print_error_message(rc, "", "\n")) return FALSE;
6526 }
6527 else
6528 {
6529 fprintf(outfile, " G ");
6530 PCHARSV(gotbuffer, 0, length, utf, outfile);
6531 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6532 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6533 else fprintf(outfile, " (non-unique)\n");
6534 PCRE2_SUBSTRING_FREE(gotbuffer);
6535 }
6536 nptr += namelen + 1;
6537 }
6538
6539 /* Test getting the complete list of captured strings. */
6540
6541 if ((dat_datctl.control & CTL_GETALL) != 0)
6542 {
6543 int rc;
6544 void **stringlist;
6545 PCRE2_SIZE *lengths;
6546 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6547 if (rc < 0)
6548 {
6549 fprintf(outfile, "get substring list failed (%d): ", rc);
6550 if (!print_error_message(rc, "", "\n")) return FALSE;
6551 }
6552 else
6553 {
6554 for (i = 0; i < capcount; i++)
6555 {
6556 fprintf(outfile, "%2dL ", i);
6557 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6558 putc('\n', outfile);
6559 }
6560 if (stringlist[i] != NULL)
6561 fprintf(outfile, "string list not terminated by NULL\n");
6562 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6563 }
6564 }
6565
6566 return TRUE;
6567 }
6568
6569
6570
6571 /*************************************************
6572 * Show an entire ovector *
6573 *************************************************/
6574
6575 /* This function is called after partial matching or match failure, when the
6576 "allvector" modifier is set. It is a means of checking the contents of the
6577 entire ovector, to ensure no modification of fields that should be unchanged.
6578
6579 Arguments:
6580 ovector points to the ovector
6581 oveccount number of pairs
6582
6583 Returns: nothing
6584 */
6585
6586 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6587 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6588 {
6589 uint32_t i;
6590 for (i = 0; i < 2*oveccount; i += 2)
6591 {
6592 PCRE2_SIZE start = ovector[i];
6593 PCRE2_SIZE end = ovector[i+1];
6594
6595 fprintf(outfile, "%2d: ", i/2);
6596 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6597 fprintf(outfile, "<unset>\n");
6598 else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6599 fprintf(outfile, "<unchanged>\n");
6600 else
6601 fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6602 (unsigned long int)end);
6603 }
6604 }
6605
6606
6607 /*************************************************
6608 * Process a data line *
6609 *************************************************/
6610
6611 /* The line is in buffer; it will not be empty.
6612
6613 Arguments: none
6614
6615 Returns: PR_OK continue processing next line
6616 PR_SKIP skip to a blank line
6617 PR_ABEND abort the pcre2test run
6618 */
6619
6620 static int
process_data(void)6621 process_data(void)
6622 {
6623 PCRE2_SIZE len, ulen, arg_ulen;
6624 uint32_t gmatched;
6625 uint32_t c, k;
6626 uint32_t g_notempty = 0;
6627 uint8_t *p, *pp, *start_rep;
6628 size_t needlen;
6629 void *use_dat_context;
6630 BOOL utf;
6631 BOOL subject_literal;
6632
6633 PCRE2_SIZE *ovector;
6634 PCRE2_SIZE ovecsave[3];
6635 uint32_t oveccount;
6636
6637 #ifdef SUPPORT_PCRE2_8
6638 uint8_t *q8 = NULL;
6639 #endif
6640 #ifdef SUPPORT_PCRE2_16
6641 uint16_t *q16 = NULL;
6642 #endif
6643 #ifdef SUPPORT_PCRE2_32
6644 uint32_t *q32 = NULL;
6645 #endif
6646
6647 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6648
6649 /* Copy the default context and data control blocks to the active ones. Then
6650 copy from the pattern the controls that can be set in either the pattern or the
6651 data. This allows them to be overridden in the data line. We do not do this for
6652 options because those that are common apply separately to compiling and
6653 matching. */
6654
6655 DATCTXCPY(dat_context, default_dat_context);
6656 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6657 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6658 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6659 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6660 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6661
6662 if (dat_datctl.substitute_skip == 0)
6663 dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6664 if (dat_datctl.substitute_stop == 0)
6665 dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6666
6667 /* Initialize for scanning the data line. */
6668
6669 #ifdef SUPPORT_PCRE2_8
6670 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6671 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6672 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6673 #else
6674 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6675 #endif
6676
6677 start_rep = NULL;
6678 len = strlen((const char *)buffer);
6679 while (len > 0 && isspace(buffer[len-1])) len--;
6680 buffer[len] = 0;
6681 p = buffer;
6682 while (isspace(*p)) p++;
6683
6684 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6685 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6686
6687 if (utf)
6688 {
6689 uint8_t *q;
6690 uint32_t cc;
6691 int n = 1;
6692 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6693 if (n <= 0)
6694 {
6695 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6696 "in UTF mode\n");
6697 return PR_OK;
6698 }
6699 }
6700
6701 #ifdef SUPPORT_VALGRIND
6702 /* Mark the dbuffer as addressable but undefined again. */
6703 if (dbuffer != NULL)
6704 {
6705 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6706 }
6707 #endif
6708
6709 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6710 the number of code units that will be needed (though the buffer may have to be
6711 extended if replication is involved). */
6712
6713 needlen = (size_t)((len+1) * code_unit_size);
6714 if (dbuffer == NULL || needlen >= dbuffer_size)
6715 {
6716 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6717 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6718 if (dbuffer == NULL)
6719 {
6720 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6721 exit(1);
6722 }
6723 }
6724 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6725
6726 /* Scan the data line, interpreting data escapes, and put the result into a
6727 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6728 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6729 */
6730
6731 while ((c = *p++) != 0)
6732 {
6733 int32_t i = 0;
6734 size_t replen;
6735
6736 /* ] may mark the end of a replicated sequence */
6737
6738 if (c == ']' && start_rep != NULL)
6739 {
6740 long li;
6741 char *endptr;
6742 size_t qoffset = CAST8VAR(q) - dbuffer;
6743 size_t rep_offset = start_rep - dbuffer;
6744
6745 if (*p++ != '{')
6746 {
6747 fprintf(outfile, "** Expected '{' after \\[....]\n");
6748 return PR_OK;
6749 }
6750
6751 li = strtol((const char *)p, &endptr, 10);
6752 if (S32OVERFLOW(li))
6753 {
6754 fprintf(outfile, "** Repeat count too large\n");
6755 return PR_OK;
6756 }
6757
6758 p = (uint8_t *)endptr;
6759 if (*p++ != '}')
6760 {
6761 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6762 return PR_OK;
6763 }
6764
6765 i = (int32_t)li;
6766 if (i-- == 0)
6767 {
6768 fprintf(outfile, "** Zero repeat not allowed\n");
6769 return PR_OK;
6770 }
6771
6772 replen = CAST8VAR(q) - start_rep;
6773 needlen += replen * i;
6774
6775 if (needlen >= dbuffer_size)
6776 {
6777 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6778 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6779 if (dbuffer == NULL)
6780 {
6781 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6782 exit(1);
6783 }
6784 SETCASTPTR(q, dbuffer + qoffset);
6785 start_rep = dbuffer + rep_offset;
6786 }
6787
6788 while (i-- > 0)
6789 {
6790 memcpy(CAST8VAR(q), start_rep, replen);
6791 SETPLUS(q, replen/code_unit_size);
6792 }
6793
6794 start_rep = NULL;
6795 continue;
6796 }
6797
6798 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6799 set, do the fudge for setting the top bit. */
6800
6801 if (c != '\\' || subject_literal)
6802 {
6803 uint32_t topbit = 0;
6804 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6805 {
6806 topbit = 0x80000000;
6807 c = *p++;
6808 }
6809 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6810 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6811 c |= topbit;
6812 }
6813
6814 /* Handle backslash escapes */
6815
6816 else switch ((c = *p++))
6817 {
6818 case '\\': break;
6819 case 'a': c = CHAR_BEL; break;
6820 case 'b': c = '\b'; break;
6821 case 'e': c = CHAR_ESC; break;
6822 case 'f': c = '\f'; break;
6823 case 'n': c = '\n'; break;
6824 case 'r': c = '\r'; break;
6825 case 't': c = '\t'; break;
6826 case 'v': c = '\v'; break;
6827
6828 case '0': case '1': case '2': case '3':
6829 case '4': case '5': case '6': case '7':
6830 c -= '0';
6831 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6832 c = c * 8 + *p++ - '0';
6833 break;
6834
6835 case 'o':
6836 if (*p == '{')
6837 {
6838 uint8_t *pt = p;
6839 c = 0;
6840 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6841 {
6842 if (++i == 12)
6843 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6844 "using only the first twelve.\n");
6845 else c = c * 8 + *pt - '0';
6846 }
6847 if (*pt == '}') p = pt + 1;
6848 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6849 }
6850 break;
6851
6852 case 'x':
6853 if (*p == '{')
6854 {
6855 uint8_t *pt = p;
6856 c = 0;
6857
6858 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6859 when isxdigit() is a macro that refers to its argument more than
6860 once. This is banned by the C Standard, but apparently happens in at
6861 least one MacOS environment. */
6862
6863 for (pt++; isxdigit(*pt); pt++)
6864 {
6865 if (++i == 9)
6866 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6867 "using only the first eight.\n");
6868 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6869 }
6870 if (*pt == '}')
6871 {
6872 p = pt + 1;
6873 break;
6874 }
6875 /* Not correct form for \x{...}; fall through */
6876 }
6877
6878 /* \x without {} always defines just one byte in 8-bit mode. This
6879 allows UTF-8 characters to be constructed byte by byte, and also allows
6880 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6881 Otherwise, pass it down as data. */
6882
6883 c = 0;
6884 while (i++ < 2 && isxdigit(*p))
6885 {
6886 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6887 p++;
6888 }
6889 #if defined SUPPORT_PCRE2_8
6890 if (utf && (test_mode == PCRE8_MODE))
6891 {
6892 *q8++ = c;
6893 continue;
6894 }
6895 #endif
6896 break;
6897
6898 case 0: /* \ followed by EOF allows for an empty line */
6899 p--;
6900 continue;
6901
6902 case '=': /* \= terminates the data, starts modifiers */
6903 goto ENDSTRING;
6904
6905 case '[': /* \[ introduces a replicated character sequence */
6906 if (start_rep != NULL)
6907 {
6908 fprintf(outfile, "** Nested replication is not supported\n");
6909 return PR_OK;
6910 }
6911 start_rep = CAST8VAR(q);
6912 continue;
6913
6914 default:
6915 if (isalnum(c))
6916 {
6917 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6918 return PR_OK;
6919 }
6920 }
6921
6922 /* We now have a character value in c that may be greater than 255.
6923 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6924 than 127 in UTF mode must have come from \x{...} or octal constructs
6925 because values from \x.. get this far only in non-UTF mode. */
6926
6927 #ifdef SUPPORT_PCRE2_8
6928 if (test_mode == PCRE8_MODE)
6929 {
6930 if (utf)
6931 {
6932 if (c > 0x7fffffff)
6933 {
6934 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6935 "and so cannot be converted to UTF-8\n", c);
6936 return PR_OK;
6937 }
6938 q8 += ord2utf8(c, q8);
6939 }
6940 else
6941 {
6942 if (c > 0xffu)
6943 {
6944 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6945 "and UTF-8 mode is not enabled.\n", c);
6946 fprintf(outfile, "** Truncation will probably give the wrong "
6947 "result.\n");
6948 }
6949 *q8++ = (uint8_t)c;
6950 }
6951 }
6952 #endif
6953 #ifdef SUPPORT_PCRE2_16
6954 if (test_mode == PCRE16_MODE)
6955 {
6956 if (utf)
6957 {
6958 if (c > 0x10ffffu)
6959 {
6960 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6961 "0x10ffff and so cannot be converted to UTF-16\n", c);
6962 return PR_OK;
6963 }
6964 else if (c >= 0x10000u)
6965 {
6966 c-= 0x10000u;
6967 *q16++ = 0xD800 | (c >> 10);
6968 *q16++ = 0xDC00 | (c & 0x3ff);
6969 }
6970 else
6971 *q16++ = c;
6972 }
6973 else
6974 {
6975 if (c > 0xffffu)
6976 {
6977 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6978 "and UTF-16 mode is not enabled.\n", c);
6979 fprintf(outfile, "** Truncation will probably give the wrong "
6980 "result.\n");
6981 }
6982
6983 *q16++ = (uint16_t)c;
6984 }
6985 }
6986 #endif
6987 #ifdef SUPPORT_PCRE2_32
6988 if (test_mode == PCRE32_MODE)
6989 {
6990 *q32++ = c;
6991 }
6992 #endif
6993 }
6994
6995 ENDSTRING:
6996 SET(*q, 0);
6997 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
6998 ulen = len/code_unit_size; /* Length in code units */
6999 arg_ulen = ulen; /* Value to use in match arg */
7000
7001 /* If the string was terminated by \= we must now interpret modifiers. */
7002
7003 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7004 return PR_OK;
7005
/* Setting substitute_{skip,stop} implies a substitute callout. */
7007
7008 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7009 dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7010
7011 /* Check for mutually exclusive modifiers. At present, these are all in the
7012 first control word. */
7013
7014 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7015 {
7016 c = dat_datctl.control & exclusive_dat_controls[k];
7017 if (c != 0 && c != (c & (~c+1)))
7018 {
7019 show_controls(c, 0, "** Not allowed together:");
7020 fprintf(outfile, "\n");
7021 return PR_OK;
7022 }
7023 }
7024
7025 if (pat_patctl.replacement[0] != 0)
7026 {
7027 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7028 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7029 {
7030 fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7031 return PR_OK;
7032 }
7033
7034 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7035 fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7036 }
7037
7038 /* Warn for modifiers that are ignored for DFA. */
7039
7040 if ((dat_datctl.control & CTL_DFA) != 0)
7041 {
7042 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7043 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7044 }
7045
7046 /* We now have the subject in dbuffer, with len containing the byte length, and
7047 ulen containing the code unit length, with a copy in arg_ulen for use in match
7048 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7049 zero_terminate modifier is present).
7050
7051 Move the data to the end of the buffer so that a read over the end can be
7052 caught by valgrind or other means. If we have explicit valgrind support, mark
7053 the unused start of the buffer unaddressable. If we are using the POSIX
7054 interface, or testing zero-termination, we must include the terminating zero in
7055 the usable data. */
7056
7057 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7058 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7059 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7060 #ifdef SUPPORT_VALGRIND
7061 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7062 #endif
7063
7064 /* Now pp points to the subject string. POSIX matching is only possible in
7065 8-bit mode, and it does not support timing or other fancy features. Some were
7066 checked at compile time, but we need to check the match-time settings here. */
7067
7068 #ifdef SUPPORT_PCRE2_8
7069 if ((pat_patctl.control & CTL_POSIX) != 0)
7070 {
7071 int rc;
7072 int eflags = 0;
7073 regmatch_t *pmatch = NULL;
7074 const char *msg = "** Ignored with POSIX interface:";
7075
7076 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7077 prmsg(&msg, "callout_error");
7078 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7079 prmsg(&msg, "callout_fail");
7080 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7081 prmsg(&msg, "copy");
7082 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7083 prmsg(&msg, "get");
7084 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7085 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7086
7087 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7088 {
7089 fprintf(outfile, "%s", msg);
7090 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7091 msg = "";
7092 }
7093 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7094 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7095 {
7096 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7097 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7098 msg = "";
7099 }
7100
7101 if (msg[0] == 0) fprintf(outfile, "\n");
7102
7103 if (dat_datctl.oveccount > 0)
7104 {
7105 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7106 if (pmatch == NULL)
7107 {
7108 fprintf(outfile, "** Failed to get memory for recording matching "
7109 "information (size set = %du)\n", dat_datctl.oveccount);
7110 return PR_OK;
7111 }
7112 }
7113
7114 if (dat_datctl.startend[0] != CFORE_UNSET)
7115 {
7116 pmatch[0].rm_so = dat_datctl.startend[0];
7117 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7118 dat_datctl.startend[1] : len;
7119 eflags |= REG_STARTEND;
7120 }
7121
7122 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7123 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7124 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7125
7126 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7127 if (rc != 0)
7128 {
7129 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7130 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7131 }
7132 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7133 fprintf(outfile, "Matched with REG_NOSUB\n");
7134 else if (dat_datctl.oveccount == 0)
7135 fprintf(outfile, "Matched without capture\n");
7136 else
7137 {
7138 size_t i, j;
7139 size_t last_printed = (size_t)dat_datctl.oveccount;
7140 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7141 {
7142 if (pmatch[i].rm_so >= 0)
7143 {
7144 PCRE2_SIZE start = pmatch[i].rm_so;
7145 PCRE2_SIZE end = pmatch[i].rm_eo;
7146 for (j = last_printed + 1; j < i; j++)
7147 fprintf(outfile, "%2d: <unset>\n", (int)j);
7148 last_printed = i;
7149 if (start > end)
7150 {
7151 start = pmatch[i].rm_eo;
7152 end = pmatch[i].rm_so;
7153 fprintf(outfile, "Start of matched string is beyond its end - "
7154 "displaying from end to start.\n");
7155 }
7156 fprintf(outfile, "%2d: ", (int)i);
7157 PCHARSV(pp, start, end - start, utf, outfile);
7158 fprintf(outfile, "\n");
7159
7160 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7161 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7162 {
7163 fprintf(outfile, "%2d+ ", (int)i);
7164 /* Note: don't use the start/end variables here because we want to
7165 show the text from what is reported as the end. */
7166 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7167 fprintf(outfile, "\n"); }
7168 }
7169 }
7170 }
7171 free(pmatch);
7172 return PR_OK;
7173 }
7174 #endif /* SUPPORT_PCRE2_8 */
7175
7176 /* Handle matching via the native interface. Check for consistency of
7177 modifiers. */
7178
7179 if (dat_datctl.startend[0] != CFORE_UNSET)
7180 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7181
7182 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7183 matching, even if the JIT compiler was used. */
7184
7185 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7186 FLD(compiled_code, executable_jit) != NULL)
7187 {
7188 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7189 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7190 }
7191
7192 /* Handle passing the subject as zero-terminated. */
7193
7194 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7195 arg_ulen = PCRE2_ZERO_TERMINATED;
7196
7197 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7198 NULL context. */
7199
7200 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7201 NULL : PTR(dat_context);
7202
7203 /* Enable display of malloc/free if wanted. We can do this only if either the
7204 pattern or the subject is processed with a context. */
7205
7206 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7207
7208 if (show_memory &&
7209 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7210 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7211 "context: ignored\n");
7212
7213 /* Create and assign a JIT stack if requested. */
7214
7215 if (dat_datctl.jitstack != 0)
7216 {
7217 if (dat_datctl.jitstack != jit_stack_size)
7218 {
7219 PCRE2_JIT_STACK_FREE(jit_stack);
7220 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7221 jit_stack_size = dat_datctl.jitstack;
7222 }
7223 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7224 }
7225
7226 /* Or de-assign */
7227
7228 else if (jit_stack != NULL)
7229 {
7230 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7231 PCRE2_JIT_STACK_FREE(jit_stack);
7232 jit_stack = NULL;
7233 jit_stack_size = 0;
7234 }
7235
7236 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7237 if we want to verify that JIT was actually used. */
7238
7239 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7240 {
7241 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7242 }
7243
7244 /* Adjust match_data according to size of offsets required. A size of zero
7245 causes a new match data block to be obtained that exactly fits the pattern. */
7246
7247 if (dat_datctl.oveccount == 0)
7248 {
7249 PCRE2_MATCH_DATA_FREE(match_data);
7250 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7251 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7252 }
7253 else if (dat_datctl.oveccount <= max_oveccount)
7254 {
7255 SETFLD(match_data, oveccount, dat_datctl.oveccount);
7256 }
7257 else
7258 {
7259 max_oveccount = dat_datctl.oveccount;
7260 PCRE2_MATCH_DATA_FREE(match_data);
7261 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7262 }
7263
7264 if (CASTVAR(void *, match_data) == NULL)
7265 {
7266 fprintf(outfile, "** Failed to get memory for recording matching "
7267 "information (size requested: %d)\n", dat_datctl.oveccount);
7268 max_oveccount = 0;
7269 return PR_OK;
7270 }
7271
7272 ovector = FLD(match_data, ovector);
7273 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7274
7275 /* Replacement processing is ignored for DFA matching. */
7276
7277 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7278 {
7279 fprintf(outfile, "** Ignored for DFA matching: replace\n");
7280 dat_datctl.replacement[0] = 0;
7281 }
7282
7283 /* If a replacement string is provided, call pcre2_substitute() instead of one
7284 of the matching functions. First we have to convert the replacement string to
7285 the appropriate width. */
7286
7287 if (dat_datctl.replacement[0] != 0)
7288 {
7289 int rc;
7290 uint8_t *pr;
7291 uint8_t rbuffer[REPLACE_BUFFSIZE];
7292 uint8_t nbuffer[REPLACE_BUFFSIZE];
7293 uint32_t xoptions;
7294 uint32_t emoption; /* External match option */
7295 PCRE2_SIZE j, rlen, nsize, erroroffset;
7296 BOOL badutf = FALSE;
7297
7298 #ifdef SUPPORT_PCRE2_8
7299 uint8_t *r8 = NULL;
7300 #endif
7301 #ifdef SUPPORT_PCRE2_16
7302 uint16_t *r16 = NULL;
7303 #endif
7304 #ifdef SUPPORT_PCRE2_32
7305 uint32_t *r32 = NULL;
7306 #endif
7307
7308 /* Fill the ovector with junk to detect elements that do not get set
7309 when they should be (relevant only when "allvector" is specified). */
7310
7311 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7312
7313 if (timeitm)
7314 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7315
7316 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7317 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7318
7319 /* Check for a test that does substitution after an initial external match.
7320 If this is set, we run the external match, but leave the interpretation of
7321 its output to pcre2_substitute(). */
7322
7323 emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7324 PCRE2_SUBSTITUTE_MATCHED;
7325
7326 if (emoption != 0)
7327 {
7328 PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7329 dat_datctl.options, match_data, use_dat_context);
7330 }
7331
7332 xoptions = emoption |
7333 (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7334 PCRE2_SUBSTITUTE_GLOBAL) |
7335 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7336 PCRE2_SUBSTITUTE_EXTENDED) |
7337 (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7338 PCRE2_SUBSTITUTE_LITERAL) |
7339 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7340 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7341 (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7342 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7343 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7344 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7345 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7346 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7347
7348 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7349 pr = dat_datctl.replacement;
7350
7351 /* If the replacement starts with '[<number>]' we interpret that as length
7352 value for the replacement buffer. */
7353
7354 nsize = REPLACE_BUFFSIZE/code_unit_size;
7355 if (*pr == '[')
7356 {
7357 PCRE2_SIZE n = 0;
7358 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7359 if (*pr++ != ']')
7360 {
7361 fprintf(outfile, "Bad buffer size in replacement string\n");
7362 return PR_OK;
7363 }
7364 if (n > nsize)
7365 {
7366 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7367 "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
7368 return PR_OK;
7369 }
7370 nsize = n;
7371 }
7372
7373 /* Now copy the replacement string to a buffer of the appropriate width. No
7374 escape processing is done for replacements. In UTF mode, check for an invalid
7375 UTF-8 input string, and if it is invalid, just copy its code units without
7376 UTF interpretation. This provides a means of checking that an invalid string
7377 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7378 replacement. */
7379
7380 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7381
7382 /* Not UTF or invalid UTF-8: just copy the code units. */
7383
7384 if (!utf || badutf)
7385 {
7386 while ((c = *pr++) != 0)
7387 {
7388 #ifdef SUPPORT_PCRE2_8
7389 if (test_mode == PCRE8_MODE) *r8++ = c;
7390 #endif
7391 #ifdef SUPPORT_PCRE2_16
7392 if (test_mode == PCRE16_MODE) *r16++ = c;
7393 #endif
7394 #ifdef SUPPORT_PCRE2_32
7395 if (test_mode == PCRE32_MODE) *r32++ = c;
7396 #endif
7397 }
7398 }
7399
7400 /* Valid UTF-8 replacement string */
7401
7402 else while ((c = *pr++) != 0)
7403 {
7404 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7405
7406 #ifdef SUPPORT_PCRE2_8
7407 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7408 #endif
7409
7410 #ifdef SUPPORT_PCRE2_16
7411 if (test_mode == PCRE16_MODE)
7412 {
7413 if (c >= 0x10000u)
7414 {
7415 c-= 0x10000u;
7416 *r16++ = 0xD800 | (c >> 10);
7417 *r16++ = 0xDC00 | (c & 0x3ff);
7418 }
7419 else *r16++ = c;
7420 }
7421 #endif
7422
7423 #ifdef SUPPORT_PCRE2_32
7424 if (test_mode == PCRE32_MODE) *r32++ = c;
7425 #endif
7426 }
7427
7428 SET(*r, 0);
7429 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7430 rlen = PCRE2_ZERO_TERMINATED;
7431 else
7432 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7433
7434 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7435 {
7436 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7437 }
7438 else
7439 {
7440 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
7441 }
7442
7443 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7444 dat_datctl.options|xoptions, match_data, use_dat_context,
7445 rbuffer, rlen, nbuffer, &nsize);
7446
7447 if (rc < 0)
7448 {
7449 fprintf(outfile, "Failed: error %d", rc);
7450 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7451 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7452 fprintf(outfile, ": ");
7453 if (!print_error_message(rc, "", "")) return PR_ABEND;
7454 if (rc == PCRE2_ERROR_NOMEMORY &&
7455 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7456 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7457 }
7458 else
7459 {
7460 fprintf(outfile, "%2d: ", rc);
7461 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7462 }
7463
7464 fprintf(outfile, "\n");
7465 show_memory = FALSE;
7466
7467 /* Show final ovector contents if requested. */
7468
7469 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7470 show_ovector(ovector, oveccount);
7471
7472 return PR_OK;
7473 } /* End of substitution handling */
7474
7475 /* When a replacement string is not provided, run a loop for global matching
7476 with one of the basic matching functions. For altglobal (or first time round
7477 the loop), set an "unset" value for the previous match info. */
7478
7479 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7480
7481 for (gmatched = 0;; gmatched++)
7482 {
7483 PCRE2_SIZE j;
7484 int capcount;
7485
7486 /* Fill the ovector with junk to detect elements that do not get set
7487 when they should be. */
7488
7489 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7490
7491 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7492 stack callback function. */
7493
7494 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7495
7496 /* Do timing if required. */
7497
7498 if (timeitm > 0)
7499 {
7500 int i;
7501 clock_t start_time, time_taken;
7502
7503 if ((dat_datctl.control & CTL_DFA) != 0)
7504 {
7505 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7506 {
7507 fprintf(outfile, "Timing DFA restarts is not supported\n");
7508 return PR_OK;
7509 }
7510 if (dfa_workspace == NULL)
7511 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7512 start_time = clock();
7513 for (i = 0; i < timeitm; i++)
7514 {
7515 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7516 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7517 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7518 }
7519 }
7520
7521 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7522 {
7523 start_time = clock();
7524 for (i = 0; i < timeitm; i++)
7525 {
7526 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7527 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7528 use_dat_context);
7529 }
7530 }
7531
7532 else
7533 {
7534 start_time = clock();
7535 for (i = 0; i < timeitm; i++)
7536 {
7537 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7538 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7539 use_dat_context);
7540 }
7541 }
7542 total_match_time += (time_taken = clock() - start_time);
7543 fprintf(outfile, "Match time %.4f milliseconds\n",
7544 (((double)time_taken * 1000.0) / (double)timeitm) /
7545 (double)CLOCKS_PER_SEC);
7546 }
7547
7548 /* Find the heap, match and depth limits if requested. The depth and heap
7549 limits are not relevant for JIT. The return from check_match_limit() is the
7550 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7551
7552 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7553 {
7554 capcount = 0; /* This stops compiler warnings */
7555
7556 if (FLD(compiled_code, executable_jit) == NULL ||
7557 (dat_datctl.options & PCRE2_NO_JIT) != 0)
7558 {
7559 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7560 }
7561
7562 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7563 "match");
7564
7565 if (FLD(compiled_code, executable_jit) == NULL ||
7566 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7567 (dat_datctl.control & CTL_DFA) != 0)
7568 {
7569 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7570 "depth");
7571 }
7572
7573 if (capcount == 0)
7574 {
7575 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7576 capcount = dat_datctl.oveccount;
7577 }
7578 }
7579
7580 /* Otherwise just run a single match, setting up a callout if required (the
7581 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7582
7583 else
7584 {
7585 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7586 {
7587 PCRE2_SET_CALLOUT(dat_context, callout_function,
7588 (void *)(&dat_datctl.callout_data));
7589 first_callout = TRUE;
7590 last_callout_mark = NULL;
7591 callout_count = 0;
7592 }
7593 else
7594 {
7595 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7596 }
7597
7598 /* Run a single DFA or NFA match. */
7599
7600 if ((dat_datctl.control & CTL_DFA) != 0)
7601 {
7602 if (dfa_workspace == NULL)
7603 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7604 if (dfa_matched++ == 0)
7605 dfa_workspace[0] = -1; /* To catch bad restart */
7606 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7607 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7608 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7609 if (capcount == 0)
7610 {
7611 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7612 capcount = dat_datctl.oveccount;
7613 }
7614 }
7615 else
7616 {
7617 if ((pat_patctl.control & CTL_JITFAST) != 0)
7618 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7619 dat_datctl.options | g_notempty, match_data, use_dat_context);
7620 else
7621 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7622 dat_datctl.options | g_notempty, match_data, use_dat_context);
7623 if (capcount == 0)
7624 {
7625 fprintf(outfile, "Matched, but too many substrings\n");
7626 capcount = dat_datctl.oveccount;
7627 }
7628 }
7629 }
7630
7631 /* The result of the match is now in capcount. First handle a successful
7632 match. */
7633
7634 if (capcount >= 0)
7635 {
7636 int i;
7637
7638 if (capcount > (int)oveccount) /* Check for lunatic return value */
7639 {
7640 fprintf(outfile,
7641 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7642 capcount, oveccount);
7643 capcount = oveccount;
7644 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7645 {
7646 fprintf(outfile, "** Global loop abandoned\n");
7647 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7648 }
7649 }
7650
7651 /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7652 should be, but not for fast JIT, where it isn't supported. */
7653
7654 if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7655 (pat_patctl.control & CTL_JITFAST) == 0)
7656 {
7657 if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7658 fprintf(outfile,
7659 "** PCRE2 error: flag not set after copy_matched_subject\n");
7660
7661 if (CASTFLD(void *, match_data, subject) == pp)
7662 fprintf(outfile,
7663 "** PCRE2 error: copy_matched_subject has not copied\n");
7664
7665 if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7666 fprintf(outfile,
7667 "** PCRE2 error: copy_matched_subject mismatch\n");
7668 }
7669
7670 /* If this is not the first time round a global loop, check that the
7671 returned string has changed. If it has not, check for an empty string match
7672 at different starting offset from the previous match. This is a failed test
7673 retry for null-matching patterns that don't match at their starting offset,
7674 for example /(?<=\G.)/. A repeated match at the same point is not such a
7675 pattern, and must be discarded, and we then proceed to seek a non-null
7676 match at the current point. For any other repeated match, there is a bug
7677 somewhere and we must break the loop because it will go on for ever. We
7678 know that there are always at least two elements in the ovector. */
7679
7680 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7681 {
7682 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7683 {
7684 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7685 ovecsave[2] = dat_datctl.offset;
7686 continue; /* Back to the top of the loop */
7687 }
7688 fprintf(outfile,
7689 "** PCRE2 error: global repeat returned the same string as previous\n");
7690 fprintf(outfile, "** Global loop abandoned\n");
7691 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7692 }
7693
7694 /* "allcaptures" requests showing of all captures in the pattern, to check
7695 unset ones at the end. It may be set on the pattern or the data. Implement
7696 by setting capcount to the maximum. This is not relevant for DFA matching,
7697 so ignore it (warning given above). */
7698
7699 if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7700 {
7701 capcount = maxcapcount + 1; /* Allow for full match */
7702 if (capcount > (int)oveccount) capcount = oveccount;
7703 }
7704
7705 /* "allvector" requests showing the entire ovector. */
7706
7707 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7708
7709 /* Output the captured substrings. Note that, for the matched string,
7710 the use of \K in an assertion can make the start later than the end. */
7711
7712 for (i = 0; i < 2*capcount; i += 2)
7713 {
7714 PCRE2_SIZE lleft, lmiddle, lright;
7715 PCRE2_SIZE start = ovector[i];
7716 PCRE2_SIZE end = ovector[i+1];
7717
7718 if (start > end)
7719 {
7720 start = ovector[i+1];
7721 end = ovector[i];
7722 fprintf(outfile, "Start of matched string is beyond its end - "
7723 "displaying from end to start.\n");
7724 }
7725
7726 fprintf(outfile, "%2d: ", i/2);
7727
7728 /* Check for an unset group */
7729
7730 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7731 {
7732 fprintf(outfile, "<unset>\n");
7733 continue;
7734 }
7735
7736 /* Check for silly offsets, in particular, values that have not been
7737 set when they should have been. However, if we are past the end of the
7738 captures for this pattern ("allvector" causes this), or if we are DFA
7739 matching, it isn't an error if the entry is unchanged. */
7740
7741 if (start > ulen || end > ulen)
7742 {
7743 if (((dat_datctl.control & CTL_DFA) != 0 ||
7744 i >= (int)(2*maxcapcount + 2)) &&
7745 start == JUNK_OFFSET && end == JUNK_OFFSET)
7746 fprintf(outfile, "<unchanged>\n");
7747 else
7748 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7749 (unsigned long int)start, (unsigned long int)end);
7750 continue;
7751 }
7752
7753 /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7754 JIT, it is disabled above, with a comment.) When the match is done by the
7755 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7756 set, and if the leftmost consulted character is before the start of the
7757 match or the rightmost consulted character is past the end of the match,
7758 we want to show all consulted characters for the main matched string, and
7759 indicate which were lookarounds. */
7760
7761 if (i == 0)
7762 {
7763 BOOL showallused;
7764 PCRE2_SIZE leftchar, rightchar;
7765
7766 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7767 {
7768 leftchar = FLD(match_data, leftchar);
7769 rightchar = FLD(match_data, rightchar);
7770 showallused = i == 0 && (leftchar < start || rightchar > end);
7771 }
7772 else showallused = FALSE;
7773
7774 if (showallused)
7775 {
7776 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7777 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7778 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7779 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7780 fprintf(outfile, " (JIT)");
7781 fprintf(outfile, "\n ");
7782 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7783 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7784 for (j = 0; j < lright; j++) fprintf(outfile, ">");
7785 }
7786
7787 /* When a pattern contains \K, the start of match position may be
7788 different to the start of the matched string. When this is the case,
7789 show it when requested. */
7790
7791 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7792 {
7793 PCRE2_SIZE startchar;
7794 PCRE2_GET_STARTCHAR(startchar, match_data);
7795 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7796 PCHARSV(pp, start, end - start, utf, outfile);
7797 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7798 fprintf(outfile, " (JIT)");
7799 if (startchar != start)
7800 {
7801 fprintf(outfile, "\n ");
7802 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7803 }
7804 }
7805
7806 /* Otherwise, just show the matched string. */
7807
7808 else
7809 {
7810 PCHARSV(pp, start, end - start, utf, outfile);
7811 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7812 fprintf(outfile, " (JIT)");
7813 }
7814 }
7815
7816 /* Not the main matched string. Just show it unadorned. */
7817
7818 else
7819 {
7820 PCHARSV(pp, start, end - start, utf, outfile);
7821 }
7822
7823 fprintf(outfile, "\n");
7824
7825 /* Note: don't use the start/end variables here because we want to
7826 show the text from what is reported as the end. */
7827
7828 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7829 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7830 {
7831 fprintf(outfile, "%2d+ ", i/2);
7832 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7833 fprintf(outfile, "\n");
7834 }
7835 }
7836
7837 /* Output (*MARK) data if requested */
7838
7839 if ((dat_datctl.control & CTL_MARK) != 0 &&
7840 TESTFLD(match_data, mark, !=, NULL))
7841 {
7842 fprintf(outfile, "MK: ");
7843 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7844 fprintf(outfile, "\n");
7845 }
7846
7847 /* Process copy/get strings */
7848
7849 if (!copy_and_get(utf, capcount)) return PR_ABEND;
7850
7851 } /* End of handling a successful match */
7852
7853 /* There was a partial match. The value of ovector[0] is the bumpalong point,
7854 that is, startchar, not any \K point that might have been passed. When JIT is
7855 not in use, "allusedtext" may be set, in which case we indicate the leftmost
7856 consulted character. */
7857
7858 else if (capcount == PCRE2_ERROR_PARTIAL)
7859 {
7860 PCRE2_SIZE leftchar;
7861 int backlength;
7862 int rubriclength = 0;
7863
7864 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7865 {
7866 leftchar = FLD(match_data, leftchar);
7867 }
7868 else leftchar = ovector[0];
7869
7870 fprintf(outfile, "Partial match");
7871 if ((dat_datctl.control & CTL_MARK) != 0 &&
7872 TESTFLD(match_data, mark, !=, NULL))
7873 {
7874 fprintf(outfile, ", mark=");
7875 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7876 outfile);
7877 rubriclength += 7;
7878 }
7879 fprintf(outfile, ": ");
7880 rubriclength += 15;
7881
7882 PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7883 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7884
7885 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7886 fprintf(outfile, " (JIT)");
7887 fprintf(outfile, "\n");
7888
7889 if (backlength != 0)
7890 {
7891 int i;
7892 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7893 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7894 fprintf(outfile, "\n");
7895 }
7896
7897 if (ulen != ovector[1])
7898 fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7899 "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7900
7901 /* Process copy/get strings */
7902
7903 if (!copy_and_get(utf, 1)) return PR_ABEND;
7904
7905 /* "allvector" outputs the entire vector */
7906
7907 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7908 show_ovector(ovector, oveccount);
7909
7910 break; /* Out of the /g loop */
7911 } /* End of handling partial match */
7912
7913 /* Failed to match. If this is a /g or /G loop, we might previously have
7914 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7915 If that is the case, this is not necessarily the end. We want to advance the
7916 start offset, and continue. We won't be at the end of the string - that was
7917 checked before setting g_notempty. We achieve the effect by pretending that a
7918 single character was matched.
7919
7920 Complication arises in the case when the newline convention is "any", "crlf",
7921 or "anycrlf". If the previous match was at the end of a line terminated by
7922 CRLF, an advance of one character just passes the CR, whereas we should
7923 prefer the longer newline sequence, as does the code in pcre2_match().
7924
7925 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7926 character, not one byte. */
7927
7928 else if (g_notempty != 0) /* There was a previous null match */
7929 {
7930 uint16_t nl = FLD(compiled_code, newline_convention);
7931 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
7932 PCRE2_SIZE end_offset = start_offset + 1;
7933
7934 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7935 nl == PCRE2_NEWLINE_ANYCRLF) &&
7936 start_offset < ulen - 1 &&
7937 CODE_UNIT(pp, start_offset) == '\r' &&
7938 CODE_UNIT(pp, end_offset) == '\n')
7939 end_offset++;
7940
7941 else if (utf && test_mode != PCRE32_MODE)
7942 {
7943 if (test_mode == PCRE8_MODE)
7944 {
7945 for (; end_offset < ulen; end_offset++)
7946 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7947 }
7948 else /* 16-bit mode */
7949 {
7950 for (; end_offset < ulen; end_offset++)
7951 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7952 }
7953 }
7954
7955 SETFLDVEC(match_data, ovector, 0, start_offset);
7956 SETFLDVEC(match_data, ovector, 1, end_offset);
7957 } /* End of handling null match in a global loop */
7958
7959 /* A "normal" match failure. There will be a negative error number in
7960 capcount. */
7961
7962 else
7963 {
7964 switch(capcount)
7965 {
7966 case PCRE2_ERROR_NOMATCH:
7967 if (gmatched == 0)
7968 {
7969 fprintf(outfile, "No match");
7970 if ((dat_datctl.control & CTL_MARK) != 0 &&
7971 TESTFLD(match_data, mark, !=, NULL))
7972 {
7973 fprintf(outfile, ", mark = ");
7974 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7975 }
7976 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7977 fprintf(outfile, " (JIT)");
7978 fprintf(outfile, "\n");
7979
7980 /* "allvector" outputs the entire vector */
7981
7982 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7983 show_ovector(ovector, oveccount);
7984 }
7985 break;
7986
7987 case PCRE2_ERROR_BADUTFOFFSET:
7988 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7989 break;
7990
7991 default:
7992 fprintf(outfile, "Failed: error %d: ", capcount);
7993 if (!print_error_message(capcount, "", "")) return PR_ABEND;
7994 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7995 capcount >= PCRE2_ERROR_UTF32_ERR2)
7996 {
7997 PCRE2_SIZE startchar;
7998 PCRE2_GET_STARTCHAR(startchar, match_data);
7999 fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
8000 }
8001 fprintf(outfile, "\n");
8002 break;
8003 }
8004
8005 break; /* Out of the /g loop */
8006 } /* End of failed match handling */
8007
8008 /* Control reaches here in two circumstances: (a) after a match, and (b)
8009 after a non-match that immediately followed a match on an empty string when
8010 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8011 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8012 of one character. So effectively we get here only after a match. If we
8013 are not doing a global search, we are done. */
8014
8015 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8016 {
8017 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8018 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8019
8020 /* We must now set up for the next iteration of a global search. If we have
8021 matched an empty string, first check to see if we are at the end of the
8022 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8023 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8024 at the same point. If this fails it will be picked up above, where a fake
8025 match is set up so that at this point we advance to the next character.
8026
8027 However, in order to cope with patterns that never match at their starting
8028 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8029 than the starting offset. This means there will be a retry with the
8030 starting offset at the match offset. If this returns the same match again,
8031 it is picked up above and ignored, and the special action is then taken. */
8032
8033 if (match_offset == end_offset)
8034 {
8035 if (end_offset == ulen) break; /* End of subject */
8036 if (match_offset <= dat_datctl.offset)
8037 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8038 }
8039
8040 /* However, even after matching a non-empty string, there is still one
8041 tricky case. If a pattern contains \K within a lookbehind assertion at the
8042 start, the end of the matched string can be at the offset where the match
8043 started. In the case of a normal /g iteration without special action, this
8044 leads to a loop that keeps on returning the same substring. The loop would
8045 be caught above, but we really want to move on to the next match. */
8046
8047 else
8048 {
8049 g_notempty = 0; /* Set for a "normal" repeat */
8050 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8051 {
8052 PCRE2_SIZE startchar;
8053 PCRE2_GET_STARTCHAR(startchar, match_data);
8054 if (end_offset <= startchar)
8055 {
8056 if (startchar >= ulen) break; /* End of subject */
8057 end_offset = startchar + 1;
8058 if (utf && test_mode != PCRE32_MODE)
8059 {
8060 if (test_mode == PCRE8_MODE)
8061 {
8062 for (; end_offset < ulen; end_offset++)
8063 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8064 }
8065 else /* 16-bit mode */
8066 {
8067 for (; end_offset < ulen; end_offset++)
8068 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8069 }
8070 }
8071 }
8072 }
8073 }
8074
8075 /* For a normal global (/g) iteration, save the current ovector[0,1] and
8076 the starting offset so that we can check that they do change each time.
8077 Otherwise a matching bug that returns the same string causes an infinite
8078 loop. It has happened! Then update the start offset, leaving other
8079 parameters alone. */
8080
8081 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8082 {
8083 ovecsave[0] = ovector[0];
8084 ovecsave[1] = ovector[1];
8085 ovecsave[2] = dat_datctl.offset;
8086 dat_datctl.offset = end_offset;
8087 }
8088
8089 /* For altglobal, just update the pointer and length. */
8090
8091 else
8092 {
8093 pp += end_offset * code_unit_size;
8094 len -= end_offset * code_unit_size;
8095 ulen -= end_offset;
8096 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8097 }
8098 }
8099 } /* End of global loop */
8100
8101 show_memory = FALSE;
8102 return PR_OK;
8103 }
8104
8105
8106
8107
8108 /*************************************************
8109 * Print PCRE2 version *
8110 *************************************************/
8111
8112 static void
print_version(FILE * f)8113 print_version(FILE *f)
8114 {
8115 VERSION_TYPE *vp;
8116 fprintf(f, "PCRE2 version ");
8117 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8118 fprintf(f, "\n");
8119 }
8120
8121
8122
8123 /*************************************************
8124 * Print Unicode version *
8125 *************************************************/
8126
8127 static void
print_unicode_version(FILE * f)8128 print_unicode_version(FILE *f)
8129 {
8130 VERSION_TYPE *vp;
8131 fprintf(f, "Unicode version ");
8132 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8133 }
8134
8135
8136
8137 /*************************************************
8138 * Print JIT target *
8139 *************************************************/
8140
8141 static void
print_jit_target(FILE * f)8142 print_jit_target(FILE *f)
8143 {
8144 VERSION_TYPE *vp;
8145 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8146 }
8147
8148
8149
8150 /*************************************************
8151 * Print newline configuration *
8152 *************************************************/
8153
8154 /* Output is always to stdout.
8155
8156 Arguments:
8157 rc the return code from PCRE2_CONFIG_NEWLINE
8158 isc TRUE if called from "-C newline"
8159 Returns: nothing
8160 */
8161
8162 static void
print_newline_config(uint32_t optval,BOOL isc)8163 print_newline_config(uint32_t optval, BOOL isc)
8164 {
8165 if (!isc) printf(" Default newline sequence is ");
8166 if (optval < sizeof(newlines)/sizeof(char *))
8167 printf("%s\n", newlines[optval]);
8168 else
8169 printf("a non-standard value: %d\n", optval);
8170 }
8171
8172
8173
8174 /*************************************************
8175 * Usage function *
8176 *************************************************/
8177
/* Write the command line summary to stdout. The text is constant apart from
the parts that are selected at compile time: the sentence about readline(),
and the -8, -16, and -32 lines, each of which appears only when the
corresponding library width is supported.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
fputs("Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE2_8
fputs(" -8 use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

/* The remaining text is output in several pieces in order to keep each string
literal reasonably short. */

fputs(" -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"
      " -AC as -ac, but also set subject 'callout_extra' modifier\n"
      " -b set default pattern modifier 'fullbincode'\n"
      " -C show PCRE2 compile-time options and exit\n"
      " -C arg show a specific compile-time option and exit with its\n"
      " value if numeric (else 0). The arg can be:\n", stdout);

fputs(" backslash-C use of \\C is enabled [0, 1]\n"
      " bsr \\R type [ANYCRLF, ANY]\n"
      " ebcdic compiled for EBCDIC character code [0,1]\n"
      " ebcdic-nl NL code if compiled for EBCDIC\n"
      " jit just-in-time compiler supported [0, 1]\n"
      " linksize internal link size [2, 3, 4]\n", stdout);

fputs(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
      " pcre2-8 8 bit library support enabled [0, 1]\n"
      " pcre2-16 16 bit library support enabled [0, 1]\n"
      " pcre2-32 32 bit library support enabled [0, 1]\n"
      " unicode Unicode and UTF support enabled [0, 1]\n", stdout);

fputs(" -d set default pattern modifier 'debug'\n"
      " -dfa set default subject modifier 'dfa'\n"
      " -error <n,m,..> show messages for error numbers, then exit\n"
      " -help show usage information\n"
      " -i set default pattern modifier 'info'\n"
      " -jit set default pattern modifier 'jit'\n"
      " -jitfast set default pattern modifier 'jitfast'\n"
      " -jitverify set default pattern modifier 'jitverify'\n", stdout);

fputs(" -LM list pattern and subject modifiers, then exit\n"
      " -q quiet: do not output PCRE2 version number at start\n"
      " -pattern <s> set default pattern modifier fields\n"
      " -subject <s> set default subject modifier fields\n"
      " -S <n> set stack size to <n> mebibytes\n", stdout);

fputs(" -t [<n>] time compilation and execution, repeating <n> times\n"
      " -tm [<n>] time execution (matching) only, repeating <n> times\n"
      " -T same as -t, but show total times at the end\n"
      " -TM same as -tm, but show total time at the end\n"
      " -version show PCRE2 version and exit\n", stdout);
}
8234
8235
8236
8237 /*************************************************
8238 * Handle -C option *
8239 *************************************************/
8240
8241 /* This option outputs configuration options and sets an appropriate return
8242 code when asked for a single option. The code is abstracted into a separate
8243 function because of its size. Use whichever pcre2_config() function is
8244 available.
8245
8246 Argument: an option name or NULL
8247 Returns: the return code
8248 */
8249
8250 static int
c_option(const char * arg)8251 c_option(const char *arg)
8252 {
8253 uint32_t optval;
8254 unsigned int i = COPTLISTCOUNT;
8255 int yield = 0;
8256
8257 if (arg != NULL && arg[0] != CHAR_MINUS)
8258 {
8259 for (i = 0; i < COPTLISTCOUNT; i++)
8260 if (strcmp(arg, coptlist[i].name) == 0) break;
8261
8262 if (i >= COPTLISTCOUNT)
8263 {
8264 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8265 return 0;
8266 }
8267
8268 switch (coptlist[i].type)
8269 {
8270 case CONF_BSR:
8271 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8272 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8273 break;
8274
8275 case CONF_FIX:
8276 yield = coptlist[i].value;
8277 printf("%d\n", yield);
8278 break;
8279
8280 case CONF_FIZ:
8281 optval = coptlist[i].value;
8282 printf("%d\n", optval);
8283 break;
8284
8285 case CONF_INT:
8286 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8287 printf("%d\n", yield);
8288 break;
8289
8290 case CONF_NL:
8291 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8292 print_newline_config(optval, TRUE);
8293 break;
8294 }
8295
8296 /* For VMS, return the value by setting a symbol, for certain values only. This
8297 is contributed code which the PCRE2 developers have no means of testing. */
8298
8299 #ifdef __VMS
8300
8301 /* This is the original code provided by the first VMS contributor. */
8302 #ifdef NEVER
8303 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8304 {
8305 char ucname[16];
8306 strcpy(ucname, coptlist[i].name);
8307 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8308 vms_setsymbol(ucname, 0, optval);
8309 }
8310 #endif
8311
8312 /* This is the new code, provided by a second VMS contributor. */
8313
8314 if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8315 {
8316 char nam_buf[22], val_buf[4];
8317 $DESCRIPTOR(nam, nam_buf);
8318 $DESCRIPTOR(val, val_buf);
8319
8320 strcpy(nam_buf, coptlist[i].name);
8321 nam.dsc$w_length = strlen(nam_buf);
8322 sprintf(val_buf, "%d", yield);
8323 val.dsc$w_length = strlen(val_buf);
8324 lib$set_symbol(&nam, &val);
8325 }
8326 #endif /* __VMS */
8327
8328 return yield;
8329 }
8330
8331 /* No argument for -C: output all configuration information. */
8332
8333 print_version(stdout);
8334 printf("Compiled with\n");
8335
8336 #ifdef EBCDIC
8337 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8338 #if defined NATIVE_ZOS
8339 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
8340 #endif
8341 #endif
8342
8343 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8344 if (optval & 1) printf(" 8-bit support\n");
8345 if (optval & 2) printf(" 16-bit support\n");
8346 if (optval & 4) printf(" 32-bit support\n");
8347
8348 #ifdef SUPPORT_VALGRIND
8349 printf(" Valgrind support\n");
8350 #endif
8351
8352 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8353 if (optval != 0)
8354 {
8355 printf(" UTF and UCP support (");
8356 print_unicode_version(stdout);
8357 printf(")\n");
8358 }
8359 else printf(" No Unicode support\n");
8360
8361 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8362 if (optval != 0)
8363 {
8364 printf(" Just-in-time compiler support: ");
8365 print_jit_target(stdout);
8366 printf("\n");
8367 }
8368 else
8369 {
8370 printf(" No just-in-time compiler support\n");
8371 }
8372
8373 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8374 print_newline_config(optval, FALSE);
8375 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8376 printf(" \\R matches %s\n",
8377 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8378 "all Unicode newlines");
8379 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8380 printf(" \\C is %ssupported\n", optval? "not ":"");
8381 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8382 printf(" Internal link size = %d\n", optval);
8383 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8384 printf(" Parentheses nest limit = %d\n", optval);
8385 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8386 printf(" Default heap limit = %d kibibytes\n", optval);
8387 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8388 printf(" Default match limit = %d\n", optval);
8389 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8390 printf(" Default depth limit = %d\n", optval);
8391
8392 #if defined SUPPORT_LIBREADLINE
8393 printf(" pcre2test has libreadline support\n");
8394 #elif defined SUPPORT_LIBEDIT
8395 printf(" pcre2test has libedit support\n");
8396 #else
8397 printf(" pcre2test has neither libreadline nor libedit support\n");
8398 #endif
8399
8400 return 0;
8401 }
8402
8403
8404
8405 /*************************************************
8406 * Display one modifier *
8407 *************************************************/
8408
8409 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8410 display_one_modifier(modstruct *m, BOOL for_pattern)
8411 {
8412 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8413 '*' : ' ';
8414 printf("%c%s", c, m->name);
8415 }
8416
8417
8418
8419 /*************************************************
8420 * Display pattern or subject modifiers *
8421 *************************************************/
8422
8423 /* In order to print in two columns, first scan without printing to get a list
8424 of the modifiers that are required.
8425
8426 Arguments:
8427 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8428 title string to be used in title
8429
8430 Returns: nothing
8431 */
8432
8433 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8434 display_selected_modifiers(BOOL for_pattern, const char *title)
8435 {
8436 uint32_t i, j;
8437 uint32_t n = 0;
8438 uint32_t list[MODLISTCOUNT];
8439
8440 for (i = 0; i < MODLISTCOUNT; i++)
8441 {
8442 BOOL is_pattern = TRUE;
8443 modstruct *m = modlist + i;
8444
8445 switch (m->which)
8446 {
8447 case MOD_CTC: /* Compile context */
8448 case MOD_PAT: /* Pattern */
8449 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8450 break;
8451
8452 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8453 subjects, but can be given with a pattern. We list them as subject
8454 modifiers, but marked with an asterisk.*/
8455
8456 case MOD_CTM: /* Match context */
8457 case MOD_DAT: /* Subject line */
8458 case MOD_PND: /* As PD, but not default pattern */
8459 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8460 is_pattern = FALSE;
8461 break;
8462
8463 default: printf("** Unknown type for modifier '%s'\n", m->name);
8464 /* Fall through */
8465 case MOD_PD: /* Pattern or subject */
8466 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8467 is_pattern = for_pattern;
8468 break;
8469 }
8470
8471 if (for_pattern == is_pattern) list[n++] = i;
8472 }
8473
8474 /* Now print from the list in two columns. */
8475
8476 printf("-------------- %s MODIFIERS --------------\n", title);
8477
8478 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8479 {
8480 modstruct *m = modlist + list[i];
8481 display_one_modifier(m, for_pattern);
8482 if (j < n)
8483 {
8484 uint32_t k = 27 - strlen(m->name);
8485 while (k-- > 0) printf(" ");
8486 display_one_modifier(modlist + list[j], for_pattern);
8487 }
8488 printf("\n");
8489 }
8490 }
8491
8492
8493
8494 /*************************************************
8495 * Display the list of modifiers *
8496 *************************************************/
8497
8498 static void
display_modifiers(void)8499 display_modifiers(void)
8500 {
8501 printf(
8502 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8503 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8504 "that are listed for both patterns and subjects have different effects in\n"
8505 "each case.\n\n");
8506 display_selected_modifiers(TRUE, "PATTERN");
8507 printf("\n");
8508 display_selected_modifiers(FALSE, "SUBJECT");
8509 }
8510
8511
8512
8513 /*************************************************
8514 * Main Program *
8515 *************************************************/
8516
8517 int
main(int argc,char ** argv)8518 main(int argc, char **argv)
8519 {
8520 uint32_t temp;
8521 uint32_t yield = 0;
8522 uint32_t op = 1;
8523 BOOL notdone = TRUE;
8524 BOOL quiet = FALSE;
8525 BOOL showtotaltimes = FALSE;
8526 BOOL skipping = FALSE;
8527 char *arg_subject = NULL;
8528 char *arg_pattern = NULL;
8529 char *arg_error = NULL;
8530
8531 /* The offsets to the options and control bits fields of the pattern and data
8532 control blocks must be the same so that common options and controls such as
8533 "anchored" or "memory" can work for either of them from a single table entry.
8534 We cannot test this till runtime because "offsetof" does not work in the
8535 preprocessor. */
8536
8537 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8538 PO(control2) != DO(control2))
8539 {
8540 fprintf(stderr, "** Coding error: "
8541 "options and control offsets for pattern and data must be the same.\n");
8542 return 1;
8543 }
8544
8545 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8546 same time checking that a request for the length gives the same answer. Also
8547 check lengths for non-string items. */
8548
8549 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8550 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8551
8552 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8553 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8554
8555 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8556 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8557
8558 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8559 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8560 {
8561 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8562 return 1;
8563 }
8564
8565 /* Check that bad options are diagnosed. */
8566
8567 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8568 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8569 {
8570 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8571 return 1;
8572 }
8573
8574 /* This configuration option is now obsolete, but running a quick check ensures
8575 that its code is covered. */
8576
8577 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8578
8579 /* Get buffers from malloc() so that valgrind will check their misuse when
8580 debugging. They grow automatically when very long lines are read. The 16-
8581 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8582
8583 buffer = (uint8_t *)malloc(pbuffer8_size);
8584 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8585
8586 /* The following _setmode() stuff is some Windows magic that tells its runtime
8587 library to translate CRLF into a single LF character. At least, that's what
8588 I've been told: never having used Windows I take this all on trust. Originally
8589 it set 0x8000, but then I was advised that _O_BINARY was better. */
8590
8591 #if defined(_WIN32) || defined(WIN32)
8592 _setmode( _fileno( stdout ), _O_BINARY );
8593 #endif
8594
8595 /* Initialization that does not depend on the running mode. */
8596
8597 locale_name[0] = 0;
8598
8599 memset(&def_patctl, 0, sizeof(patctl));
8600 def_patctl.convert_type = CONVERT_UNSET;
8601
8602 memset(&def_datctl, 0, sizeof(datctl));
8603 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8604 def_datctl.copy_numbers[0] = -1;
8605 def_datctl.get_numbers[0] = -1;
8606 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8607 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8608 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8609
8610 /* Scan command line options. */
8611
8612 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8613 {
8614 char *endptr;
8615 char *arg = argv[op];
8616 unsigned long uli;
8617
8618 /* List modifiers and exit. */
8619
8620 if (strcmp(arg, "-LM") == 0)
8621 {
8622 display_modifiers();
8623 goto EXIT;
8624 }
8625
8626 /* Display and/or set return code for configuration options. */
8627
8628 if (strcmp(arg, "-C") == 0)
8629 {
8630 yield = c_option(argv[op + 1]);
8631 goto EXIT;
8632 }
8633
8634 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8635 and 32-bit modes because that won't happen naturally when 8-bit is also
8636 configured. Also call some other functions that are not otherwise used. This
8637 means that a coverage report won't claim there are uncalled functions. */
8638
8639 if (strcmp(arg, "-8") == 0)
8640 {
8641 #ifdef SUPPORT_PCRE2_8
8642 test_mode = PCRE8_MODE;
8643 (void)pcre2_set_bsr_8(pat_context8, 999);
8644 (void)pcre2_set_newline_8(pat_context8, 999);
8645 #else
8646 fprintf(stderr,
8647 "** This version of PCRE2 was built without 8-bit support\n");
8648 exit(1);
8649 #endif
8650 }
8651
8652 else if (strcmp(arg, "-16") == 0)
8653 {
8654 #ifdef SUPPORT_PCRE2_16
8655 test_mode = PCRE16_MODE;
8656 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8657 (void)pcre2_set_bsr_16(pat_context16, 999);
8658 (void)pcre2_set_newline_16(pat_context16, 999);
8659 #else
8660 fprintf(stderr,
8661 "** This version of PCRE2 was built without 16-bit support\n");
8662 exit(1);
8663 #endif
8664 }
8665
8666 else if (strcmp(arg, "-32") == 0)
8667 {
8668 #ifdef SUPPORT_PCRE2_32
8669 test_mode = PCRE32_MODE;
8670 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8671 (void)pcre2_set_bsr_32(pat_context32, 999);
8672 (void)pcre2_set_newline_32(pat_context32, 999);
8673 #else
8674 fprintf(stderr,
8675 "** This version of PCRE2 was built without 32-bit support\n");
8676 exit(1);
8677 #endif
8678 }
8679
8680 /* Set quiet (no version verification) */
8681
8682 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8683
8684 /* Set system stack size */
8685
8686 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8687 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8688 {
8689 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8690 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8691 exit(1);
8692 #else
8693 int rc;
8694 uint32_t stack_size;
8695 struct rlimit rlim;
8696 if (U32OVERFLOW(uli))
8697 {
8698 fprintf(stderr, "** Argument for -S is too big\n");
8699 exit(1);
8700 }
8701 stack_size = (uint32_t)uli;
8702 getrlimit(RLIMIT_STACK, &rlim);
8703 rlim.rlim_cur = stack_size * 1024 * 1024;
8704 if (rlim.rlim_cur > rlim.rlim_max)
8705 {
8706 fprintf(stderr,
8707 "pcre2test: requested stack size %luMiB is greater than hard limit "
8708 "%luMiB\n", (unsigned long int)stack_size,
8709 (unsigned long int)(rlim.rlim_max));
8710 exit(1);
8711 }
8712 rc = setrlimit(RLIMIT_STACK, &rlim);
8713 if (rc != 0)
8714 {
8715 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8716 (unsigned long int)stack_size, strerror(errno));
8717 exit(1);
8718 }
8719 op++;
8720 argc--;
8721 #endif
8722 }
8723
8724 /* Set some common pattern and subject controls */
8725
8726 else if (strcmp(arg, "-AC") == 0)
8727 {
8728 def_patctl.options |= PCRE2_AUTO_CALLOUT;
8729 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8730 }
8731 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
8732 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
8733 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
8734 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8735 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
8736 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
8737 strcmp(arg, "-jitfast") == 0)
8738 {
8739 if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
8740 else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
8741 def_patctl.jit = JIT_DEFAULT; /* full & partial */
8742 #ifndef SUPPORT_JIT
8743 fprintf(stderr, "** Warning: JIT support is not available: "
8744 "-jit[fast|verify] calls functions that do nothing.\n");
8745 #endif
8746 }
8747
8748 /* Set timing parameters */
8749
8750 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8751 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8752 {
8753 int both = arg[2] == 0;
8754 showtotaltimes = arg[1] == 'T';
8755 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8756 {
8757 if (uli == 0)
8758 {
8759 fprintf(stderr, "** Argument for %s must not be zero\n", arg);
8760 exit(1);
8761 }
8762 if (U32OVERFLOW(uli))
8763 {
8764 fprintf(stderr, "** Argument for %s is too big\n", arg);
8765 exit(1);
8766 }
8767 timeitm = (int)uli;
8768 op++;
8769 argc--;
8770 }
8771 else timeitm = LOOPREPEAT;
8772 if (both) timeit = timeitm;
8773 }
8774
8775 /* Give help */
8776
8777 else if (strcmp(arg, "-help") == 0 ||
8778 strcmp(arg, "--help") == 0)
8779 {
8780 usage();
8781 goto EXIT;
8782 }
8783
8784 /* Show version */
8785
8786 else if (strcmp(arg, "-version") == 0 ||
8787 strcmp(arg, "--version") == 0)
8788 {
8789 print_version(stdout);
8790 goto EXIT;
8791 }
8792
8793 /* The following options save their data for processing once we know what
8794 the running mode is. */
8795
8796 else if (strcmp(arg, "-error") == 0)
8797 {
8798 arg_error = argv[op+1];
8799 goto CHECK_VALUE_EXISTS;
8800 }
8801
8802 else if (strcmp(arg, "-subject") == 0)
8803 {
8804 arg_subject = argv[op+1];
8805 goto CHECK_VALUE_EXISTS;
8806 }
8807
8808 else if (strcmp(arg, "-pattern") == 0)
8809 {
8810 arg_pattern = argv[op+1];
8811 CHECK_VALUE_EXISTS:
8812 if (argc <= 2)
8813 {
8814 fprintf(stderr, "** Missing value for %s\n", arg);
8815 yield = 1;
8816 goto EXIT;
8817 }
8818 op++;
8819 argc--;
8820 }
8821
8822 /* Unrecognized option */
8823
8824 else
8825 {
8826 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8827 usage();
8828 yield = 1;
8829 goto EXIT;
8830 }
8831 op++;
8832 argc--;
8833 }
8834
8835 /* If -error was present, get the error numbers, show the messages, and exit.
8836 We wait to do this until we know which mode we are in. */
8837
8838 if (arg_error != NULL)
8839 {
8840 int len;
8841 int errcode;
8842 char *endptr;
8843
8844 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8845 least 128 code units, because it is used for retrieving error messages. */
8846
8847 #ifdef SUPPORT_PCRE2_16
8848 if (test_mode == PCRE16_MODE)
8849 {
8850 pbuffer16_size = 256;
8851 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8852 if (pbuffer16 == NULL)
8853 {
8854 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8855 SIZ_CAST pbuffer16_size);
8856 yield = 1;
8857 goto EXIT;
8858 }
8859 }
8860 #endif
8861
8862 #ifdef SUPPORT_PCRE2_32
8863 if (test_mode == PCRE32_MODE)
8864 {
8865 pbuffer32_size = 512;
8866 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8867 if (pbuffer32 == NULL)
8868 {
8869 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8870 SIZ_CAST pbuffer32_size);
8871 yield = 1;
8872 goto EXIT;
8873 }
8874 }
8875 #endif
8876
8877 /* Loop along a list of error numbers. */
8878
8879 for (;;)
8880 {
8881 errcode = strtol(arg_error, &endptr, 10);
8882 if (*endptr != 0 && *endptr != CHAR_COMMA)
8883 {
8884 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8885 yield = 1;
8886 goto EXIT;
8887 }
8888 printf("Error %d: ", errcode);
8889 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8890 if (len < 0)
8891 {
8892 switch (len)
8893 {
8894 case PCRE2_ERROR_BADDATA:
8895 printf("PCRE2_ERROR_BADDATA (unknown error number)");
8896 break;
8897
8898 case PCRE2_ERROR_NOMEMORY:
8899 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8900 break;
8901
8902 default:
8903 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8904 break;
8905 }
8906 }
8907 else
8908 {
8909 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8910 }
8911 printf("\n");
8912 if (*endptr == 0) goto EXIT;
8913 arg_error = endptr + 1;
8914 }
8915 /* Control never reaches here */
8916 } /* End of -error handling */
8917
8918 /* Initialize things that cannot be done until we know which test mode we are
8919 running in. Exercise the general context copying and match data size functions,
8920 which are not otherwise used. */
8921
8922 code_unit_size = test_mode/8;
8923 max_oveccount = DEFAULT_OVECCOUNT;
8924
8925 /* Use macros to save a lot of duplication. */
8926
8927 #define CREATECONTEXTS \
8928 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8929 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8930 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8931 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8932 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8933 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8934 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8935 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8936 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8937
8938 #define CONTEXTTESTS \
8939 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8940 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8941 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8942 (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL); \
8943 (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
8944
8945
8946 /* Call the appropriate functions for the current mode, and exercise some
8947 functions that are not otherwise called. */
8948
8949 #ifdef SUPPORT_PCRE2_8
8950 #undef BITS
8951 #define BITS 8
8952 if (test_mode == PCRE8_MODE)
8953 {
8954 CREATECONTEXTS;
8955 CONTEXTTESTS;
8956 }
8957 #endif
8958
8959 #ifdef SUPPORT_PCRE2_16
8960 #undef BITS
8961 #define BITS 16
8962 if (test_mode == PCRE16_MODE)
8963 {
8964 CREATECONTEXTS;
8965 CONTEXTTESTS;
8966 }
8967 #endif
8968
8969 #ifdef SUPPORT_PCRE2_32
8970 #undef BITS
8971 #define BITS 32
8972 if (test_mode == PCRE32_MODE)
8973 {
8974 CREATECONTEXTS;
8975 CONTEXTTESTS;
8976 }
8977 #endif
8978
8979 /* Set a default parentheses nest limit that is large enough to run the
8980 standard tests (this also exercises the function). */
8981
8982 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8983
8984 /* Handle command line modifier settings, sending any error messages to
8985 stderr. We need to know the mode before modifying the context, and it is tidier
8986 to do them all in the same way. */
8987
8988 outfile = stderr;
8989 if ((arg_pattern != NULL &&
8990 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8991 (arg_subject != NULL &&
8992 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8993 {
8994 yield = 1;
8995 goto EXIT;
8996 }
8997
8998 /* Sort out the input and output files, defaulting to stdin/stdout. */
8999
9000 infile = stdin;
9001 outfile = stdout;
9002
9003 if (argc > 1 && strcmp(argv[op], "-") != 0)
9004 {
9005 infile = fopen(argv[op], INPUT_MODE);
9006 if (infile == NULL)
9007 {
9008 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9009 yield = 1;
9010 goto EXIT;
9011 }
9012 }
9013
9014 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9015 if (INTERACTIVE(infile)) using_history();
9016 #endif
9017
9018 if (argc > 2)
9019 {
9020 outfile = fopen(argv[op+1], OUTPUT_MODE);
9021 if (outfile == NULL)
9022 {
9023 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9024 yield = 1;
9025 goto EXIT;
9026 }
9027 }
9028
9029 /* Output a heading line unless quiet, then process input lines. */
9030
9031 if (!quiet) print_version(outfile);
9032
9033 SET(compiled_code, NULL);
9034
9035 #ifdef SUPPORT_PCRE2_8
9036 preg.re_pcre2_code = NULL;
9037 preg.re_match_data = NULL;
9038 #endif
9039
9040 while (notdone)
9041 {
9042 uint8_t *p;
9043 int rc = PR_OK;
9044 BOOL expectdata = TEST(compiled_code, !=, NULL);
9045 #ifdef SUPPORT_PCRE2_8
9046 expectdata |= preg.re_pcre2_code != NULL;
9047 #endif
9048
9049 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
9050 break;
9051 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9052 fflush(outfile);
9053 p = buffer;
9054
9055 /* If we have a pattern set up for testing, or we are skipping after a
9056 compile failure, a blank line terminates this test. */
9057
9058 if (expectdata || skipping)
9059 {
9060 while (isspace(*p)) p++;
9061 if (*p == 0)
9062 {
9063 #ifdef SUPPORT_PCRE2_8
9064 if (preg.re_pcre2_code != NULL)
9065 {
9066 regfree(&preg);
9067 preg.re_pcre2_code = NULL;
9068 preg.re_match_data = NULL;
9069 }
9070 #endif /* SUPPORT_PCRE2_8 */
9071 if (TEST(compiled_code, !=, NULL))
9072 {
9073 SUB1(pcre2_code_free, compiled_code);
9074 SET(compiled_code, NULL);
9075 }
9076 skipping = FALSE;
9077 setlocale(LC_CTYPE, "C");
9078 }
9079
9080 /* Otherwise, if we are not skipping, and the line is not a data comment
9081 line starting with "\=", process a data line. */
9082
9083 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9084 {
9085 rc = process_data();
9086 }
9087 }
9088
9089 /* We do not have a pattern set up for testing. Lines starting with # are
9090 either comments or special commands. Blank lines are ignored. Otherwise, the
9091 line must start with a valid delimiter. It is then processed as a pattern
9092 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9093 valgrind, make the unused part of the buffer undefined, to catch overruns. */
9094
9095 else if (*p == '#')
9096 {
9097 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9098 rc = process_command();
9099 }
9100
9101 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9102 {
9103 rc = process_pattern();
9104 dfa_matched = 0;
9105 }
9106
9107 else
9108 {
9109 while (isspace(*p)) p++;
9110 if (*p != 0)
9111 {
9112 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9113 *buffer);
9114 rc = PR_SKIP;
9115 }
9116 }
9117
9118 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9119 else if (rc == PR_ABEND)
9120 {
9121 fprintf(outfile, "** pcre2test run abandoned\n");
9122 yield = 1;
9123 goto EXIT;
9124 }
9125 }
9126
9127 /* Finish off a normal run. */
9128
9129 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9130
9131 if (showtotaltimes)
9132 {
9133 const char *pad = "";
9134 fprintf(outfile, "--------------------------------------\n");
9135 if (timeit > 0)
9136 {
9137 fprintf(outfile, "Total compile time %.4f milliseconds\n",
9138 (((double)total_compile_time * 1000.0) / (double)timeit) /
9139 (double)CLOCKS_PER_SEC);
9140 if (total_jit_compile_time > 0)
9141 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
9142 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9143 (double)CLOCKS_PER_SEC);
9144 pad = " ";
9145 }
9146 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9147 (((double)total_match_time * 1000.0) / (double)timeitm) /
9148 (double)CLOCKS_PER_SEC);
9149 }
9150
9151
9152 EXIT:
9153
9154 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9155 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9156 #endif
9157
9158 if (infile != NULL && infile != stdin) fclose(infile);
9159 if (outfile != NULL && outfile != stdout) fclose(outfile);
9160
9161 free(buffer);
9162 free(dbuffer);
9163 free(pbuffer8);
9164 free(dfa_workspace);
9165 free((void *)locale_tables);
9166 free(tables3);
9167 PCRE2_MATCH_DATA_FREE(match_data);
9168 SUB1(pcre2_code_free, compiled_code);
9169
9170 while(patstacknext-- > 0)
9171 {
9172 SET(compiled_code, patstack[patstacknext]);
9173 SUB1(pcre2_code_free, compiled_code);
9174 }
9175
9176 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9177 if (jit_stack != NULL)
9178 {
9179 PCRE2_JIT_STACK_FREE(jit_stack);
9180 }
9181
9182 #define FREECONTEXTS \
9183 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9184 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9185 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9186 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9187 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9188 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9189 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9190 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9191
9192 #ifdef SUPPORT_PCRE2_8
9193 #undef BITS
9194 #define BITS 8
9195 if (preg.re_pcre2_code != NULL) regfree(&preg);
9196 FREECONTEXTS;
9197 #endif
9198
9199 #ifdef SUPPORT_PCRE2_16
9200 #undef BITS
9201 #define BITS 16
9202 free(pbuffer16);
9203 FREECONTEXTS;
9204 #endif
9205
9206 #ifdef SUPPORT_PCRE2_32
9207 #undef BITS
9208 #define BITS 32
9209 free(pbuffer32);
9210 FREECONTEXTS;
9211 #endif
9212
9213 #if defined(__VMS)
9214 yield = SS$_NORMAL; /* Return values via DCL symbols */
9215 #endif
9216
9217 return yield;
9218 }
9219
9220 /* End of pcre2test.c */
9221