1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2021 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Debugging code enabler */
82
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102
/* The interactivity test is wrapped in a macro so that a port to a different
environment has exactly one place to change. The argument is a FILE pointer. */

#define INTERACTIVE(f) (isatty(fileno(f)))
107
108
109 /* ---------------------- System-specific definitions ---------------------- */
110
/* Several things differ for Windows builds. Originally, pcretest opened its
input and output without "b"; "b" was then reported to be needed in some
environments, so release 5.0 added it to both input and output (it makes no
difference on Unix-like systems). Later it was reported to be wrong for input
on Windows. The fopen() modes are therefore collected into macros here, and
"b" is omitted from the text input mode under Windows. The BINARY versions are
used when saving/restoring compiled patterns; they are the same on every
platform, so they are defined once after the platform-specific part. */

#if defined(_WIN32) || defined(WIN32)

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore, though some environments
already supply the plain names, hence the guards. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /*   for setrlimit(). */

#if defined(NATIVE_ZOS)        /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#endif  /* Windows / not Windows */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
159
160 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
161 user [2] provided alternative code which worked better for him. I have
162 commented out the original, but kept it around just in case. */
163
164 #ifdef __VMS
165 #include <ssdef.h>
166 /* These two includes came from [2]. */
167 #include descrip
168 #include lib$routines
169 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
170 #endif
171
/* printf() conversion suffixes for ptrdiff_t (PTR_FORM) and size_t (SIZ_FORM)
values. %td and %zu are used unless DISABLE_PERCENT_ZT is set, the compiler is
a Visual C older than 1800, or a non-Microsoft compiler does not announce C99
or later. Old VC and older compilers do not support %td or %zu, and even some
that claim to be C99 do not (hence DISABLE_PERCENT_ZT). */

#if !defined(DISABLE_PERCENT_ZT) && \
    (!defined(_MSC_VER) || (_MSC_VER >= 1800)) && \
    (defined(_MSC_VER) || \
      (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)))
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#elif defined(_WIN64)
#define PTR_FORM "lld"
#define SIZ_FORM "llu"
#else
#define PTR_FORM "ld"
#define SIZ_FORM "lu"
#endif
188
189 /* ------------------End of system-specific definitions -------------------- */
190
/* Token-pasting helpers used in several places below. glue() pastes its
arguments exactly as written; G() goes through glue() so that arguments which
are themselves macros are expanded before being pasted. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
195
/* Miscellaneous parameters and manifests */

/* Supply CLOCKS_PER_SEC when <time.h> does not: use CLK_TCK if that exists,
otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

#define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
#define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
#define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15      /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
#define LOCALESIZE 32             /* Size of locale name */
#define LOOPREPEAT 500000         /* Default loop count for timing */
#define MALLOCLISTSIZE 20         /* For remembering mallocs */
#define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
#define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
#define VERSION_SIZE 64           /* Size of buffer for the version strings */

/* Default JIT compile options */

#define JIT_DEFAULT \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD)

/* The buffer into which replacement strings are copied must be big enough to
hold them as 32-bit code units. */

#define REPLACE_BUFFSIZE 1024     /* This is a byte value */

/* Execution modes */

#define PCRE8_MODE   8
#define PCRE16_MODE 16
#define PCRE32_MODE 32

/* Processing returns */

enum {
  PR_OK,
  PR_SKIP,
  PR_ABEND
};
239
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK consults isprint() only when use_tables is non-NULL and the value is
below 256; otherwise it falls back to the fixed PRINTABLE range. The argument
is parenthesized in the range test so that an expression argument (for example
"x | 0x100") is compared as a whole rather than being split by operator
precedence. Note that the argument is evaluated more than once, so it must not
have side effects. */

#define PRINTOK(c) \
  ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
252
/* We have to include some of the library source files because we need
to use some of the macros, internal structure definitions, and other internal
values - pcre2test has "inside information" compared to an application program
that strictly follows the PCRE2 API.

Before including pcre2_internal.h we define PRIV so that it does not get
defined therein. This ensures that PRIV names in the included files do not
clash with those in the libraries. Also, although pcre2_internal.h does itself
include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
for building the library.

PCRE2_CODE_UNIT_WIDTH is set to zero for these inclusions; the mode-dependent
definitions are pulled in separately, once per supported width, further down.
The order of the directives in this group matters and must not be changed. */

#define PRIV(name) name
#define PCRE2_CODE_UNIT_WIDTH 0
#include "pcre2.h"
#include "pcre2posix.h"
#include "pcre2_internal.h"

/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */

#define PCRE2_PCRE2TEST
#include "pcre2_tables.c"
#include "pcre2_ucd.c"
278
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter.

When long is wider than 32 bits, a value is out of range if it lies outside the
32-bit limits. When long is itself 32 bits, the only available signal is that
the value equals one of the limits. The macro argument is parenthesized so that
an expression argument is tested as a whole; S32OVERFLOW evaluates its argument
twice, so it must not have side effects. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
294
/* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
pcre2_intmodedep.h, which is where mode-dependent macros and structures are
defined. We can now include it for each supported code unit width. Because
PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
while including these files, and then restore it to a no-op. Because LINK_SIZE
may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
these inclusions should not be changed.

Each group below follows the same pattern: set the width, make PCRE2_SUFFIX
append that width to a name (via G, so the name is macro-expanded first),
include the mode-dependent header and the pattern-printing source, then remove
both definitions again. */

#undef PCRE2_SUFFIX
#undef PCRE2_CODE_UNIT_WIDTH

#ifdef SUPPORT_PCRE2_8
#define PCRE2_CODE_UNIT_WIDTH 8
#define PCRE2_SUFFIX(a) G(a,8)
#include "pcre2_intmodedep.h"
#include "pcre2_printint.c"
#undef PCRE2_CODE_UNIT_WIDTH
#undef PCRE2_SUFFIX
#endif   /* SUPPORT_PCRE2_8 */

#ifdef SUPPORT_PCRE2_16
#define PCRE2_CODE_UNIT_WIDTH 16
#define PCRE2_SUFFIX(a) G(a,16)
#include "pcre2_intmodedep.h"
#include "pcre2_printint.c"
#undef PCRE2_CODE_UNIT_WIDTH
#undef PCRE2_SUFFIX
#endif   /* SUPPORT_PCRE2_16 */

#ifdef SUPPORT_PCRE2_32
#define PCRE2_CODE_UNIT_WIDTH 32
#define PCRE2_SUFFIX(a) G(a,32)
#include "pcre2_intmodedep.h"
#include "pcre2_printint.c"
#undef PCRE2_CODE_UNIT_WIDTH
#undef PCRE2_SUFFIX
#endif   /* SUPPORT_PCRE2_32 */

/* Restore the no-op form: from here on PCRE2_SUFFIX(a) is just a. */

#define PCRE2_SUFFIX(a) a
335
/* We need to be able to check input text for UTF-8 validity, whatever code
widths are actually available, because the input to pcre2test is always in
8-bit code units. So we include the UTF validity checking function for 8-bit
code units.

For the duration of the inclusion, PCRE2_CODE_UNIT_WIDTH is set to 8 and
PCRE2_SPTR is made to mean PCRE2_SPTR8; both are undefined again afterwards so
that the rest of this file stays width-neutral. */

extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);

#define PCRE2_CODE_UNIT_WIDTH 8
#undef PCRE2_SPTR
#define PCRE2_SPTR PCRE2_SPTR8
#include "pcre2_valid_utf.c"
#undef PCRE2_CODE_UNIT_WIDTH
#undef PCRE2_SPTR
349
/* Pick the default test mode and the width-specific names used for things
that are the same in all modes. If we have 8-bit support, default to it; if
there is also 16- or 32-bit support, it can be selected by a command-line
option. If there is no 8-bit support, there must be 16-bit or 32-bit support,
so default to one of them. The config function, JIT stack, contexts, and
version string are the same in all modes, so the form belonging to the first
available width is used. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE           PCRE8_MODE
#define VERSION_TYPE                PCRE2_UCHAR8
#define PCRE2_CONFIG                pcre2_config_8
#define PCRE2_JIT_STACK             pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE           PCRE16_MODE
#define VERSION_TYPE                PCRE2_UCHAR16
#define PCRE2_CONFIG                pcre2_config_16
#define PCRE2_JIT_STACK             pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE           PCRE32_MODE
#define VERSION_TYPE                PCRE2_UCHAR32
#define PCRE2_CONFIG                pcre2_config_32
#define PCRE2_JIT_STACK             pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_32

#endif
386
/* ------------- Structure and table for handling #-commands ------------- */

/* One entry per #-command: its name and the CMD_xxx code it maps to. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Command codes. CMD_UNKNOWN comes last and has no entry in the table. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_LOADTABLES,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "loadtables",      CMD_LOADTABLES },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

/* Number of entries in cmdlist. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
411
412 /* ------------- Structures and tables for handling modifiers -------------- */
413
/* Names of the newline types. The order must be kept in step with the
definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
419
420 /* Structure and table for handling pattern conversion types. */
421
422 typedef struct convertstruct {
423 const char *name;
424 uint32_t option;
425 } convertstruct;
426
427 static convertstruct convertlist[] = {
428 { "glob", PCRE2_CONVERT_GLOB },
429 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
430 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
431 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
432 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
433 { "unset", CONVERT_UNSET }};
434
435 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
436
/* Modifier types and applicability. The first group says where a modifier may
be used; the second group says what kind of value it carries. */

enum {
  /* Applicability */
  MOD_CTC = 0,  /* Applies to a compile context */
  MOD_CTM,      /* Applies to a match context */
  MOD_PAT,      /* Applies to a pattern */
  MOD_PATP,     /* Ditto, OK for Perl test */
  MOD_DAT,      /* Applies to a data line */
  MOD_PD,       /* Applies to a pattern or a data line */
  MOD_PDP,      /* As MOD_PD, OK for Perl test */
  MOD_PND,      /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,     /* As MOD_PND, OK for Perl test */

  /* Value kinds */
  MOD_CHR,      /* Is a single character */
  MOD_CON,      /* Is a "convert" type/options list */
  MOD_CTL,      /* Is a control bit */
  MOD_BSR,      /* Is a BSR value */
  MOD_IN2,      /* Is one or two unsigned integers */
  MOD_INS,      /* Is a signed integer */
  MOD_INT,      /* Is an unsigned integer */
  MOD_IND,      /* Is an unsigned integer, but no value => default */
  MOD_NL,       /* Is a newline value */
  MOD_NN,       /* Is a number or a name; more than one may occur */
  MOD_OPT,      /* Is an option bit */
  MOD_SIZ,      /* Is a PCRE2_SIZE value */
  MOD_STR       /* Is a string */
};
461
/* Control bits. Some apply to compiling, some to matching, but some can be set
either on a pattern or a data line, so they must all be distinct. There are now
so many of them that they are split into two fields. Each bit is written as a
shift of 1u so that its position in the word is explicit. */

/* First control word */

#define CTL_AFTERTEXT       (1u <<  0)
#define CTL_ALLAFTERTEXT    (1u <<  1)
#define CTL_ALLCAPTURES     (1u <<  2)
#define CTL_ALLUSEDTEXT     (1u <<  3)
#define CTL_ALTGLOBAL       (1u <<  4)
#define CTL_BINCODE         (1u <<  5)
#define CTL_CALLOUT_CAPTURE (1u <<  6)
#define CTL_CALLOUT_INFO    (1u <<  7)
#define CTL_CALLOUT_NONE    (1u <<  8)
#define CTL_DFA             (1u <<  9)
#define CTL_EXPAND          (1u << 10)
#define CTL_FINDLIMITS      (1u << 11)
#define CTL_FRAMESIZE       (1u << 12)
#define CTL_FULLBINCODE     (1u << 13)
#define CTL_GETALL          (1u << 14)
#define CTL_GLOBAL          (1u << 15)
#define CTL_HEXPAT          (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO            (1u << 17)
#define CTL_JITFAST         (1u << 18)
#define CTL_JITVERIFY       (1u << 19)
#define CTL_MARK            (1u << 20)
#define CTL_MEMORY          (1u << 21)
#define CTL_NULLCONTEXT     (1u << 22)
#define CTL_POSIX           (1u << 23)
#define CTL_POSIX_NOSUB     (1u << 24)
#define CTL_PUSH            (1u << 25)  /* These three must be */
#define CTL_PUSHCOPY        (1u << 26)  /*   all in the same */
#define CTL_PUSHTABLESCOPY  (1u << 27)  /*     word. */
#define CTL_STARTCHAR       (1u << 28)
#define CTL_USE_LENGTH      (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT      (1u << 30)
#define CTL_ZERO_TERMINATE  (1u << 31)

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)   /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)

/* Second control word */

#define CTL2_SUBSTITUTE_CALLOUT          (1u <<  0)
#define CTL2_SUBSTITUTE_EXTENDED         (1u <<  1)
#define CTL2_SUBSTITUTE_LITERAL          (1u <<  2)
#define CTL2_SUBSTITUTE_MATCHED          (1u <<  3)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  (1u <<  4)
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY (1u <<  5)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    (1u <<  6)
#define CTL2_SUBSTITUTE_UNSET_EMPTY      (1u <<  7)
#define CTL2_SUBJECT_LITERAL             (1u <<  8)
#define CTL2_CALLOUT_NO_WHERE            (1u <<  9)
#define CTL2_CALLOUT_EXTRA               (1u << 10)
#define CTL2_ALLVECTOR                   (1u << 11)

#define CTL2_NL_SET                      (1u << 30)  /* Informational */
#define CTL2_BSR_SET                     (1u << 31)  /* Informational */

/* These are the matching controls that may be set either on a pattern or on a
data line. They are copied from the pattern controls as initial settings for
data line controls. Note that CTL_MEMORY is not included here, because it does
different things in the two cases. */

#define CTL_ALLPD ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_ALLCAPTURES  | \
  CTL_ALLUSEDTEXT  | \
  CTL_ALTGLOBAL    | \
  CTL_GLOBAL       | \
  CTL_MARK         | \
  CTL_STARTCHAR    | \
  CTL_UTF8_INPUT)

#define CTL2_ALLPD ( \
  CTL2_SUBSTITUTE_CALLOUT          | \
  CTL2_SUBSTITUTE_EXTENDED         | \
  CTL2_SUBSTITUTE_LITERAL          | \
  CTL2_SUBSTITUTE_MATCHED          | \
  CTL2_SUBSTITUTE_OVERFLOW_LENGTH  | \
  CTL2_SUBSTITUTE_REPLACEMENT_ONLY | \
  CTL2_SUBSTITUTE_UNKNOWN_UNSET    | \
  CTL2_SUBSTITUTE_UNSET_EMPTY      | \
  CTL2_ALLVECTOR)
547
/* Structures for holding modifier information for patterns and subject strings
(data). Fields containing modifiers that can be set either for a pattern or a
subject must be at the start and in the same order in both cases so that the
same offset in the big table below works for both.

The shared leading fields are options, control, control2, jitstack,
replacement, substitute_skip and substitute_stop; everything after them is
specific to one structure. Do not reorder or insert fields within the shared
prefix without making the same change in both structures. */

typedef struct patctl {       /* Structure for pattern modifiers. */
  /* Shared prefix: identical in type and order to the start of datctl. */
  uint32_t options;           /* Must be in same position as datctl */
  uint32_t control;           /* Must be in same position as datctl */
  uint32_t control2;          /* Must be in same position as datctl */
  uint32_t jitstack;          /* Must be in same position as datctl */
  uint8_t replacement[REPLACE_MODSIZE];  /* So must this */
  uint32_t substitute_skip;   /* Must be in same position as datctl */
  uint32_t substitute_stop;   /* Must be in same position as datctl */
  /* Pattern-only fields follow. */
  uint32_t jit;
  uint32_t stackguard_test;
  uint32_t tables_id;
  uint32_t convert_type;
  uint32_t convert_length;
  uint32_t convert_glob_escape;
  uint32_t convert_glob_separator;
  uint32_t regerror_buffsize;
  uint8_t locale[LOCALESIZE];
} patctl;

/* Dimensions of the copy/get fields in datctl: MAXCPYGET is the length of the
number arrays and LENCPYGET is the size in bytes of the name buffers. */

#define MAXCPYGET 10
#define LENCPYGET 64

typedef struct datctl {       /* Structure for data line modifiers. */
  /* Shared prefix: identical in type and order to the start of patctl. */
  uint32_t options;           /* Must be in same position as patctl */
  uint32_t control;           /* Must be in same position as patctl */
  uint32_t control2;          /* Must be in same position as patctl */
  uint32_t jitstack;          /* Must be in same position as patctl */
  uint8_t replacement[REPLACE_MODSIZE];  /* So must this */
  uint32_t substitute_skip;   /* Must be in same position as patctl */
  uint32_t substitute_stop;   /* Must be in same position as patctl */
  /* Data-line-only fields follow. */
  uint32_t startend[2];
  uint32_t cerror[2];
  uint32_t cfail[2];
  int32_t callout_data;
  int32_t copy_numbers[MAXCPYGET];
  int32_t get_numbers[MAXCPYGET];
  uint32_t oveccount;
  uint32_t offset;
  uint8_t copy_names[LENCPYGET];
  uint8_t get_names[LENCPYGET];
} datctl;
594
/* Identifiers saying which context a modifier is to be applied to. */

enum {
  CTX_PAT = 0,  /* Active pattern context */
  CTX_POPPAT,   /* Ditto, for a popped pattern */
  CTX_DEFPAT,   /* Default pattern context */
  CTX_DAT,      /* Active data (match) context */
  CTX_DEFDAT    /* Default data (match) context */
};
602
/* Shorthand for field offsets, to keep the big table below readable:
CO = field of the compile context, MO = field of the match context,
PO = field of patctl, DO = field of datctl. PD is used for fields that may be
set on either a pattern or a data line; it yields the patctl offset. */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)
#define PO(field) offsetof(patctl, field)
#define PD(field) offsetof(patctl, field)
#define DO(field) offsetof(datctl, field)
610
/* Table of all long-form modifiers. Must be in collating sequence of modifier
name because it is searched by binary chop. When adding an entry, insert it at
its sorted position; note that '_' sorts before lower-case letters (so, for
example, "alt_verbnames" precedes "altglobal"). */

typedef struct modstruct {
  const char *name;     /* Long-form modifier name; the sort key of the table */
  uint16_t which;       /* Applicability: one of MOD_CTC ... MOD_PNDP */
  uint16_t type;        /* Kind of value: one of MOD_CHR ... MOD_STR */
  uint32_t value;       /* Depends on type: the bit(s) for MOD_CTL and MOD_OPT
                           entries, the field size for MOD_STR entries, the
                           offset of the number list for MOD_NN entries, 7 for
                           the MOD_IND "jit" entry, and 0 for the rest */
  PCRE2_SIZE offset;    /* offsetof() the target field, via CO/MO/PO/PD/DO */
} modstruct;

static modstruct modlist[] = {
  { "aftertext",                   MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
  { "allaftertext",                MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
  { "allcaptures",                 MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
  { "allow_empty_class",           MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
  { "allow_lookaround_bsk",        MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
  { "allow_surrogate_escapes",     MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
  { "allusedtext",                 MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
  { "allvector",                   MOD_PND,  MOD_CTL, CTL2_ALLVECTOR,             PO(control2) },
  { "alt_bsux",                    MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
  { "alt_circumflex",              MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
  { "alt_verbnames",               MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
  { "altglobal",                   MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
  { "anchored",                    MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
  { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
  { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
  { "bincode",                     MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
  { "bsr",                         MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
  { "callout_capture",             MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
  { "callout_data",                MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
  { "callout_error",               MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
  { "callout_extra",               MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
  { "callout_fail",                MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
  { "callout_info",                MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
  { "callout_no_where",            MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
  { "callout_none",                MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
  { "caseless",                    MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
  { "convert",                     MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
  { "convert_glob_escape",         MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
  { "convert_glob_separator",      MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
  { "convert_length",              MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
  { "copy",                        MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
  { "copy_matched_subject",        MOD_DAT,  MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
  { "debug",                       MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
  { "depth_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
  { "dfa",                         MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
  { "dfa_restart",                 MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
  { "dfa_shortest",                MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
  { "dollar_endonly",              MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
  { "dotall",                      MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
  { "dupnames",                    MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
  { "endanchored",                 MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
  { "escaped_cr_is_lf",            MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
  { "expand",                      MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
  { "extended",                    MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
  { "extended_more",               MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
  { "extra_alt_bsux",              MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALT_BSUX,       CO(extra_options) },
  { "find_limits",                 MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
  { "firstline",                   MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
  { "framesize",                   MOD_PAT,  MOD_CTL, CTL_FRAMESIZE,              PO(control) },
  { "fullbincode",                 MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
  { "get",                         MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
  { "getall",                      MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
  { "global",                      MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
  { "heap_limit",                  MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
  { "hex",                         MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
  { "info",                        MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
  { "jit",                         MOD_PAT,  MOD_IND, 7,                          PO(jit) },
  { "jitfast",                     MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
  { "jitstack",                    MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
  { "jitverify",                   MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
  { "literal",                     MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
  { "locale",                      MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
  { "mark",                        MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
  { "match_invalid_utf",           MOD_PAT,  MOD_OPT, PCRE2_MATCH_INVALID_UTF,    PO(options) },
  { "match_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
  { "match_line",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
  { "match_unset_backref",         MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
  { "match_word",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
  { "max_pattern_length",          MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
  { "memory",                      MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
  { "multiline",                   MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
  { "never_backslash_c",           MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
  { "never_ucp",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
  { "never_utf",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
  { "newline",                     MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
  { "no_auto_capture",             MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
  { "no_auto_possess",             MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
  { "no_dotstar_anchor",           MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
  { "no_jit",                      MOD_DAT,  MOD_OPT, PCRE2_NO_JIT,               DO(options) },
  { "no_start_optimize",           MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
  { "no_utf_check",                MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
  { "notbol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
  { "notempty",                    MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
  { "notempty_atstart",            MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
  { "noteol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
  { "null_context",                MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
  { "offset",                      MOD_DAT,  MOD_INT, 0,                          DO(offset) },
  { "offset_limit",                MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
  { "ovector",                     MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
  { "parens_nest_limit",           MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
  { "partial_hard",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
  { "partial_soft",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
  { "ph",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
  { "posix",                       MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
  { "posix_nosub",                 MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
  { "posix_startend",              MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
  { "ps",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
  { "push",                        MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
  { "pushcopy",                    MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
  { "pushtablescopy",              MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
  { "recursion_limit",             MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
  { "regerror_buffsize",           MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
  { "replace",                     MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
  { "stackguard",                  MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
  { "startchar",                   MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
  { "startoffset",                 MOD_DAT,  MOD_INT, 0,                          DO(offset) },
  { "subject_literal",             MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
  { "substitute_callout",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_CALLOUT,    PO(control2) },
  { "substitute_extended",         MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
  { "substitute_literal",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_LITERAL,    PO(control2) },
  { "substitute_matched",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_MATCHED,    PO(control2) },
  { "substitute_overflow_length",  MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
  { "substitute_replacement_only", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
  { "substitute_skip",             MOD_PND,  MOD_INT, 0,                          PO(substitute_skip) },
  { "substitute_stop",             MOD_PND,  MOD_INT, 0,                          PO(substitute_stop) },
  { "substitute_unknown_unset",    MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
  { "substitute_unset_empty",      MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
  { "tables",                      MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
  { "ucp",                         MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
  { "ungreedy",                    MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
  { "use_length",                  MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
  { "use_offset_limit",            MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
  { "utf",                         MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
  { "utf8_input",                  MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
  { "zero_terminate",              MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
};
749
/* Number of entries in modlist. The expansion is parenthesized so that it
acts as a single operand when it appears inside a larger expression. */
#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
751
752 /* Controls and options that are supported for use with the POSIX interface. */
753
/* Each macro below is a bit mask. A value of (0) means that nothing from the
corresponding option or control word is in the supported set. */

/* Compile-time option bits. */
#define POSIX_SUPPORTED_COMPILE_OPTIONS \
  (PCRE2_CASELESS | PCRE2_DOTALL | PCRE2_LITERAL | PCRE2_MULTILINE | \
   PCRE2_UCP | PCRE2_UTF | PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

/* Compile-time control bits, first and second control words. */
#define POSIX_SUPPORTED_COMPILE_CONTROLS \
  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_EXPAND | CTL_HEXPAT | \
   CTL_POSIX | CTL_POSIX_NOSUB | CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

/* Match-time option bits. */
#define POSIX_SUPPORTED_MATCH_OPTIONS \
  (PCRE2_NOTBOL | PCRE2_NOTEMPTY | PCRE2_NOTEOL)

/* Match-time control bits, first and second control words. */
#define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
771
772 /* Control bits that are not ignored with 'push'. */
773
/* First and second control words: bits that are not ignored with 'push'. */
#define PUSH_SUPPORTED_COMPILE_CONTROLS \
  (CTL_BINCODE | CTL_CALLOUT_INFO | CTL_FULLBINCODE | CTL_HEXPAT | \
   CTL_INFO | CTL_JITVERIFY | CTL_MEMORY | CTL_FRAMESIZE | \
   CTL_PUSH | CTL_PUSHCOPY | CTL_PUSHTABLESCOPY | CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET | CTL2_NL_SET)

/* Controls that apply only at compile time with 'push'. */

#define PUSH_COMPILE_ONLY_CONTROLS  (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)

/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS \
  (CTL_HEXPAT | CTL_POSIX | CTL_POSIX_NOSUB | CTL_PUSH | \
   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY | CTL_USE_LENGTH)
790
/* Pattern controls that are mutually exclusive. Each entry is a pair of
control bits ORed together. At present these are all in the first control
word. Note that CTL_POSIX_NOSUB is always accompanied by CTL_POSIX, so it
doesn't need its own entries. */

static uint32_t exclusive_pat_controls[] = {
  /* The POSIX interface cannot be combined with any form of 'push'. */
  CTL_POSIX | CTL_PUSH,
  CTL_POSIX | CTL_PUSHCOPY,
  CTL_POSIX | CTL_PUSHTABLESCOPY,
  /* Only one form of 'push' may be given. */
  CTL_PUSH | CTL_PUSHCOPY,
  CTL_PUSH | CTL_PUSHTABLESCOPY,
  CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
  /* Two different pattern encodings. */
  CTL_EXPAND | CTL_HEXPAT };
803
/* Data controls that are mutually exclusive. Each entry is a pair of control
bits ORed together. At present these are all in the first control word. */

static uint32_t exclusive_dat_controls[] = {
  CTL_ALLUSEDTEXT | CTL_STARTCHAR,
  CTL_FINDLIMITS | CTL_NULLCONTEXT };
810
811 /* Table of single-character abbreviated modifiers. The index field is
812 initialized to -1, but the first time the modifier is encountered, it is filled
813 in with the index of the full entry in modlist, to save repeated searching when
814 processing multiple test items. This short list is searched serially, so its
815 order does not matter. */
816
/* One entry per single-character modifier abbreviation. */
typedef struct c1modstruct {
  const char *fullname;   /* Name of the full modifier that this abbreviates */
  uint32_t onechar;       /* The single-character abbreviation */
  int index;              /* Index of fullname in modlist; -1 until first use */
} c1modstruct;

/* The table is not const because the index fields are filled in on first
use. */
static c1modstruct c1modlist[] = {
  { "bincode", 'B', -1 },
  { "info", 'I', -1 },
  { "global", 'g', -1 },
  { "caseless", 'i', -1 },
  { "multiline", 'm', -1 },
  { "no_auto_capture", 'n', -1 },
  { "dotall", 's', -1 },
  { "extended", 'x', -1 }
};
833
/* Number of entries in c1modlist. The expansion is parenthesized so that it
acts as a single operand when it appears inside a larger expression. */
#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
835
836 /* Table of arguments for the -C command line option. Use macros to make the
837 table itself easier to read. */
838
/* SUPPORT_n becomes 1 when SUPPORT_PCRE2_n is defined. Otherwise it is given
the value 0, unless it already has a definition, which is then left alone. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined(SUPPORT_8)
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined(SUPPORT_16)
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined(SUPPORT_32)
#define SUPPORT_32 0
#endif

/* EBCDIC settings: both values are zero in a non-EBCDIC build. */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif

/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
872
/* One entry of the -C argument table. */
typedef struct coptstruct {
  const char *name;   /* Argument name as it follows -C */
  uint32_t type;      /* One of the CONF_xxx values below */
  uint32_t value;     /* A fixed value or a PCRE2_CONFIG_xxx code; see below */
} coptstruct;

/* Entry types. In the table below, CONF_FIX and CONF_FIZ entries carry a
build-time constant in the value field, whereas CONF_BSR, CONF_INT and CONF_NL
entries carry a PCRE2_CONFIG_xxx code. How each type is processed is decided by
code outside this table. */
enum { CONF_BSR,
       CONF_FIX,
       CONF_FIZ,
       CONF_INT,
       CONF_NL
};

static coptstruct coptlist[] = {
  { "backslash-C", CONF_FIX, BACKSLASH_C },
  { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
  { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
  { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
  { "jit", CONF_INT, PCRE2_CONFIG_JIT },
  { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
  { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
  { "pcre2-16", CONF_FIX, SUPPORT_16 },
  { "pcre2-32", CONF_FIX, SUPPORT_32 },
  { "pcre2-8", CONF_FIX, SUPPORT_8 },
  { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
};
899
/* Number of entries in coptlist. The expansion is parenthesized so that it
acts as a single operand when it appears inside a larger expression. */
#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
901
/* These helper macros were used to fill in coptlist above; remove them again.
EBCDIC_NL and BACKSLASH_C are left defined. */
#undef SUPPORT_8
#undef SUPPORT_16
#undef SUPPORT_32
#undef SUPPORT_EBCDIC
906
907
908 /* ----------------------- Static variables ------------------------ */
909
/* Variables without an initializer start out as zero or NULL, as all objects
with static storage duration do. */

static FILE *infile;
static FILE *outfile;

static const void *last_callout_mark;
static PCRE2_JIT_STACK *jit_stack = NULL;   /* No JIT stack to begin with */
static size_t jit_stack_size = 0;

static BOOL first_callout;
static BOOL jit_was_used;
static BOOL restrict_for_perl_test = FALSE;
static BOOL show_memory = FALSE;

static int code_unit_size;                  /* Bytes */
static int jitrc;                           /* Return from JIT compile */
static int test_mode = DEFAULT_TEST_MODE;   /* Compared with PCRE8_MODE etc. */
static int timeit = 0;
static int timeitm = 0;
927
/* Accumulated clock() totals, all starting at zero. They are given internal
linkage like every other variable in this section, so that they do not add
names to the program's external namespace. */
static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
931
static uint32_t dfa_matched;
static uint32_t forbid_utf = 0;
static uint32_t maxlookbehind;
static uint32_t max_oveccount;
static uint32_t callout_count;
static uint32_t maxcapcount;

static uint16_t local_newline_default = 0;

/* Fixed-size buffers of VERSION_SIZE elements each. */
static VERSION_TYPE jittarget[VERSION_SIZE];
static VERSION_TYPE version[VERSION_SIZE];
static VERSION_TYPE uversion[VERSION_SIZE];

/* Two pattern control blocks and two data control blocks. NOTE(review): the
def_ ones presumably hold defaults and the others the current settings -
confirm against the code that uses them. */
static patctl def_patctl;
static patctl pat_patctl;
static datctl def_datctl;
static datctl dat_datctl;

/* Fixed-size array of PATSTACKSIZE pointers; patstacknext starts at 0. */
static void *patstack[PATSTACKSIZE];
static int patstacknext = 0;

/* Two parallel arrays of MALLOCLISTSIZE entries (pointer and length);
malloclistptr starts at 0. */
static void *malloclist[MALLOCLISTSIZE];
static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
static uint32_t malloclistptr = 0;

/* This exists only when the 8-bit library is supported. The initializer is
positional, so it depends on the order of the fields in regex_t. */
#ifdef SUPPORT_PCRE2_8
static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
#endif

static int *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t locale_name[32];             /* Fixed 32-byte buffer */
static uint8_t *tables3 = NULL;             /* For binary-loaded tables */
static uint32_t loadtables_length = 0;

/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
rebuilding, but set up the same naming scheme for use in macros. The "buffer"
buffer is where all input lines are read. Its size is the same as pbuffer8.
Pattern lines are always copied to pbuffer8 for use in callouts, even if they
are actually compiled from pbuffer16 or pbuffer32. */

static size_t pbuffer8_size = 50000;        /* Initial size, bytes */
static uint8_t *pbuffer8 = NULL;
static uint8_t *buffer = NULL;

/* The dbuffer is where all processed data lines are put. In non-8-bit modes it
is cast as needed. For long data lines it grows as necessary. */

static size_t dbuffer_size = 1u << 14;      /* Initial size, bytes */
static uint8_t *dbuffer = NULL;
983
984
985 /* ---------------- Mode-dependent variables -------------------*/
986
/* Each supported code unit width has its own set of variables, named with the
width as a suffix. The 16-bit and 32-bit sets have a pattern buffer and its
size in addition. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
static uint16_t *pbuffer16 = NULL;
#endif

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
static uint32_t *pbuffer32 = NULL;
#endif
1017
1018
1019 /* ---------------- Macros that work in all modes ----------------- */
1020
/* CAST8VAR yields the current mode's variant of variable x as a uint8_t
pointer. SET and SETPLUS assign y to, or add y to, the current mode's variant
of x. All three are built on CASTVAR and SETOP. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)

/* strlen() for a string that is not held as plain char. The argument is
parenthesized so that the cast applies to the whole argument expression, not
just to its first operand. */

#define strlen8(x) strlen((char *)(x))
1025
1026
1027 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1028
1029 /* Define macros for variables and functions that must be selected dynamically
1030 depending on the mode setting (8, 16, 32). These are dependent on which modes
1031 are supported. */
1032
1033 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1034 defined (SUPPORT_PCRE2_32)) >= 2
1035
1036 /* ----- All three modes supported ----- */
1037
1038 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1039
/* In every macro of this three-mode set, test_mode selects the 8-bit or the
16-bit variant, and any other value selects the 32-bit one. */

/* Read field b through the current mode's variant of pointer a, cast to t. */

#define CASTFLD(t,a,b) ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
  (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : (t)(G(a,32)->b))

/* The current mode's variant of variable x, cast to type t. */

#define CASTVAR(t,x) ( \
  (test_mode == PCRE8_MODE)? (t)G(x,8) : \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32))

/* Code unit number b of string a, read at the current mode's width and
widened to uint32_t. */

#define CODE_UNIT(a,b) ( \
  (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
  (uint32_t)(((PCRE2_SPTR32)(a))[b]))

/* Copy a whole convert context from b to a with memcpy(). This expands to an
if/else statement; the caller supplies the final semicolon. */

#define CONCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
  else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
1058
/* Copy c code units from b into the current mode's variant of buffer a; the
byte count passed to memcpy() is c times the code unit size. The arguments b
and c are parenthesized so that they may be arbitrary expressions. The chain
ends in a plain "else", like the other macros in this set, so that an "else"
written by the caller after this macro cannot attach itself to the macro's own
last "if"; any mode other than 8-bit or 16-bit is therefore treated as 32-bit.
The caller supplies the final semicolon. */

#define CONVERT_COPY(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),(char *)(b),(c)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),(char *)(b),(c)*2); \
  else \
    memcpy(G(a,32),(char *)(b),(c)*4)
1066
/* Copy a whole match context from b to a with memcpy(). This expands to an
if/else statement; the caller supplies the final semicolon. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
  else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))

/* Read field b through the current mode's variant of pointer a (no cast). */

#define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \
  (test_mode == PCRE16_MODE)? G(a,16)->b : G(a,32)->b)

/* Copy a whole compile context from b to a with memcpy(). */

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
  else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))

/* Call pchars8(), pchars16() or pchars32() on the string p starting at the
given offset (counted in code units), storing the result in lv. Unlike most
macros in this set, the 32-bit and 16-bit modes are tested explicitly and the
8-bit form is the fallback. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

/* The same as PCHARS, but the result is discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1099
/* The macros below each expand to an if/else statement that calls the 8-bit,
16-bit or 32-bit form of one library function, chosen by test_mode (anything
other than 8-bit or 16-bit means 32-bit). The caller supplies the final
semicolon. */

/* Enumerate the callouts of the current compiled pattern (compiled_code8/16/32
is used directly); b is cast to the callback type of the selected width and
the result is stored in a. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
     a = pcre2_callout_enumerate_8(compiled_code8, \
       (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
     a = pcre2_callout_enumerate_16(compiled_code16, \
       (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else \
     a = pcre2_callout_enumerate_32(compiled_code32, \
       (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)

/* Copy the code that b points to into the current mode's variant of a. Here b
is passed to the library as it stands, without a width suffix. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_code_copy_8(b); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_code_copy_16(b); \
  else \
    G(a,32) = pcre2_code_copy_32(b)

/* Copy the current mode's variant of b and store the copy in a as a void
pointer. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_32(G(b,32))

/* As PCRE2_CODE_COPY_TO_VOID, but calling pcre2_code_copy_with_tables. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_with_tables_32(G(b,32))

/* Compile the current mode's variant of pattern buffer b, storing the result
in the current mode's variant of a; c to g are passed through unchanged. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else \
    G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)

/* Free a converted pattern; a is cast to the selected code unit pointer
type. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
  else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
1147
/* The macros below each expand to an if/else statement that calls the 8-bit,
16-bit or 32-bit form of one library function, chosen by test_mode (anything
other than 8-bit or 16-bit means 32-bit). The caller supplies the final
semicolon. */

/* Run pcre2_dfa_match, storing the result in a. b (code) and g (match data)
name mode-dependent variables; the subject c is cast to the selected width. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else \
    a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)

/* Fetch the message for error code a into the current mode's variant of
buffer b, storing the result in r. The length argument is built by name: for a
buffer called b it is b8_size, b16_size/2 or b32_size/4, so variables with
those names must exist. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
  else \
    r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))

/* Store in a the ovector count of the current mode's variant of match
data b. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_ovector_count_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_ovector_count_16(G(b,16)); \
  else \
    a = pcre2_get_ovector_count_32(G(b,32))

/* Store in a the startchar value of the current mode's variant of match
data b. */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_startchar_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_startchar_16(G(b,16)); \
  else \
    a = pcre2_get_startchar_32(G(b,32))

/* JIT-compile the current mode's variant of a with options b; result in r. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \
  else r = pcre2_jit_compile_32(G(a,32),b)

/* Call pcre2_jit_free_unused_memory with the current mode's variant of a. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \
  else pcre2_jit_free_unused_memory_32(G(a,32))

/* Run pcre2_jit_match, storing the result in a. b (code) and g (match data)
name mode-dependent variables; the subject c is cast to the selected width. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1197
/* JIT stack handling. Each macro expands to an if/else statement that calls
the 8-bit, 16-bit or 32-bit library function chosen by test_mode (anything
other than 8-bit or 16-bit means 32-bit). Like the other macros in this set,
they do not end with a semicolon: the caller supplies it. A semicolon inside
the macro would leave an empty statement behind, which breaks a use such as
"if (x) PCRE2_JIT_STACK_FREE(s); else ...". */

/* Create a JIT stack from arguments b, c and d; the result is cast to
PCRE2_JIT_STACK * and stored in a. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)

/* Call pcre2_jit_stack_assign on the current mode's variant of a; b is cast
to the callback type of the selected width and c is passed through. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)

/* Free JIT stack a, which is cast to the stack type of the selected width. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1221
/* The macros below each expand to an if/else statement that calls the 8-bit,
16-bit or 32-bit form of one library function, chosen by test_mode (anything
other than 8-bit or 16-bit means 32-bit). The caller supplies the final
semicolon. */

/* Build character tables, always passing NULL as the argument; result in a. */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(NULL); \
  else a = pcre2_maketables_32(NULL)

/* Run pcre2_match, storing the result in a. b (code) and g (match data) name
mode-dependent variables; the subject c is cast to the selected width. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* Create match data from arguments b and c, storing it in the current mode's
variant of a. */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_8(b,c); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_16(b,c); \
  else \
    G(a,32) = pcre2_match_data_create_32(b,c)

/* Create match data from the current mode's variant of compiled pattern b,
storing it in the current mode's variant of a. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
  else \
    G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)

/* Free the current mode's variant of match data a. */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(a,16)); \
  else \
    pcre2_match_data_free_32(G(a,32))

/* Call pcre2_pattern_convert, storing the result in a. b (pattern buffer) and
g (convert context) name mode-dependent variables; e is cast to a pointer to a
code unit pointer of the selected width. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
  else \
    a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))

/* Call pcre2_pattern_info on the current mode's variant of b, with arguments
c and d; result in a. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_info_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_info_16(G(b,16),c,d); \
  else \
    a = pcre2_pattern_info_32(G(b,32),c,d)

/* Call pcre2_printint on the current compiled pattern (compiled_code8/16/32
is used directly), passing outfile and a. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8,outfile,a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16,outfile,a); \
  else \
    pcre2_printint_32(compiled_code32,outfile,a)

/* Call pcre2_serialize_decode; a is cast to an array of code pointers of the
selected width and d names a mode-dependent variable. Result in r. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else \
    r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))

/* Call pcre2_serialize_encode; a is cast to an array of const code pointers
of the selected width and e names a mode-dependent variable. Result in r. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else \
    r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))

/* Call pcre2_serialize_free on a, which is passed as it stands. */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(a); \
  else \
    pcre2_serialize_free_32(a)
1306
/* Store in r the result of calling pcre2_serialize_get_number_of_codes on a,
using the 8-bit, 16-bit or 32-bit library function chosen by test_mode
(anything other than 8-bit or 16-bit means 32-bit). The macro does not end
with a semicolon or a line continuation: the caller supplies the semicolon, as
for the other macros in this set, so that the macro can be followed by
"else". */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1314
/* Call pcre2_set_callout on the current mode's variant of a, using the 8-bit,
16-bit or 32-bit library function chosen by test_mode (anything other than
8-bit or 16-bit means 32-bit). b is cast to the callout function type of the
selected width and c is passed through. The macro does not end with a
semicolon: the caller supplies it, as for the other macros in this set, so
that the macro can be followed by "else". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1322
/* The macros below each expand to an if/else statement that calls the 8-bit,
16-bit or 32-bit form of one pcre2_set_xxx library function, chosen by
test_mode (anything other than 8-bit or 16-bit means 32-bit). In each case a
names a mode-dependent context variable and the remaining arguments are passed
through. Those that have an r argument store the function's result in it. The
caller supplies the final semicolon. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(a,16),b); \
  else \
    pcre2_set_character_tables_32(G(a,32),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_depth_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_depth_limit_16(G(a,16),b); \
  else \
    pcre2_set_depth_limit_32(G(a,32),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_separator_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_separator_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_separator_32(G(a,32),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_escape_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_escape_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_escape_32(G(a,32),b)

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_heap_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_heap_limit_16(G(a,16),b); \
  else \
    pcre2_set_heap_limit_32(G(a,32),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(a,16),b); \
  else \
    pcre2_set_match_limit_32(G(a,32),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(a,16),b); \
  else \
    pcre2_set_max_pattern_length_32(G(a,32),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(a,16),b); \
  else \
    pcre2_set_offset_limit_32(G(a,32),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else \
    pcre2_set_parens_nest_limit_32(G(a,32),b)

/* Here b is cast to the substitute callout function type of the selected
width. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(a,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(a,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_substitute_callout_32(G(a,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
1413
/* The macros below each expand to an if/else statement that calls the 8-bit,
16-bit or 32-bit form of one library function, chosen by test_mode (anything
other than 8-bit or 16-bit means 32-bit). Where a macro has a result, it is
stored in a. The caller supplies the final semicolon. */

/* Call pcre2_substitute. b (code) and g (match data) name mode-dependent
variables; the subject c, the replacement i and the output buffer k are cast to
the selected width. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)

/* Substring extraction. In the "byname" forms, b (match data) and c (name)
both name mode-dependent variables; in the "bynumber" forms only b does, and c
is passed through. Output buffer arguments are cast to the selected width. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else pcre2_substring_free_32((PCRE2_UCHAR32 *)a)

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
  else \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else \
    a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
  else \
    a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1502
/* General-purpose helpers. Unless stated otherwise, test_mode selects the
8-bit or the 16-bit variant and any other value selects the 32-bit one. The
statement macros expand to if/else statements; the caller supplies the final
semicolon. */

/* The current mode's variant of variable x as a void pointer. */

#define PTR(x) ( \
  (test_mode == PCRE8_MODE)? (void *)G(x,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
  (void *)G(x,32))

/* Assign z to field y, reached through the current mode's variant of x. */

#define SETFLD(x,y,z) \
  if (test_mode == PCRE8_MODE) G(x,8)->y = z; \
  else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \
  else G(x,32)->y = z

/* Assign z to element v of array field y. */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \
  else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \
  else G(x,32)->y[v] = z

/* Apply operator z, with right-hand operand y, to the current mode's variant
of x. SET and SETPLUS are defined in terms of this macro. */

#define SETOP(x,y,z) \
  if (test_mode == PCRE8_MODE) G(x,8) z y; \
  else if (test_mode == PCRE16_MODE) G(x,16) z y; \
  else G(x,32) z y

/* Assign y to the current mode's variant of x, cast to a pointer to uint8_t,
uint16_t or uint32_t as appropriate. */

#define SETCASTPTR(x,y) \
  if (test_mode == PCRE8_MODE) \
    G(x,8) = (uint8_t *)(y); \
  else if (test_mode == PCRE16_MODE) \
    G(x,16) = (uint16_t *)(y); \
  else \
    G(x,32) = (uint32_t *)(y)

/* Length of string p as an int, obtained from strlen(), strlen16() or
strlen32(). */

#define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
  (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
  ((int)strlen32((PCRE2_SPTR32)p)))

/* Call the current mode's variant of function a with one (SUB1) or two (SUB2)
arguments, each of which also names a mode-dependent variable. */

#define SUB1(a,b) \
  if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \
  else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \
  else G(a,32)(G(b,32))

#define SUB2(a,b,c) \
  if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \
  else G(a,32)(G(b,32),G(c,32))

/* Compare the current mode's variant of x with y using relational operator r.
All three modes are tested explicitly here, so the result is false if
test_mode matches none of them. */

#define TEST(x,r,y) ( \
  (test_mode == PCRE8_MODE && G(x,8) r (y)) || \
  (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32) r (y)))

/* As TEST, but comparing field f reached through the current mode's variant
of x. */

#define TESTFLD(x,f,r,y) ( \
  (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \
  (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1554
1555
1556 /* ----- Two out of three modes are supported ----- */
1557
1558 #else
1559
1560 /* We can use some macro trickery to make a single set of definitions work in
1561 the three different cases. */
1562
1563 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1564
/* Choose the two code unit widths used by the two-mode macros. BITONE is the
wider of the pair in every case. */

#if defined SUPPORT_PCRE2_32 && defined SUPPORT_PCRE2_16
/* 32-bit and 16-bit, but not 8-bit */
#  define BITONE 32
#  define BITTWO 16

#elif defined SUPPORT_PCRE2_32 && defined SUPPORT_PCRE2_8
/* 32-bit and 8-bit, but not 16-bit */
#  define BITONE 32
#  define BITTWO 8

#else
/* 16-bit and 8-bit, but not 32-bit */
#  define BITONE 16
#  define BITTWO 8
#endif
1581
1582
1583 /* ----- Common macros for two-mode cases ----- */
1584
/* Bytes per code unit for each of the two selected widths. */
#define BYTEONE (BITONE/8)
#define BYTETWO (BITTWO/8)

/* Field fld of the current mode's variant of var, cast to type. */
#define CASTFLD(type,var,fld) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(var,BITONE)->fld) : \
    (type)(G(var,BITTWO)->fld))

/* The current mode's variant of var itself, cast to type. */
#define CASTVAR(type,var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : \
    (type)G(var,BITTWO))

/* Code unit number idx of the string at ptr, converted to uint32_t. */
#define CODE_UNIT(ptr,idx) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(ptr))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(ptr))[idx]))

/* Copy one whole convert context structure from src to dst. Expands to an
unbraced if/else; the caller supplies the terminating semicolon. */
#define CONCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))
1606
/* Copy c code units (c*BYTEONE or c*BYTETWO bytes) from b into the current
mode's variant of a, yielding memcpy's return value. The whole conditional is
wrapped in parentheses, like the other expression macros in this group, so
that it stays a single operand when used inside a larger expression. */
#define CONVERT_COPY(a,b,c) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
    memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))
1611
/* Copy one whole match context structure from src to dst. Expands to an
unbraced if/else; the caller supplies the terminating semicolon. */
#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

/* Field fld of the current mode's variant of var. */
#define FLD(var,fld) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(var,BITONE)->fld : \
    G(var,BITTWO)->fld)

/* Copy one whole compile context structure from src to dst. */
#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1626
/* Call the current mode's pchars function on the string at ptr, beginning
start code units in, and store its result in lv. Expands to an unbraced
if/else; the caller supplies the terminating semicolon. */
#define PCHARS(lv, ptr, start, count, utf, fp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(ptr)+start, \
      count, utf, fp); \
  else \
    lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(ptr)+start, \
      count, utf, fp)

/* As PCHARS, but the result of the pchars call is discarded. */
#define PCHARSV(ptr, start, count, utf, fp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(ptr)+start, \
      count, utf, fp); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(ptr)+start, \
      count, utf, fp)
1638
/* Enumerate the callouts of the current mode's compiled_code variable,
passing callback b (cast to the mode's callback type) and user data c, and
store the result in a. Expands to an unbraced if/else; the caller supplies the
terminating semicolon.

The function name stem carries its trailing underscore explicitly, like every
other macro in this group, so that the pasted name is
pcre2_callout_enumerate_<width> without depending on any other macro to
supply the underscore. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1646
/* Each macro in this group expands to an unbraced if/else on test_mode that
calls the BITONE or the BITTWO variant of one PCRE2 function. Arguments
wrapped in G() name mode-specific variables; the rest are passed through
unchanged. The caller supplies the terminating semicolon. */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(dst,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(src,BITTWO))

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(src,BITTWO))

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pat,BITONE), \
      len,opts,errcode,erroff,ctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pat,BITTWO), \
      len,opts,errcode,erroff,ctx)

#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)pat); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)pat)

#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,ctx,wspace,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),ctx,wspace,wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),ctx,wspace,wscount)

/* The length passed is the mode-specific <buf>_size variable divided by the
code unit size in bytes. */
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errcode,G(buf,BITONE), \
      G(G(buf,BITONE),_size)/BYTEONE); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errcode,G(buf,BITTWO), \
      G(G(buf,BITTWO),_size)/BYTETWO)

#define PCRE2_GET_OVECTOR_COUNT(out,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    out = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    out = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_GET_STARTCHAR(out,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    out = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    out = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),opts); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),opts)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(ctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(ctx,BITTWO))

#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),ctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),ctx)
1722
/* JIT stack handling for the two-mode case. Each macro expands to an unbraced
if/else on test_mode. Unlike most macros in this group, these three already
end in a semicolon, so a caller's own semicolon yields an extra empty
statement; that is kept as-is because the callers are not visible here. */

/* Create a JIT stack with arguments b, c, d and store it, cast to
PCRE2_JIT_STACK *, in a. The definition ends on its last statement line;
there is no trailing line continuation that could splice a following line
into the macro. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);

/* Assign callback b (cast to the mode's JIT callback type) and data c to the
current mode's variant of context a. */
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c);

/* Free JIT stack a, cast to the current mode's JIT stack type. */
#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a);
1740
/* Each macro in this group expands to an unbraced if/else on test_mode that
calls the BITONE or the BITTWO variant of one PCRE2 function. Arguments
wrapped in G() name mode-specific variables; the rest are passed through
unchanged. The caller supplies the terminating semicolon. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),ctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),ctx)

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(ovecsize,gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(ovecsize,gctx)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = \
      G(pcre2_match_data_create_from_pattern_,BITONE)(G(code,BITONE),gctx); \
  else \
    G(mdata,BITTWO) = \
      G(pcre2_match_data_create_from_pattern_,BITTWO)(G(code,BITTWO),gctx)

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,out,outlen,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_convert_,BITONE)(G(pat,BITONE),len,opts, \
      (G(PCRE2_UCHAR,BITONE) **)out,outlen,G(ctx,BITONE)); \
  else \
    rc = G(pcre2_pattern_convert_,BITTWO)(G(pat,BITTWO),len,opts, \
      (G(PCRE2_UCHAR,BITTWO) **)out,outlen,G(ctx,BITTWO))

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)

/* Always operates on the mode-specific compiled_code variable and writes to
outfile. */
#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)codes,count,bytes,size,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)codes,count,bytes,size,G(gctx,BITTWO))

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1814
/* Set callout function b (cast to the mode's callout type) with user data c
on the current mode's variant of context a. Expands to an unbraced if/else.

The expansion deliberately does not end in a semicolon, matching the 8-bit
variant and the other PCRE2_SET_* macros: the caller supplies it, so that
"if (x) PCRE2_SET_CALLOUT(...); else ..." remains valid. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1822
/* Context-setting macros. Each expands to an unbraced if/else on test_mode
that calls the BITONE or the BITTWO variant of one pcre2_set_... function on
the mode-specific context variable; the caller supplies the terminating
semicolon. The two glob setters also store the function's result in rc. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE),fn,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO),fn,data)

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_escape_,BITONE)(G(ctx,BITONE),ch); \
  else \
    rc = G(pcre2_set_glob_escape_,BITTWO)(G(ctx,BITTWO),ch)

#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_separator_,BITONE)(G(ctx,BITONE),ch); \
  else \
    rc = G(pcre2_set_glob_separator_,BITTWO)(G(ctx,BITTWO),ch)

#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE),len); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO),len)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO),limit)
1882
/* Substitution and substring extraction macros. Each expands to an unbraced
if/else on test_mode that calls the BITONE or the BITTWO variant of one PCRE2
function. Arguments wrapped in G() name mode-specific variables; buffer and
string arguments are cast to the mode's code unit pointer type. The caller
supplies the terminating semicolon. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)(G(ctx,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))fn,data); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)(G(ctx,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))fn,data)

#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,ctx,repl,rlen,out,outlen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),ctx,(G(PCRE2_SPTR,BITONE))repl,rlen, \
      (G(PCRE2_UCHAR,BITONE) *)out,outlen); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),ctx,(G(PCRE2_SPTR,BITTWO))repl,rlen, \
      (G(PCRE2_UCHAR,BITTWO) *)out,outlen)

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,len) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) *)buf,len); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)buf,len)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,len) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) *)buf,len); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) *)buf,len)

#define PCRE2_SUBSTRING_FREE(buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)buf); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)buf)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(mdata,BITONE), \
      num,lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,lenptr)

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)(G(mdata,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)listptr,lensptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)(G(mdata,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)listptr,lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_number_from_name_,BITONE)(G(code,BITONE), \
      G(name,BITONE)); \
  else \
    rc = G(pcre2_substring_number_from_name_,BITTWO)(G(code,BITTWO), \
      G(name,BITTWO))
1969
/* Pointer to the current mode's variant of var, as a void pointer. */
#define PTR(var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(var,BITONE) : \
    (void *)G(var,BITTWO))

/* The statement macros below expand to an unbraced if/else on test_mode; the
caller supplies the terminating semicolon. */

/* Assign val to field fld of the current mode's variant of var. */
#define SETFLD(var,fld,val) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->fld = val; \
  else \
    G(var,BITTWO)->fld = val

/* Assign val to element idx of vector field fld. */
#define SETFLDVEC(var,fld,idx,val) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->fld[idx] = val; \
  else \
    G(var,BITTWO)->fld[idx] = val

/* Apply the assignment-style operator op with operand val to the current
mode's variant of var. Note the argument order: value first, operator last. */
#define SETOP(var,val,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op val; \
  else \
    G(var,BITTWO) op val

/* Store ptr in the current mode's variant of var, cast to a pointer to the
uintN_t type of matching width. */
#define SETCASTPTR(var,ptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) = (G(G(uint,BITONE),_t) *)(ptr); \
  else \
    G(var,BITTWO) = (G(G(uint,BITTWO),_t) *)(ptr)

/* Length of the string at str, from the strlen<width> function that matches
the current mode. */
#define STRLEN(str) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) : \
    G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))

/* Call the current mode's variant of function fn with the current mode's
variant of arg. */
#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(fn,BITONE)(G(arg,BITONE)); \
  else \
    G(fn,BITTWO)(G(arg,BITTWO))
2001
/* Call the current mode's variant of function a with the current mode's
variants of b and c. Expands to an unbraced if/else on test_mode; the caller
supplies the terminating semicolon. The function name G(a,...) is followed
directly by its argument list, exactly as in SUB1. */
#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2007
/* True when the current mode's variant of var stands in relation op to val.
Yields false if test_mode matches neither supported mode. */
#define TEST(var,op,val) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(var,BITONE) op (val)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(var,BITTWO) op (val)))

/* As TEST, but compares field fld of the mode-specific variable. */
#define TESTFLD(var,fld,op,val) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(var,BITONE)->fld op (val)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(var,BITTWO)->fld op (val)))
2015
2016
2017 #endif /* Two out of three modes */
2018
2019 /* ----- End of cases where more than one mode is supported ----- */
2020
2021
2022 /* ----- Only 8-bit mode is supported ----- */
2023
2024 #elif defined SUPPORT_PCRE2_8
/* With only the 8-bit library available there is no dispatch on test_mode:
every macro maps straight onto the _8 function or the ...8 variable.
Arguments wrapped in G(x,8) name the 8-bit variant of a variable. */
#define CASTFLD(type,var,fld) (type)(G(var,8)->fld)
#define CASTVAR(type,var) (type)G(var,8)
#define CODE_UNIT(ptr,idx) (uint32_t)(((PCRE2_SPTR8)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(dst,src,count) memcpy(G(dst,8),(char *)src, count)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
#define FLD(var,fld) G(var,8)->fld
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))
#define PCHARS(lv, ptr, start, count, utf, fp) \
  lv = pchars8((PCRE2_SPTR8)(ptr)+start, count, utf, fp)
#define PCHARSV(ptr, start, count, utf, fp) \
  (void)pchars8((PCRE2_SPTR8)(ptr)+start, count, utf, fp)
#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_8(G(src,8))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errcode,erroff,ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,ctx,wspace,wscount) \
  rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),ctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(out,mdata) \
  out = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(out,mdata) \
  out = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_8(G(code,8),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_8(G(ctx,8))
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),ctx)
/* The three JIT stack macros end in a semicolon of their own. */
#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(startsize,maxsize,gctx);
#define PCRE2_JIT_STACK_ASSIGN(ctx,fn,data) \
  pcre2_jit_stack_assign_8(G(ctx,8),(pcre2_jit_callback_8)fn,data);
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)stack);
#define PCRE2_MAKETABLES(tables) tables = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),ctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,8) = pcre2_match_data_create_8(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,out,outlen,ctx) \
  rc = pcre2_pattern_convert_8(G(pat,8),len,opts,(PCRE2_UCHAR8 **)out,outlen, \
    G(ctx,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8),what,where)
#define PCRE2_PRINTINT(arg) pcre2_printint_8(compiled_code8,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes,G(gctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count,bytes,size, \
    G(gctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
/* 8-bit-only context setters, substitution and substring macros. None of
them ends in a semicolon; the caller supplies it. */
#define PCRE2_SET_CALLOUT(ctx,fn,data) \
  pcre2_set_callout_8(G(ctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))fn,data)
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_8(G(ctx,8),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,fn,data) \
  pcre2_set_compile_recursion_guard_8(G(ctx,8),fn,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_8(G(ctx,8),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  rc = pcre2_set_glob_escape_8(G(ctx,8),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  rc = pcre2_set_glob_separator_8(G(ctx,8),ch)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  pcre2_set_max_pattern_length_8(G(ctx,8),len)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_8(G(ctx,8),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_8(G(ctx,8),limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  pcre2_set_substitute_callout_8(G(ctx,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))fn,data)
#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,ctx,repl,rlen,out,outlen) \
  rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),ctx,(PCRE2_SPTR8)repl,rlen,(PCRE2_UCHAR8 *)out,outlen)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,len) \
  rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 *)buf,len)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,len) \
  rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 *)buf,len)
#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_8(G(mdata,8), \
    (PCRE2_UCHAR8 ***)listptr,lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)list)
/* Store in a the result of pcre2_substring_number_from_name_8() for the 8-bit
variants of b and c. The expansion does not end in a semicolon, matching the
multi-mode definitions of this macro: the caller supplies it, so that
"if (x) PCRE2_SUBSTRING_NUMBER_FROM_NAME(...); else ..." remains valid. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* 8-bit-only variable access helpers: each acts directly on the ...8 variant
of the named variable or function. SETOP takes the value before the
operator. */
#define PTR(var) (void *)G(var,8)
#define SETFLD(var,fld,val) G(var,8)->fld = val
#define SETFLDVEC(var,fld,idx,val) G(var,8)->fld[idx] = val
#define SETOP(var,val,op) G(var,8) op val
#define SETCASTPTR(var,ptr) G(var,8) = (uint8_t *)(ptr)
#define STRLEN(str) (int)strlen((char *)str)
#define SUB1(fn,arg) G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(var,op,val) (G(var,8) op (val))
#define TESTFLD(var,fld,op,val) (G(var,8)->fld op (val))
2127
2128
2129 /* ----- Only 16-bit mode is supported ----- */
2130
2131 #elif defined SUPPORT_PCRE2_16
/* With only the 16-bit library available there is no dispatch on test_mode:
every macro maps straight onto the _16 function or the ...16 variable.
Arguments wrapped in G(x,16) name the 16-bit variant of a variable. Counts
given in code units are scaled by 2 where a byte count is involved. */
#define CASTFLD(type,var,fld) (type)(G(var,16)->fld)
#define CASTVAR(type,var) (type)G(var,16)
#define CODE_UNIT(ptr,idx) (uint32_t)(((PCRE2_SPTR16)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(dst,src,count) \
  memcpy(G(dst,16),(char *)src, (count)*2)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16))
#define FLD(var,fld) G(var,16)->fld
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16))
#define PCHARS(lv, ptr, start, count, utf, fp) \
  lv = pchars16((PCRE2_SPTR16)(ptr)+start, count, utf, fp)
#define PCHARSV(ptr, start, count, utf, fp) \
  (void)pchars16((PCRE2_SPTR16)(ptr)+start, count, utf, fp)
#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,16) = pcre2_code_copy_16(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_16(G(src,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_16(G(src,16))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  G(code,16) = pcre2_compile_16(G(pat,16),len,opts,errcode,erroff,ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,ctx,wspace,wscount) \
  rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),ctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size)/2)
#define PCRE2_GET_OVECTOR_COUNT(out,mdata) \
  out = pcre2_get_ovector_count_16(G(mdata,16))
#define PCRE2_GET_STARTCHAR(out,mdata) \
  out = pcre2_get_startchar_16(G(mdata,16))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_16(G(code,16),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_16(G(ctx,16))
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),ctx)
/* The three JIT stack macros end in a semicolon of their own. */
#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(startsize,maxsize,gctx);
#define PCRE2_JIT_STACK_ASSIGN(ctx,fn,data) \
  pcre2_jit_stack_assign_16(G(ctx,16),(pcre2_jit_callback_16)fn,data);
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)stack);
#define PCRE2_MAKETABLES(tables) tables = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,ctx) \
  rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),ctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,16) = pcre2_match_data_create_16(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) pcre2_match_data_free_16(G(mdata,16))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,out,outlen,ctx) \
  rc = pcre2_pattern_convert_16(G(pat,16),len,opts,(PCRE2_UCHAR16 **)out, \
    outlen,G(ctx,16))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_16(G(code,16),what,where)
#define PCRE2_PRINTINT(arg) pcre2_printint_16(compiled_code16,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes, \
    G(gctx,16))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytes,size,gctx) \
  rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count,bytes, \
    size,G(gctx,16))
#define PCRE2_SERIALIZE_FREE(bytes) pcre2_serialize_free_16(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_16(bytes)
/* Set callout function b (cast to the 16-bit callout type) with user data c
on the 16-bit variant of context a. The expansion does not end in a
semicolon, matching the 8-bit definition and the other PCRE2_SET_* macros:
the caller supplies it, so that "if (x) PCRE2_SET_CALLOUT(...); else ..."
remains valid. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
/* 16-bit-only context setters, substitution and substring macros. None of
them ends in a semicolon; the caller supplies it. */
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_16(G(ctx,16),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,fn,data) \
  pcre2_set_compile_recursion_guard_16(G(ctx,16),fn,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_16(G(ctx,16),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  rc = pcre2_set_glob_escape_16(G(ctx,16),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  rc = pcre2_set_glob_separator_16(G(ctx,16),ch)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_16(G(ctx,16),limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_16(G(ctx,16),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  pcre2_set_max_pattern_length_16(G(ctx,16),len)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_16(G(ctx,16),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_16(G(ctx,16),limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  pcre2_set_substitute_callout_16(G(ctx,16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))fn,data)
#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,ctx,repl,rlen,out,outlen) \
  rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
    G(mdata,16),ctx,(PCRE2_SPTR16)repl,rlen,(PCRE2_UCHAR16 *)out,outlen)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,len) \
  rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 *)buf,len)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,len) \
  rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
    (PCRE2_UCHAR16 *)buf,len)
#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_16((PCRE2_UCHAR16 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
    (PCRE2_UCHAR16 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_16(G(mdata,16), \
    (PCRE2_UCHAR16 ***)listptr,lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)list)
/* Store in a the result of pcre2_substring_number_from_name_16() for the
16-bit variants of b and c. The expansion does not end in a semicolon,
matching the multi-mode definitions of this macro: the caller supplies it, so
that "if (x) PCRE2_SUBSTRING_NUMBER_FROM_NAME(...); else ..." remains valid. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
/* 16-bit-only variable access helpers: each acts directly on the ...16
variant of the named variable or function. SETOP takes the value before the
operator. */
#define PTR(var) (void *)G(var,16)
#define SETFLD(var,fld,val) G(var,16)->fld = val
#define SETFLDVEC(var,fld,idx,val) G(var,16)->fld[idx] = val
#define SETOP(var,val,op) G(var,16) op val
#define SETCASTPTR(var,ptr) G(var,16) = (uint16_t *)(ptr)
#define STRLEN(str) (int)strlen16((PCRE2_SPTR16)str)
#define SUB1(fn,arg) G(fn,16)(G(arg,16))
#define SUB2(fn,arg1,arg2) G(fn,16)(G(arg1,16),G(arg2,16))
#define TEST(var,op,val) (G(var,16) op (val))
#define TESTFLD(var,fld,op,val) (G(var,16)->fld op (val))
2234
2235
2236 /* ----- Only 32-bit mode is supported ----- */
2237
2238 #elif defined SUPPORT_PCRE2_32
/* Every macro in this group targets the 32-bit library: function names carry
the _32 suffix and variables are selected with G(x,32). G is defined outside
this view; presumably it pastes the width suffix onto the identifier (confirm
at its definition). */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
/* The count c is multiplied by 4, the size in bytes of a 32-bit code unit. */
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
/* PCHARS stores the value returned by pchars32(); PCHARSV discards it. */
#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
/* Always operates on compiled_code32; no code argument is taken. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
/* The last argument is built from the buffer name plus "_size/4"; presumably
this turns a size in bytes into a count of 32-bit code units (confirm against
the declaration of the size variable). */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
/* NOTE(review): the next three expansions already end in ';', so an
invocation followed by ';' produces an extra empty statement. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
/* Always passes a NULL general context. */
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
/* Always prints compiled_code32 to outfile. */
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_32(G(a,32), \
    (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* NOTE(review): this expansion already ends in ';'. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
/* NOTE(review): this expansion already ends in ';'. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));

/* Generic accessors: the same operations on whichever variable G(x,32)
selects. */

#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
/* Here z is an operator token placed between the variable and y. */
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
/* The length is narrowed to int. */
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
/* In TEST and TESTFLD, r is a relational operator token. */
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2341
2342 #endif
2343
2344 /* ----- End of mode-specific function call macros ----- */
2345
2346
2347
2348
2349 /*************************************************
2350 * Alternate character tables *
2351 *************************************************/
2352
2353 /* By default, the "tables" pointer in the compile context when calling
2354 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2355 library. However, the tables modifier can be used to select alternate sets of
2356 tables, for different kinds of testing. Note that the locale modifier also
2357 adjusts the tables. */
2358
2359 /* This is the set of tables distributed as default with PCRE2. It recognizes
2360 only ASCII characters. */
2361
static const uint8_t tables1[] = {

/* Layout: four consecutive sections in one array -
     256 bytes  lower casing table
     256 bytes  case flipping table
     320 bytes  ten 32-byte character class bit maps
     256 bytes  per-character type flags
*/

/* This table is a lower casing table. Entries 65-90 ('A'-'Z') map to 97-122;
every other entry maps to itself. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. 'A'-'Z' and 'a'-'z' are exchanged;
every other entry maps to itself. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2530
2531 /* This is a set of tables that came originally from a Windows user. It seems
2532 to be at least an approximation of ISO 8859. In particular, there are
2533 characters greater than 128 that are marked as spaces, letters, etc. */
2534
static const uint8_t tables2[] = {

/* The section sizes match tables1 (256 + 256 + 320 + 256 bytes), so the
sections are labelled below on the assumption that the layout and class order
are the same as in tables1 - confirm against the code that consumes them. */

/* Lower casing table. Besides 'A'-'Z', codes 192-222 map to 224-254, except
215 which maps to itself; 223 also maps to itself. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table. 'A'-'Z' and 'a'-'z' are exchanged, and so are
192-222 and 224-254, with 215, 223, 247 and 255 mapping to themselves. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps, ten maps of 32 bytes each. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Per-character type flags, eight characters per row; presumably the bits
have the same meanings as in tables1. Unlike tables1, several entries above
127 are non-zero. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2673
2674
2675
2676 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2677 /*************************************************
2678 * Emulated memmove() for systems without it *
2679 *************************************************/
2680
2681 /* This function can make use of bcopy() if it is available. Otherwise do it by
2682 steam, as there are some non-Unix environments that lack both memmove() and
2683 bcopy(). */
2684
/* Copy n bytes from s to d, allowing the two regions to overlap, and return
d. When the destination lies above the source the bytes are copied from the
top down, so that no source byte is overwritten before it has been read;
otherwise they are copied from the bottom up. */

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  size_t remaining = n;
  while (remaining > 0)
    {
    remaining--;
    to[remaining] = from[remaining];
    }
  }
else
  {
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }

return d;
#endif   /* not HAVE_BCOPY */
}

/* From here on, every use of memmove() goes to the emulation. */

#undef  memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
2711 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
2712
2713
2714
2715 #ifndef HAVE_STRERROR
2716 /*************************************************
2717 * Provide strerror() for non-ANSI libraries *
2718 *************************************************/
2719
2720 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2721 libraries. They may no longer be around, but just in case, we can try to
2722 provide the same facility by this simple alternative function. */
2723
/* The error message table and its entry count come from the C library. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Return the library's message for error number n, or a fixed fallback string
when n does not index an entry of sys_errlist. */

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
2733 #endif /* HAVE_STRERROR */
2734
2735
2736
2737 /*************************************************
2738 * Local memory functions *
2739 *************************************************/
2740
2741 /* Alternative memory functions, to test functionality. */
2742
/* Allocate size bytes with malloc(). When show_memory is set, the outcome is
reported on outfile and, while the tracking list has room, the block and its
size are recorded so that my_free() can report the size later.

Arguments:
  size        number of bytes wanted
  data        unused

Returns:      the new block, or NULL if malloc() failed
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
  return block;
  }

fprintf(outfile, "malloc %5" SIZ_FORM, size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2771
my_free(void * block,void * data)2772 static void my_free(void *block, void *data)
2773 {
2774 (void)data;
2775 if (show_memory)
2776 {
2777 uint32_t i, j;
2778 BOOL found = FALSE;
2779
2780 fprintf(outfile, "free");
2781 for (i = 0; i < malloclistptr; i++)
2782 {
2783 if (block == malloclist[i])
2784 {
2785 fprintf(outfile, " %5" SIZ_FORM, malloclistlength[i]);
2786 malloclistptr--;
2787 for (j = i; j < malloclistptr; j++)
2788 {
2789 malloclist[j] = malloclist[j+1];
2790 malloclistlength[j] = malloclistlength[j+1];
2791 }
2792 found = TRUE;
2793 break;
2794 }
2795 }
2796 if (!found) fprintf(outfile, " unremembered block");
2797 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2798 fprintf(outfile, " %p", block); /* Not portable */
2799 #endif
2800 fprintf(outfile, "\n");
2801 }
2802 free(block);
2803 }
2804
2805
2806
2807 /*************************************************
2808 * Callback function for stack guard *
2809 *************************************************/
2810
2811 /* This is set up to be called from pcre2_compile() when the stackguard=n
2812 modifier sets a value greater than zero. The test we do is whether the
2813 parenthesis nesting depth is greater than the value set by the modifier.
2814
2815 Argument: the current parenthesis nesting depth
2816 Returns: non-zero to kill the compilation
2817 */
2818
2819 static int
stack_guard(uint32_t depth,void * user_data)2820 stack_guard(uint32_t depth, void *user_data)
2821 {
2822 (void)user_data;
2823 return depth > pat_patctl.stackguard_test;
2824 }
2825
2826
2827 /*************************************************
2828 * JIT memory callback *
2829 *************************************************/
2830
2831 static PCRE2_JIT_STACK*
jit_callback(void * arg)2832 jit_callback(void *arg)
2833 {
2834 jit_was_used = TRUE;
2835 return (PCRE2_JIT_STACK *)arg;
2836 }
2837
2838
2839 /*************************************************
2840 * Convert UTF-8 character to code point *
2841 *************************************************/
2842
2843 /* This function reads one or more bytes that represent a UTF-8 character,
2844 and returns the codepoint of that character. Note that the function supports
2845 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2846 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2847 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2848 checking, and also for generating 32-bit non-UTF data values above the UTF
2849 limit.
2850
2851 Argument:
2852 utf8bytes a pointer to the byte vector
  vptr         a pointer to a uint32_t to receive the value
2854
2855 Returns: > 0 => the number of bytes consumed
2856 -6 to 0 => malformed UTF-8 character at offset = (-return)
2857 */
2858
2859 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2860 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2861 {
2862 uint32_t c = *utf8bytes++;
2863 uint32_t d = c;
2864 int i, j, s;
2865
2866 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2867 {
2868 if ((d & 0x80) == 0) break;
2869 d <<= 1;
2870 }
2871
2872 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2873 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2874
2875 /* i now has a value in the range 1-5 */
2876
2877 s = 6*i;
2878 d = (c & utf8_table3[i]) << s;
2879
2880 for (j = 0; j < i; j++)
2881 {
2882 c = *utf8bytes++;
2883 if ((c & 0xc0) != 0x80) return -(j+1);
2884 s -= 6;
2885 d |= (c & 0x3f) << s;
2886 }
2887
2888 /* Check that encoding was the correct unique one */
2889
2890 for (j = 0; j < utf8_table1_size; j++)
2891 if (d <= (uint32_t)utf8_table1[j]) break;
2892 if (j != i) return -(i+1);
2893
2894 /* Valid value */
2895
2896 *vptr = d;
2897 return i+1;
2898 }
2899
2900
2901
2902 /*************************************************
2903 * Print one character *
2904 *************************************************/
2905
2906 /* Print a single character either literally, or as a hex escape, and count how
2907 many printed characters are used.
2908
2909 Arguments:
2910 c the character
2911 utf TRUE in UTF mode
2912 f the FILE to print to, or NULL just to count characters
2913
2914 Returns: number of characters written
2915 */
2916
2917 static int
pchar(uint32_t c,BOOL utf,FILE * f)2918 pchar(uint32_t c, BOOL utf, FILE *f)
2919 {
2920 int n = 0;
2921 char tempbuffer[16];
2922
2923 if (PRINTOK(c))
2924 {
2925 if (f != NULL) fprintf(f, "%c", c);
2926 return 1;
2927 }
2928
2929 if (c < 0x100)
2930 {
2931 if (utf)
2932 {
2933 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2934 return 6;
2935 }
2936 else
2937 {
2938 if (f != NULL) fprintf(f, "\\x%02x", c);
2939 return 4;
2940 }
2941 }
2942
2943 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2944 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2945
2946 return n >= 0 ? n : 0;
2947 }
2948
2949
2950
2951 #ifdef SUPPORT_PCRE2_16
2952 /*************************************************
2953 * Find length of 0-terminated 16-bit string *
2954 *************************************************/
2955
/* Counts the 16-bit code units that precede the terminating zero.

Argument:   p   points to a zero-terminated 16-bit string
Returns:        the number of code units, not counting the terminator
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;

/* Convert the pointer difference straight to the return type; going through
int first would mangle lengths that do not fit in an int. */

return (size_t)(pp - p);
}
2962 #endif /* SUPPORT_PCRE2_16 */
2963
2964
2965
2966 #ifdef SUPPORT_PCRE2_32
2967 /*************************************************
2968 * Find length of 0-terminated 32-bit string *
2969 *************************************************/
2970
/* Counts the 32-bit code units that precede the terminating zero.

Argument:   p   points to a zero-terminated 32-bit string
Returns:        the number of code units, not counting the terminator
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;

/* Convert the pointer difference straight to the return type; going through
int first would mangle lengths that do not fit in an int. */

return (size_t)(pp - p);
}
2977 #endif /* SUPPORT_PCRE2_32 */
2978
2979
2980 #ifdef SUPPORT_PCRE2_8
2981 /*************************************************
2982 * Print 8-bit character string *
2983 *************************************************/
2984
2985 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2986 For printing *MARK strings, a negative length is given, indicating that the
2987 length is in the first code unit. If handed a NULL file, this function just
2988 counts chars without printing (because pchar() does that). */
2989
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
uint32_t ch = 0;
int total = 0;

if (length < 0) length = *p++;    /* length is held in the first code unit */

while (length > 0)
  {
  int used = 1;                   /* bytes taken by this character */
  length--;                       /* bytes left after the current one */

  if (utf)
    {
    int rc = utf82ord(p, &ch);

    /* Accept the decoded character only if it was valid and lies entirely
    within the string; otherwise print the single byte as it stands. */

    if (rc > 0 && rc <= length + 1) used = rc;
      else ch = *p;
    }
  else ch = *p;

  p += used;
  length -= used - 1;
  total += pchar(ch, utf, f);
  }

return total;
}
3014 #endif
3015
3016
3017 #ifdef SUPPORT_PCRE2_16
3018 /*************************************************
3019 * Print 16-bit character string *
3020 *************************************************/
3021
3022 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3023 For printing *MARK strings, a negative length is given, indicating that the
3024 length is in the first code unit. If handed a NULL file, just counts chars
3025 without printing. */
3026
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;

if (length < 0) length = *p++;    /* length is held in the first code unit */

while (length > 0)
  {
  uint32_t c = *p++ & 0xffff;
  length--;                       /* code units left after this one */

  /* In UTF mode a high surrogate that is followed by a low surrogate is
  combined with it into one code point. An unpaired surrogate is printed as
  it stands. */

  if (utf && length > 0 && c >= 0xD800 && c < 0xDC00)
    {
    uint32_t low = *p & 0xffff;
    if (low >= 0xDC00 && low <= 0xDFFF)
      {
      c = 0x10000 + ((c & 0x3ff) << 10) + (low & 0x3ff);
      p++;
      length--;
      }
    }

  total += pchar(c, utf, f);
  }

return total;
}
3048 #endif /* SUPPORT_PCRE2_16 */
3049
3050
3051
3052 #ifdef SUPPORT_PCRE2_32
3053 /*************************************************
3054 * Print 32-bit character string *
3055 *************************************************/
3056
3057 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3058 For printing *MARK strings, a negative length is given, indicating that the
3059 length is in the first code unit. If handed a NULL file, just counts chars
3060 without printing. */
3061
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
(void)(utf);  /* Avoid compiler warning */

if (length < 0) length = *p++;    /* length is held in the first code unit */

/* Every code unit is one character, so each is passed straight to pchar(). */

for (; length > 0; length--)
  {
  uint32_t c = *p++;
  total += pchar(c, utf, f);
  }

return total;
}
3074 #endif /* SUPPORT_PCRE2_32 */
3075
3076
3077
3078
3079 /*************************************************
3080 * Convert character value to UTF-8 *
3081 *************************************************/
3082
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
8-bit library is not supported, to generate UTF-8 output for non-ASCII
characters.
3087
3088 Arguments:
3089 cvalue the character value
3090 utf8bytes pointer to buffer for result - at least 6 bytes long
3091
Returns:     number of bytes placed in the buffer, or -1 if cvalue is
             greater than 0x7fffffff (nothing is written in that case)
3093 */
3094
3095 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3096 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3097 {
3098 int i, j;
3099 if (cvalue > 0x7fffffffu)
3100 return -1;
3101 for (i = 0; i < utf8_table1_size; i++)
3102 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3103 utf8bytes += i;
3104 for (j = i; j > 0; j--)
3105 {
3106 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3107 cvalue >>= 6;
3108 }
3109 *utf8bytes = utf8_table2[i] | cvalue;
3110 return i + 1;
3111 }
3112
3113
3114
3115 #ifdef SUPPORT_PCRE2_16
3116 /*************************************************
3117 * Convert string to 16-bit *
3118 *************************************************/
3119
3120 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3121 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3122 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3123 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3124 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3125 greater than 0xffff.
3126
3127 If all the input bytes are ASCII, the space needed for a 16-bit string is
3128 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3129 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3130 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3131 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3132 save repeated re-sizing.
3133
3134 Note that this function does not object to surrogate values. This is
3135 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3136 for the purpose of testing that they are correctly faulted.
3137
3138 Arguments:
3139 p points to a byte string
3140 utf true in UTF mode
3141 lenptr points to number of bytes in the string (excluding trailing zero)
3142
3143 Returns: 0 on success, with the length updated to the number of 16-bit
3144 data items used (excluding the trailing zero)
3145 OR -1 if a UTF-8 string is malformed
3146 OR -2 if a value > 0x10ffff is encountered in UTF mode
3147 OR -3 if a value > 0xffff is encountered when not in UTF mode
3148 */
3149
3150 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3151 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3152 {
3153 uint16_t *pp;
3154 PCRE2_SIZE len = *lenptr;
3155
3156 if (pbuffer16_size < 2*len + 2)
3157 {
3158 if (pbuffer16 != NULL) free(pbuffer16);
3159 pbuffer16_size = 2*len + 2;
3160 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3161 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3162 if (pbuffer16 == NULL)
3163 {
3164 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3165 pbuffer16_size);
3166 exit(1);
3167 }
3168 }
3169
3170 pp = pbuffer16;
3171 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3172 {
3173 for (; len > 0; len--) *pp++ = *p++;
3174 }
3175 else while (len > 0)
3176 {
3177 uint32_t c;
3178 int chlen = utf82ord(p, &c);
3179 if (chlen <= 0) return -1;
3180 if (!utf && c > 0xffff) return -3;
3181 if (c > 0x10ffff) return -2;
3182 p += chlen;
3183 len -= chlen;
3184 if (c < 0x10000) *pp++ = c; else
3185 {
3186 c -= 0x10000;
3187 *pp++ = 0xD800 | (c >> 10);
3188 *pp++ = 0xDC00 | (c & 0x3ff);
3189 }
3190 }
3191
3192 *pp = 0;
3193 *lenptr = pp - pbuffer16;
3194 return 0;
3195 }
3196 #endif
3197
3198
3199
3200 #ifdef SUPPORT_PCRE2_32
3201 /*************************************************
3202 * Convert string to 32-bit *
3203 *************************************************/
3204
3205 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3206 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3207 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3208 limit of 0x10ffff cause an error.
3209
3210 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3211 is set, and no limit is imposed. There is special interpretation of the 0xff
3212 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3213 next character to be set. This provides a way of generating 32-bit characters
3214 greater than 0x7fffffff.
3215
3216 If all the input bytes are ASCII, the space needed for a 32-bit string is
3217 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3218 string is no more than four times, because the number of characters must be
3219 less than the number of bytes. The result is always left in pbuffer32. Impose a
3220 minimum size to save repeated re-sizing.
3221
3222 Note that this function does not object to surrogate values. This is
3223 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3224 for the purpose of testing that they are correctly faulted.
3225
3226 Arguments:
3227 p points to a byte string
3228 utf true in UTF mode
3229 lenptr points to number of bytes in the string (excluding trailing zero)
3230
3231 Returns: 0 on success, with the length updated to the number of 32-bit
3232 data items used (excluding the trailing zero)
3233 OR -1 if a UTF-8 string is malformed
3234 OR -2 if a value > 0x10ffff is encountered in UTF mode
3235 */
3236
3237 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3238 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3239 {
3240 uint32_t *pp;
3241 PCRE2_SIZE len = *lenptr;
3242
3243 if (pbuffer32_size < 4*len + 4)
3244 {
3245 if (pbuffer32 != NULL) free(pbuffer32);
3246 pbuffer32_size = 4*len + 4;
3247 if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3248 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3249 if (pbuffer32 == NULL)
3250 {
3251 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3252 pbuffer32_size);
3253 exit(1);
3254 }
3255 }
3256
3257 pp = pbuffer32;
3258
3259 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3260 {
3261 for (; len > 0; len--) *pp++ = *p++;
3262 }
3263
3264 else while (len > 0)
3265 {
3266 int chlen;
3267 uint32_t c;
3268 uint32_t topbit = 0;
3269 if (!utf && *p == 0xff && len > 1)
3270 {
3271 topbit = 0x80000000u;
3272 p++;
3273 len--;
3274 }
3275 chlen = utf82ord(p, &c);
3276 if (chlen <= 0) return -1;
3277 if (utf && c > 0x10ffff) return -2;
3278 p += chlen;
3279 len -= chlen;
3280 *pp++ = c | topbit;
3281 }
3282
3283 *pp = 0;
3284 *lenptr = pp - pbuffer32;
3285 return 0;
3286 }
3287 #endif /* SUPPORT_PCRE2_32 */
3288
3289
3290
3291 /* This function is no longer used. Keep it around for a while, just in case it
3292 needs to be re-instated. */
3293
3294 #ifdef NEVERNEVERNEVER
3295
3296 /*************************************************
3297 * Move back by so many characters *
3298 *************************************************/
3299
3300 /* Given a code unit offset in a subject string, move backwards by a number of
3301 characters, and return the resulting offset.
3302
3303 Arguments:
3304 subject pointer to the string
3305 offset start offset
3306 count count to move back by
3307 utf TRUE if in UTF mode
3308
3309 Returns: a possibly changed offset
3310 */
3311
3312 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3313 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3314 {
3315 if (!utf || test_mode == PCRE32_MODE)
3316 return (count >= offset)? 0 : (offset - count);
3317
3318 else if (test_mode == PCRE8_MODE)
3319 {
3320 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3321 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3322 {
3323 pp--;
3324 while ((*pp & 0xc0) == 0x80) pp--;
3325 }
3326 return pp - (PCRE2_SPTR8)subject;
3327 }
3328
3329 else /* 16-bit mode */
3330 {
3331 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3332 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3333 {
3334 pp--;
3335 if ((*pp & 0xfc00) == 0xdc00) pp--;
3336 }
3337 return pp - (PCRE2_SPTR16)subject;
3338 }
3339 }
3340 #endif /* NEVERNEVERNEVER */
3341
3342
3343
3344 /*************************************************
3345 * Expand input buffers *
3346 *************************************************/
3347
3348 /* This function doubles the size of the input buffer and the buffer for
3349 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3350 the new ones.
3351
3352 Arguments: none
3353 Returns: nothing (aborts if malloc() fails)
3354 */
3355
3356 static void
expand_input_buffers(void)3357 expand_input_buffers(void)
3358 {
3359 int new_pbuffer8_size = 2*pbuffer8_size;
3360 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3361 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3362
3363 if (new_buffer == NULL || new_pbuffer8 == NULL)
3364 {
3365 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3366 exit(1);
3367 }
3368
3369 memcpy(new_buffer, buffer, pbuffer8_size);
3370 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3371
3372 pbuffer8_size = new_pbuffer8_size;
3373
3374 free(buffer);
3375 free(pbuffer8);
3376
3377 buffer = new_buffer;
3378 pbuffer8 = new_pbuffer8;
3379 }
3380
3381
3382
3383 /*************************************************
3384 * Read or extend an input line *
3385 *************************************************/
3386
3387 /* Input lines are read into buffer, but both patterns and data lines can be
3388 continued over multiple input lines. In addition, if the buffer fills up, we
3389 want to automatically expand it so as to be able to handle extremely large
3390 lines that are needed for certain stress tests, although this is less likely
3391 now that there are repetition features for both patterns and data. When the
3392 input buffer is expanded, the other two buffers must also be expanded likewise,
3393 and the contents of pbuffer, which are a copy of the input for callouts, must
3394 be preserved (for when expansion happens for a data line). This is not the most
3395 optimal way of handling this, but hey, this is just a test program!
3396
3397 Arguments:
3398 f the file to read
3399 start where in buffer to start (this *must* be within buffer)
3400 prompt for stdin or readline()
3401
3402 Returns: pointer to the start of new data
3403 could be a copy of start, or could be moved
3404 NULL if no data read and EOF reached
3405 */
3406
3407 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3408 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3409 {
3410 uint8_t *here = start;
3411
3412 for (;;)
3413 {
3414 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3415
3416 if (rlen > 1000)
3417 {
3418 size_t dlen;
3419
3420 /* If libreadline or libedit support is required, use readline() to read a
3421 line if the input is a terminal. Note that readline() removes the trailing
3422 newline, so we must put it back again, to be compatible with fgets(). */
3423
3424 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3425 if (INTERACTIVE(f))
3426 {
3427 size_t len;
3428 char *s = readline(prompt);
3429 if (s == NULL) return (here == start)? NULL : start;
3430 len = strlen(s);
3431 if (len > 0) add_history(s);
3432 if (len > rlen - 1) len = rlen - 1;
3433 memcpy(here, s, len);
3434 here[len] = '\n';
3435 here[len+1] = 0;
3436 free(s);
3437 }
3438 else
3439 #endif
3440
3441 /* Read the next line by normal means, prompting if the file is a tty. */
3442
3443 {
3444 if (INTERACTIVE(f)) printf("%s", prompt);
3445 if (fgets((char *)here, rlen, f) == NULL)
3446 return (here == start)? NULL : start;
3447 }
3448
3449 dlen = strlen((char *)here);
3450 here += dlen;
3451
3452 /* Check for end of line reached. Take care not to read data from before
3453 start (dlen will be zero for a file starting with a binary zero). */
3454
3455 if (here > start && here[-1] == '\n') return start;
3456
3457 /* If we have not read a newline when reading a file, we have either filled
3458 the buffer or reached the end of the file. We can detect the former by
3459 checking that the string fills the buffer, and the latter by feof(). If
3460 neither of these is true, it means we read a binary zero which has caused
3461 strlen() to give a short length. This is a hard error because pcre2test
3462 expects to work with C strings. */
3463
3464 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3465 {
3466 fprintf(outfile, "** Binary zero encountered in input\n");
3467 fprintf(outfile, "** pcre2test run abandoned\n");
3468 exit(1);
3469 }
3470 }
3471
3472 else
3473 {
3474 size_t start_offset = start - buffer;
3475 size_t here_offset = here - buffer;
3476 expand_input_buffers();
3477 start = buffer + start_offset;
3478 here = buffer + here_offset;
3479 }
3480 }
3481
3482 /* Control never gets here */
3483 }
3484
3485
3486
3487 /*************************************************
3488 * Case-independent strncmp() function *
3489 *************************************************/
3490
3491 /*
3492 Arguments:
3493 s first string
3494 t second string
3495 n number of characters to compare
3496
3497 Returns: < 0, = 0, or > 0, according to the comparison
3498 */
3499
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare at most n characters, ignoring case. Like strncmp(), stop as soon as
both strings end, so that nothing beyond a terminating zero is ever read. A
zero or negative count compares equal. */

for (; n > 0; n--)
  {
  int cs = *s++;
  int ct = *t++;
  int c = tolower(cs) - tolower(ct);
  if (c != 0) return c;
  if (cs == 0) break;   /* c == 0 here, so both strings ended together */
  }
return 0;
}
3510
3511
3512
3513 /*************************************************
3514 * Scan the main modifier list *
3515 *************************************************/
3516
3517 /* This function searches the modifier list for a long modifier name.
3518
3519 Argument:
3520 p start of the name
3521 lenp length of the name
3522
3523 Returns: an index in the modifier list, or -1 on failure
3524 */
3525
3526 static int
scan_modifiers(const uint8_t * p,unsigned int len)3527 scan_modifiers(const uint8_t *p, unsigned int len)
3528 {
3529 int bot = 0;
3530 int top = MODLISTCOUNT;
3531
3532 while (top > bot)
3533 {
3534 int mid = (bot + top)/2;
3535 unsigned int mlen = strlen(modlist[mid].name);
3536 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3537 if (c == 0)
3538 {
3539 if (len == mlen) return mid;
3540 c = (int)len - (int)mlen;
3541 }
3542 if (c > 0) bot = mid + 1; else top = mid;
3543 }
3544
3545 return -1;
3546
3547 }
3548
3549
3550
3551 /*************************************************
3552 * Check a modifier and find its field *
3553 *************************************************/
3554
3555 /* This function is called when a modifier has been identified. We check that
3556 it is allowed here and find the field that is to be changed.
3557
3558 Arguments:
3559 m the modifier list entry
3560 ctx CTX_PAT => pattern context
3561 CTX_POPPAT => pattern context for popped pattern
3562 CTX_DEFPAT => default pattern context
3563 CTX_DAT => data context
3564 CTX_DEFDAT => default data context
3565 pctl point to pattern control block
3566 dctl point to data control block
3567 c a single character or 0
3568
3569 Returns: a field pointer or NULL
3570 */
3571
3572 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3573 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3574 {
3575 void *field = NULL;
3576 PCRE2_SIZE offset = m->offset;
3577
3578 if (restrict_for_perl_test) switch(m->which)
3579 {
3580 case MOD_PNDP:
3581 case MOD_PATP:
3582 case MOD_PDP:
3583 break;
3584
3585 default:
3586 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3587 m->name);
3588 return NULL;
3589 }
3590
3591 switch (m->which)
3592 {
3593 case MOD_CTC: /* Compile context modifier */
3594 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3595 else if (ctx == CTX_PAT) field = PTR(pat_context);
3596 break;
3597
3598 case MOD_CTM: /* Match context modifier */
3599 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3600 else if (ctx == CTX_DAT) field = PTR(dat_context);
3601 break;
3602
3603 case MOD_DAT: /* Data line modifier */
3604 if (dctl != NULL) field = dctl;
3605 break;
3606
3607 case MOD_PAT: /* Pattern modifier */
3608 case MOD_PATP: /* Allowed for Perl test */
3609 if (pctl != NULL) field = pctl;
3610 break;
3611
3612 case MOD_PD: /* Pattern or data line modifier */
3613 case MOD_PDP: /* Ditto, allowed for Perl test */
3614 case MOD_PND: /* Ditto, but not default pattern */
3615 case MOD_PNDP: /* Ditto, allowed for Perl test */
3616 if (dctl != NULL) field = dctl;
3617 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3618 ctx != CTX_DEFPAT))
3619 field = pctl;
3620 break;
3621 }
3622
3623 if (field == NULL)
3624 {
3625 if (c == 0)
3626 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3627 else
3628 fprintf(outfile, "** /%c is not valid here\n", c);
3629 return NULL;
3630 }
3631
3632 return (char *)field + offset;
3633 }
3634
3635
3636
3637 /*************************************************
3638 * Decode a modifier list *
3639 *************************************************/
3640
3641 /* A pointer to a control block is NULL when called in cases when that block is
3642 not relevant. They are never all relevant in one call. At least one of patctl
3643 and datctl is NULL. The second argument specifies which context to use for
3644 modifiers that apply to contexts.
3645
3646 Arguments:
3647 p point to modifier string
3648 ctx CTX_PAT => pattern context
3649 CTX_POPPAT => pattern context for popped pattern
3650 CTX_DEFPAT => default pattern context
3651 CTX_DAT => data context
3652 CTX_DEFDAT => default data context
3653 pctl point to pattern control block
3654 dctl point to data control block
3655
3656 Returns: TRUE if successful decode, FALSE otherwise
3657 */
3658
3659 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3660 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3661 {
3662 uint8_t *ep, *pp;
3663 long li;
3664 unsigned long uli;
3665 BOOL first = TRUE;
3666
3667 for (;;)
3668 {
3669 void *field;
3670 modstruct *m;
3671 BOOL off = FALSE;
3672 unsigned int i, len;
3673 int index;
3674 char *endptr;
3675
3676 /* Skip white space and commas. */
3677
3678 while (isspace(*p) || *p == ',') p++;
3679 if (*p == 0) break;
3680
3681 /* Find the end of the item; lose trailing whitespace at end of line. */
3682
3683 for (ep = p; *ep != 0 && *ep != ','; ep++);
3684 if (*ep == 0)
3685 {
3686 while (ep > p && isspace(ep[-1])) ep--;
3687 *ep = 0;
3688 }
3689
3690 /* Remember if the first character is '-'. */
3691
3692 if (*p == '-')
3693 {
3694 off = TRUE;
3695 p++;
3696 }
3697
3698 /* Find the length of a full-length modifier name, and scan for it. */
3699
3700 pp = p;
3701 while (pp < ep && *pp != '=') pp++;
3702 index = scan_modifiers(p, pp - p);
3703
3704 /* If the first modifier is unrecognized, try to interpret it as a sequence
3705 of single-character abbreviated modifiers. None of these modifiers have any
3706 associated data. They just set options or control bits. */
3707
3708 if (index < 0)
3709 {
3710 uint32_t cc;
3711 uint8_t *mp = p;
3712
3713 if (!first)
3714 {
3715 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3716 if (ep - p == 1)
3717 fprintf(outfile, "** Single-character modifiers must come first\n");
3718 return FALSE;
3719 }
3720
3721 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3722 {
3723 for (i = 0; i < C1MODLISTCOUNT; i++)
3724 if (cc == c1modlist[i].onechar) break;
3725
3726 if (i >= C1MODLISTCOUNT)
3727 {
3728 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3729 *p, (int)(ep-mp), mp);
3730 return FALSE;
3731 }
3732
3733 if (c1modlist[i].index >= 0)
3734 {
3735 index = c1modlist[i].index;
3736 }
3737
3738 else
3739 {
3740 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3741 strlen(c1modlist[i].fullname));
3742 if (index < 0)
3743 {
3744 fprintf(outfile, "** Internal error: single-character equivalent "
3745 "modifier '%s' not found\n", c1modlist[i].fullname);
3746 return FALSE;
3747 }
3748 c1modlist[i].index = index; /* Cache for next time */
3749 }
3750
3751 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3752 if (field == NULL) return FALSE;
3753
3754 /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3755 PCRE2_EXTENDED_MORE. */
3756
3757 if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3758 {
3759 *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3760 *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3761 }
3762 else
3763 *((uint32_t *)field) |= modlist[index].value;
3764 }
3765
3766 continue; /* With tne next (fullname) modifier */
3767 }
3768
3769 /* We have a match on a full-name modifier. Check for the existence of data
3770 when needed. */
3771
3772 m = modlist + index; /* Save typing */
3773 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3774 (m->type != MOD_IND || *pp == '='))
3775 {
3776 if (*pp++ != '=')
3777 {
3778 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3779 return FALSE;
3780 }
3781 if (off)
3782 {
3783 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3784 return FALSE;
3785 }
3786 }
3787
3788 /* These on/off types have no data. */
3789
3790 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3791 {
3792 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3793 return FALSE;
3794 }
3795
3796 /* Set the data length for those types that have data. Then find the field
3797 that is to be set. If check_modifier() returns NULL, it has already output an
3798 error message. */
3799
3800 len = ep - pp;
3801 field = check_modifier(m, ctx, pctl, dctl, 0);
3802 if (field == NULL) return FALSE;
3803
3804 /* Process according to data type. */
3805
3806 switch (m->type)
3807 {
3808 case MOD_CTL:
3809 case MOD_OPT:
3810 if (off) *((uint32_t *)field) &= ~m->value;
3811 else *((uint32_t *)field) |= m->value;
3812 break;
3813
3814 case MOD_BSR:
3815 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3816 {
3817 #ifdef BSR_ANYCRLF
3818 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3819 #else
3820 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3821 #endif
3822 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3823 else dctl->control2 &= ~CTL2_BSR_SET;
3824 }
3825 else
3826 {
3827 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3828 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3829 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3830 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3831 else goto INVALID_VALUE;
3832 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3833 else dctl->control2 |= CTL2_BSR_SET;
3834 }
3835 pp = ep;
3836 break;
3837
3838 case MOD_CHR: /* A single character */
3839 *((uint32_t *)field) = *pp++;
3840 break;
3841
3842 case MOD_CON: /* A convert type/options list */
3843 for (;; pp++)
3844 {
3845 uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3846 len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3847 for (i = 0; i < convertlistcount; i++)
3848 {
3849 if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3850 {
3851 if (*((uint32_t *)field) == CONVERT_UNSET)
3852 *((uint32_t *)field) = convertlist[i].option;
3853 else
3854 *((uint32_t *)field) |= convertlist[i].option;
3855 break;
3856 }
3857 }
3858 if (i >= convertlistcount) goto INVALID_VALUE;
3859 pp += len;
3860 if (*pp != ':') break;
3861 }
3862 break;
3863
3864 case MOD_IN2: /* One or two unsigned integers */
3865 if (!isdigit(*pp)) goto INVALID_VALUE;
3866 uli = strtoul((const char *)pp, &endptr, 10);
3867 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3868 ((uint32_t *)field)[0] = (uint32_t)uli;
3869 if (*endptr == ':')
3870 {
3871 uli = strtoul((const char *)endptr+1, &endptr, 10);
3872 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3873 ((uint32_t *)field)[1] = (uint32_t)uli;
3874 }
3875 else ((uint32_t *)field)[1] = 0;
3876 pp = (uint8_t *)endptr;
3877 break;
3878
3879 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3880 less than ULONG_MAX. So first test for overflowing the long int, and then
3881 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3882
3883 case MOD_SIZ: /* PCRE2_SIZE value */
3884 if (!isdigit(*pp)) goto INVALID_VALUE;
3885 uli = strtoul((const char *)pp, &endptr, 10);
3886 if (uli == ULONG_MAX) goto INVALID_VALUE;
3887 #if ULONG_MAX > PCRE2_SIZE_MAX
3888 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3889 #endif
3890 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3891 pp = (uint8_t *)endptr;
3892 break;
3893
3894 case MOD_IND: /* Unsigned integer with default */
3895 if (len == 0)
3896 {
3897 *((uint32_t *)field) = (uint32_t)(m->value);
3898 break;
3899 }
3900 /* Fall through */
3901
3902 case MOD_INT: /* Unsigned integer */
3903 if (!isdigit(*pp)) goto INVALID_VALUE;
3904 uli = strtoul((const char *)pp, &endptr, 10);
3905 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3906 *((uint32_t *)field) = (uint32_t)uli;
3907 pp = (uint8_t *)endptr;
3908 break;
3909
3910 case MOD_INS: /* Signed integer */
3911 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3912 li = strtol((const char *)pp, &endptr, 10);
3913 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3914 *((int32_t *)field) = (int32_t)li;
3915 pp = (uint8_t *)endptr;
3916 break;
3917
3918 case MOD_NL:
3919 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3920 if (len == strlen(newlines[i]) &&
3921 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3922 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3923 if (i == 0)
3924 {
3925 *((uint16_t *)field) = NEWLINE_DEFAULT;
3926 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3927 else dctl->control2 &= ~CTL2_NL_SET;
3928 }
3929 else
3930 {
3931 *((uint16_t *)field) = i;
3932 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3933 else dctl->control2 |= CTL2_NL_SET;
3934 }
3935 pp = ep;
3936 break;
3937
3938 case MOD_NN: /* Name or (signed) number; may be several */
3939 if (isdigit(*pp) || *pp == '-')
3940 {
3941 int ct = MAXCPYGET - 1;
3942 int32_t value;
3943 li = strtol((const char *)pp, &endptr, 10);
3944 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3945 value = (int32_t)li;
3946 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3947 if (value >= 0) /* Add new number */
3948 {
3949 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3950 field = (char *)field + sizeof(int32_t);
3951 if (ct <= 0)
3952 {
3953 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3954 return FALSE;
3955 }
3956 }
3957 *((int32_t *)field) = value;
3958 if (ct > 0) ((int32_t *)field)[1] = -1;
3959 pp = (uint8_t *)endptr;
3960 }
3961
3962 /* Multiple strings are put end to end. */
3963
3964 else
3965 {
3966 char *nn = (char *)field;
3967 if (len > 0) /* Add new name */
3968 {
3969 if (len > MAX_NAME_SIZE)
3970 {
3971 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3972 return FALSE;
3973 }
3974 while (*nn != 0) nn += strlen(nn) + 1;
3975 if (nn + len + 2 - (char *)field > LENCPYGET)
3976 {
3977 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3978 m->name);
3979 return FALSE;
3980 }
3981 memcpy(nn, pp, len);
3982 }
3983 nn[len] = 0 ;
3984 nn[len+1] = 0;
3985 pp = ep;
3986 }
3987 break;
3988
3989 case MOD_STR:
3990 if (len + 1 > m->value)
3991 {
3992 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3993 m->name, m->value - 1);
3994 return FALSE;
3995 }
3996 memcpy(field, pp, len);
3997 ((uint8_t *)field)[len] = 0;
3998 pp = ep;
3999 break;
4000 }
4001
4002 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4003 {
4004 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4005 return FALSE;
4006 }
4007
4008 p = pp;
4009 first = FALSE;
4010
4011 if (ctx == CTX_POPPAT &&
4012 (pctl->options != 0 ||
4013 pctl->tables_id != 0 ||
4014 pctl->locale[0] != 0 ||
4015 (pctl->control & NOTPOP_CONTROLS) != 0))
4016 {
4017 fprintf(outfile, "** '%s' is not valid here\n", m->name);
4018 return FALSE;
4019 }
4020 }
4021
4022 return TRUE;
4023
4024 INVALID_VALUE:
4025 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4026 return FALSE;
4027 }
4028
4029
4030 /*************************************************
4031 * Get info from a pattern *
4032 *************************************************/
4033
4034 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4035 pattern.
4036
4037 Arguments:
4038 what code for the required information
4039 where where to put the answer
4040 unsetok PCRE2_ERROR_UNSET is an "expected" result
4041
4042 Returns: the return from pcre2_pattern_info()
4043 */
4044
4045 static int
pattern_info(int what,void * where,BOOL unsetok)4046 pattern_info(int what, void *where, BOOL unsetok)
4047 {
4048 int rc;
4049 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
4050 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4051 if (rc >= 0) return 0;
4052 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4053 {
4054 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4055 what);
4056 if (rc == PCRE2_ERROR_BADMODE)
4057 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4058 "%d-bit mode\n", test_mode,
4059 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4060 }
4061 return rc;
4062 }
4063
4064
4065
4066 #ifdef SUPPORT_PCRE2_8
4067 /*************************************************
4068 * Show something in a list *
4069 *************************************************/
4070
4071 /* This function just helps to keep the code that uses it tidier. It's used for
4072 various lists of things where there needs to be introductory text before the
4073 first item. As these calls are all in the POSIX-support code, they happen only
4074 when 8-bit mode is supported. */
4075
4076 static void
prmsg(const char ** msg,const char * s)4077 prmsg(const char **msg, const char *s)
4078 {
4079 fprintf(outfile, "%s %s", *msg, s);
4080 *msg = "";
4081 }
4082 #endif /* SUPPORT_PCRE2_8 */
4083
4084
4085
4086 /*************************************************
4087 * Show control bits *
4088 *************************************************/
4089
4090 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4091 Because the bits are unique, this can be used for both pattern and data control
4092 words.
4093
4094 Arguments:
4095 controls control bits
4096 controls2 more control bits
4097 before text to print before
4098
4099 Returns: nothing
4100 */
4101
4102 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4103 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4104 {
4105 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4106 before,
4107 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4108 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4109 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4110 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4111 ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4112 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4113 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4114 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4115 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4116 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4117 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4118 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4119 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4120 ((controls & CTL_DFA) != 0)? " dfa" : "",
4121 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4122 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4123 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4124 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4125 ((controls & CTL_GETALL) != 0)? " getall" : "",
4126 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4127 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4128 ((controls & CTL_INFO) != 0)? " info" : "",
4129 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4130 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4131 ((controls & CTL_MARK) != 0)? " mark" : "",
4132 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4133 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4134 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4135 ((controls & CTL_POSIX) != 0)? " posix" : "",
4136 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4137 ((controls & CTL_PUSH) != 0)? " push" : "",
4138 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4139 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4140 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4141 ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4142 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4143 ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4144 ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4145 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4146 ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4147 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4148 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4149 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4150 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4151 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4152 }
4153
4154
4155
4156 /*************************************************
4157 * Show compile options *
4158 *************************************************/
4159
4160 /* Called from show_pattern_info() and for unsupported POSIX options.
4161
4162 Arguments:
4163 options an options word
4164 before text to print before
4165 after text to print after
4166
4167 Returns: nothing
4168 */
4169
4170 static void
show_compile_options(uint32_t options,const char * before,const char * after)4171 show_compile_options(uint32_t options, const char *before, const char *after)
4172 {
4173 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4174 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4175 before,
4176 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4177 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4178 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4179 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4180 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4181 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4182 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4183 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4184 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4185 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4186 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4187 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4188 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4189 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4190 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4191 ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4192 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4193 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4194 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4195 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4196 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4197 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4198 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4199 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4200 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4201 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4202 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4203 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4204 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4205 ((options & PCRE2_UTF) != 0)? " utf" : "",
4206 after);
4207 }
4208
4209
4210 /*************************************************
4211 * Show compile extra options *
4212 *************************************************/
4213
4214 /* Called from show_pattern_info() and for unsupported POSIX options.
4215
4216 Arguments:
4217 options an options word
4218 before text to print before
4219 after text to print after
4220
4221 Returns: nothing
4222 */
4223
4224 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4225 show_compile_extra_options(uint32_t options, const char *before,
4226 const char *after)
4227 {
4228 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4229 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4230 before,
4231 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4232 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4233 ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4234 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4235 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4236 ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4237 after);
4238 }
4239
4240
4241
#ifdef SUPPORT_PCRE2_8
/*************************************************
*                Show match options              *
*************************************************/

/* Output the names of the match option bits that are set in an options word.
Nothing at all is output when no listed bit is set. Called for unsupported
POSIX options.

Argument:   an options word
Returns:    nothing
*/

static void
show_match_options(uint32_t options)
{
/* Each match option bit with the text that represents it, in output order. */

static const struct {
  uint32_t bit;
  const char *text;
} optnames[] = {
  { PCRE2_ANCHORED,             " anchored" },
  { PCRE2_COPY_MATCHED_SUBJECT, " copy_matched_subject" },
  { PCRE2_DFA_RESTART,          " dfa_restart" },
  { PCRE2_DFA_SHORTEST,         " dfa_shortest" },
  { PCRE2_ENDANCHORED,          " endanchored" },
  { PCRE2_NO_JIT,               " no_jit" },
  { PCRE2_NO_UTF_CHECK,         " no_utf_check" },
  { PCRE2_NOTBOL,               " notbol" },
  { PCRE2_NOTEMPTY,             " notempty" },
  { PCRE2_NOTEMPTY_ATSTART,     " notempty_atstart" },
  { PCRE2_NOTEOL,               " noteol" },
  { PCRE2_PARTIAL_HARD,         " partial_hard" },
  { PCRE2_PARTIAL_SOFT,         " partial_soft" }
};
size_t n;

for (n = 0; n < sizeof(optnames)/sizeof(optnames[0]); n++)
  {
  if ((options & optnames[n].bit) != 0)
    fprintf(outfile, "%s", optnames[n].text);
  }
}
#endif  /* SUPPORT_PCRE2_8 */
4268
4269
4270
4271 /*************************************************
4272 * Show memory usage info for a pattern *
4273 *************************************************/
4274
4275 static void
show_memory_info(void)4276 show_memory_info(void)
4277 {
4278 uint32_t name_count, name_entry_size;
4279 size_t size, cblock_size;
4280
4281 /* One of the test_mode values will always be true, but to stop a compiler
4282 warning we must initialize cblock_size. */
4283
4284 cblock_size = 0;
4285 #ifdef SUPPORT_PCRE2_8
4286 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4287 #endif
4288 #ifdef SUPPORT_PCRE2_16
4289 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4290 #endif
4291 #ifdef SUPPORT_PCRE2_32
4292 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4293 #endif
4294
4295 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4296 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4297 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4298 fprintf(outfile, "Memory allocation (code space): %d\n",
4299 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4300 if (pat_patctl.jit != 0)
4301 {
4302 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4303 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4304 }
4305 }
4306
4307
4308
4309 /*************************************************
4310 * Show frame size info for a pattern *
4311 *************************************************/
4312
4313 static void
show_framesize(void)4314 show_framesize(void)
4315 {
4316 size_t frame_size;
4317 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4318 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4319 }
4320
4321
4322
4323 /*************************************************
4324 * Get and output an error message *
4325 *************************************************/
4326
4327 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4328 print_error_message(int errorcode, const char *before, const char *after)
4329 {
4330 int len;
4331 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4332 if (len < 0)
4333 {
4334 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4335 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4336 }
4337 else
4338 {
4339 fprintf(outfile, "%s", before);
4340 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4341 fprintf(outfile, "%s", after);
4342 }
4343 return len >= 0;
4344 }
4345
4346
4347 /*************************************************
4348 * Callback function for callout enumeration *
4349 *************************************************/
4350
4351 /* The only differences in the callout emumeration block for different code
4352 unit widths are that the pointers to the subject, the most recent MARK, and a
4353 callout argument string point to strings of the appropriate width. Casts can be
4354 used to deal with this.
4355
4356 Argument:
4357 cb pointer to enumerate block
4358 callout_data user data
4359
4360 Returns: 0
4361 */
4362
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4363 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4364 void *callout_data)
4365 {
4366 uint32_t i;
4367 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4368
4369 (void)callout_data; /* Not currently displayed */
4370
4371 fprintf(outfile, "Callout ");
4372 if (cb->callout_string != NULL)
4373 {
4374 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4375 fprintf(outfile, "%c", delimiter);
4376 PCHARSV(cb->callout_string, 0,
4377 cb->callout_string_length, utf, outfile);
4378 for (i = 0; callout_start_delims[i] != 0; i++)
4379 if (delimiter == callout_start_delims[i])
4380 {
4381 delimiter = callout_end_delims[i];
4382 break;
4383 }
4384 fprintf(outfile, "%c ", delimiter);
4385 }
4386 else fprintf(outfile, "%d ", cb->callout_number);
4387
4388 fprintf(outfile, "%.*s\n",
4389 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4390 pbuffer8 + cb->pattern_position);
4391
4392 return 0;
4393 }
4394
4395
4396
4397 /*************************************************
4398 * Show information about a pattern *
4399 *************************************************/
4400
4401 /* This function is called after a pattern has been compiled if any of the
4402 information-requesting controls have been set.
4403
4404 Arguments: none
4405
4406 Returns: PR_OK continue processing next line
4407 PR_SKIP skip to a blank line
4408 PR_ABEND abort the pcre2test run
4409 */
4410
4411 static int
show_pattern_info(void)4412 show_pattern_info(void)
4413 {
4414 uint32_t compile_options, overall_options, extra_options;
4415 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4416
4417 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4418 {
4419 fprintf(outfile, "------------------------------------------------------------------\n");
4420 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4421 }
4422
4423 if ((pat_patctl.control & CTL_INFO) != 0)
4424 {
4425 int rc;
4426 void *nametable;
4427 uint8_t *start_bits;
4428 BOOL heap_limit_set, match_limit_set, depth_limit_set;
4429 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4430 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4431 depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4432 newline_convention;
4433
4434 /* Exercise the error route. */
4435
4436 PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4437 (void)rc;
4438
4439 /* These info requests may return PCRE2_ERROR_UNSET. */
4440
4441 switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4442 {
4443 case 0:
4444 heap_limit_set = TRUE;
4445 break;
4446
4447 case PCRE2_ERROR_UNSET:
4448 heap_limit_set = FALSE;
4449 break;
4450
4451 default:
4452 return PR_ABEND;
4453 }
4454
4455 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4456 {
4457 case 0:
4458 match_limit_set = TRUE;
4459 break;
4460
4461 case PCRE2_ERROR_UNSET:
4462 match_limit_set = FALSE;
4463 break;
4464
4465 default:
4466 return PR_ABEND;
4467 }
4468
4469 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4470 {
4471 case 0:
4472 depth_limit_set = TRUE;
4473 break;
4474
4475 case PCRE2_ERROR_UNSET:
4476 depth_limit_set = FALSE;
4477 break;
4478
4479 default:
4480 return PR_ABEND;
4481 }
4482
4483 /* These info requests should always succeed. */
4484
4485 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4486 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4487 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4488 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4489 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4490 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4491 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4492 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4493 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4494 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4495 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4496 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4497 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4498 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4499 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4500 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4501 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4502 != 0)
4503 return PR_ABEND;
4504
4505 fprintf(outfile, "Capture group count = %d\n", capture_count);
4506
4507 if (backrefmax > 0)
4508 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4509
4510 if (maxlookbehind > 0)
4511 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4512
4513 if (heap_limit_set)
4514 fprintf(outfile, "Heap limit = %u\n", heap_limit);
4515
4516 if (match_limit_set)
4517 fprintf(outfile, "Match limit = %u\n", match_limit);
4518
4519 if (depth_limit_set)
4520 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4521
4522 if (namecount > 0)
4523 {
4524 fprintf(outfile, "Named capture groups:\n");
4525 for (; namecount > 0; namecount--)
4526 {
4527 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4528 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4529 fprintf(outfile, " ");
4530
4531 /* In UTF mode the name may be a UTF string containing non-ASCII
4532 letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4533 use the normal string printing functions, which use escapes for all
4534 non-ASCII characters. */
4535
4536 if (utf)
4537 {
4538 #ifdef SUPPORT_PCRE2_32
4539 if (test_mode == PCRE32_MODE)
4540 {
4541 PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4542 while (*nameptr != 0)
4543 {
4544 uint8_t u8buff[6];
4545 int len = ord2utf8(*nameptr++, u8buff);
4546 fprintf(outfile, "%.*s", len, u8buff);
4547 }
4548 }
4549 #endif
4550 #ifdef SUPPORT_PCRE2_16
4551 if (test_mode == PCRE16_MODE)
4552 {
4553 PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4554 while (*nameptr != 0)
4555 {
4556 int len;
4557 uint8_t u8buff[6];
4558 uint32_t c = *nameptr++ & 0xffff;
4559 if (c >= 0xD800 && c < 0xDC00)
4560 c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4561 len = ord2utf8(c, u8buff);
4562 fprintf(outfile, "%.*s", len, u8buff);
4563 }
4564 }
4565 #endif
4566 #ifdef SUPPORT_PCRE2_8
4567 if (test_mode == PCRE8_MODE)
4568 fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4569 #endif
4570 }
4571 else /* Not UTF mode */
4572 {
4573 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4574 }
4575
4576 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4577
4578 #ifdef SUPPORT_PCRE2_32
4579 if (test_mode == PCRE32_MODE)
4580 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4581 #endif
4582 #ifdef SUPPORT_PCRE2_16
4583 if (test_mode == PCRE16_MODE)
4584 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4585 #endif
4586 #ifdef SUPPORT_PCRE2_8
4587 if (test_mode == PCRE8_MODE)
4588 fprintf(outfile, "%3d\n", (int)(
4589 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4590 #endif
4591
4592 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4593 }
4594 }
4595
4596 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4597 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4598 if (match_empty) fprintf(outfile, "May match empty string\n");
4599
4600 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4601 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4602 pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4603
4604 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4605 cluttering up the verification output of non-UTF test files. */
4606
4607 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4608 {
4609 compile_options &= ~PCRE2_NEVER_UTF;
4610 overall_options &= ~PCRE2_NEVER_UTF;
4611 }
4612
4613 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4614 {
4615 compile_options &= ~PCRE2_NEVER_UCP;
4616 overall_options &= ~PCRE2_NEVER_UCP;
4617 }
4618
4619 if ((compile_options|overall_options) != 0)
4620 {
4621 if (compile_options == overall_options)
4622 show_compile_options(compile_options, "Options:", "\n");
4623 else
4624 {
4625 show_compile_options(compile_options, "Compile options:", "\n");
4626 show_compile_options(overall_options, "Overall options:", "\n");
4627 }
4628 }
4629
4630 if (extra_options != 0)
4631 show_compile_extra_options(extra_options, "Extra options:", "\n");
4632
4633 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4634
4635 if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4636 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4637 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4638 "any Unicode newline" : "CR, LF, or CRLF");
4639
4640 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4641 {
4642 switch (newline_convention)
4643 {
4644 case PCRE2_NEWLINE_CR:
4645 fprintf(outfile, "Forced newline is CR\n");
4646 break;
4647
4648 case PCRE2_NEWLINE_LF:
4649 fprintf(outfile, "Forced newline is LF\n");
4650 break;
4651
4652 case PCRE2_NEWLINE_CRLF:
4653 fprintf(outfile, "Forced newline is CRLF\n");
4654 break;
4655
4656 case PCRE2_NEWLINE_ANYCRLF:
4657 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4658 break;
4659
4660 case PCRE2_NEWLINE_ANY:
4661 fprintf(outfile, "Forced newline is any Unicode newline\n");
4662 break;
4663
4664 case PCRE2_NEWLINE_NUL:
4665 fprintf(outfile, "Forced newline is NUL\n");
4666 break;
4667
4668 default:
4669 break;
4670 }
4671 }
4672
4673 if (first_ctype == 2)
4674 {
4675 fprintf(outfile, "First code unit at start or follows newline\n");
4676 }
4677 else if (first_ctype == 1)
4678 {
4679 const char *caseless =
4680 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4681 "" : " (caseless)";
4682 if (PRINTOK(first_cunit))
4683 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4684 else
4685 {
4686 fprintf(outfile, "First code unit = ");
4687 pchar(first_cunit, FALSE, outfile);
4688 fprintf(outfile, "%s\n", caseless);
4689 }
4690 }
4691 else if (start_bits != NULL)
4692 {
4693 int i;
4694 int c = 24;
4695 fprintf(outfile, "Starting code units: ");
4696 for (i = 0; i < 256; i++)
4697 {
4698 if ((start_bits[i/8] & (1u << (i&7))) != 0)
4699 {
4700 if (c > 75)
4701 {
4702 fprintf(outfile, "\n ");
4703 c = 2;
4704 }
4705 if (PRINTOK(i) && i != ' ')
4706 {
4707 fprintf(outfile, "%c ", i);
4708 c += 2;
4709 }
4710 else
4711 {
4712 fprintf(outfile, "\\x%02x ", i);
4713 c += 5;
4714 }
4715 }
4716 }
4717 fprintf(outfile, "\n");
4718 }
4719
4720 if (last_ctype != 0)
4721 {
4722 const char *caseless =
4723 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4724 "" : " (caseless)";
4725 if (PRINTOK(last_cunit))
4726 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4727 else
4728 {
4729 fprintf(outfile, "Last code unit = ");
4730 pchar(last_cunit, FALSE, outfile);
4731 fprintf(outfile, "%s\n", caseless);
4732 }
4733 }
4734
4735 if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4736 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4737
4738 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4739 {
4740 if (FLD(compiled_code, executable_jit) != NULL)
4741 fprintf(outfile, "JIT compilation was successful\n");
4742 else
4743 {
4744 #ifdef SUPPORT_JIT
4745 fprintf(outfile, "JIT compilation was not successful");
4746 if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4747 return PR_ABEND;
4748 fprintf(outfile, "\n");
4749 #else
4750 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4751 #endif
4752 }
4753 }
4754 }
4755
4756 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4757 {
4758 int errorcode;
4759 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4760 if (errorcode != 0)
4761 {
4762 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4763 if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4764 return PR_ABEND;
4765 return PR_SKIP;
4766 }
4767 }
4768
4769 return PR_OK;
4770 }
4771
4772
4773
4774 /*************************************************
4775 * Handle serialization error *
4776 *************************************************/
4777
4778 /* Print an error message after a serialization failure.
4779
4780 Arguments:
4781 rc the error code
4782 msg an initial message for what failed
4783
4784 Returns: FALSE if print_error_message() fails
4785 */
4786
4787 static BOOL
serial_error(int rc,const char * msg)4788 serial_error(int rc, const char *msg)
4789 {
4790 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4791 return print_error_message(rc, "", "\n");
4792 }
4793
4794
4795
4796 /*************************************************
4797 * Open file for save/load commands *
4798 *************************************************/
4799
4800 /* This function decodes the file name and opens the file.
4801
4802 Arguments:
4803 buffptr point after the #command
4804 mode open mode
4805 fptr points to the FILE variable
4806 name name of # command
4807
4808 Returns: PR_OK or PR_ABEND
4809 */
4810
4811 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4812 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4813 {
4814 char *endf;
4815 char *filename = (char *)buffptr;
4816 while (isspace(*filename)) filename++;
4817 endf = filename + strlen8(filename);
4818 while (endf > filename && isspace(endf[-1])) endf--;
4819
4820 if (endf == filename)
4821 {
4822 fprintf(outfile, "** File name expected after %s\n", name);
4823 return PR_ABEND;
4824 }
4825
4826 *endf = 0;
4827 *fptr = fopen((const char *)filename, mode);
4828 if (*fptr == NULL)
4829 {
4830 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4831 return PR_ABEND;
4832 }
4833
4834 return PR_OK;
4835 }
4836
4837
4838
4839 /*************************************************
4840 * Process command line *
4841 *************************************************/
4842
4843 /* This function is called for lines beginning with # and a character that is
4844 not ! or whitespace, when encountered between tests, which means that there is
4845 no compiled pattern (compiled_code is NULL). The line is in buffer.
4846
4847 Arguments: none
4848
4849 Returns: PR_OK continue processing next line
4850 PR_SKIP skip to a blank line
4851 PR_ABEND abort the pcre2test run
4852 */
4853
4854 static int
process_command(void)4855 process_command(void)
4856 {
4857 FILE *f;
4858 PCRE2_SIZE serial_size;
4859 size_t i;
4860 int rc, cmd, cmdlen, yield;
4861 uint16_t first_listed_newline;
4862 const char *cmdname;
4863 uint8_t *argptr, *serial;
4864
4865 yield = PR_OK;
4866 cmd = CMD_UNKNOWN;
4867 cmdlen = 0;
4868
4869 for (i = 0; i < cmdlistcount; i++)
4870 {
4871 cmdname = cmdlist[i].name;
4872 cmdlen = strlen(cmdname);
4873 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4874 isspace(buffer[cmdlen+1]))
4875 {
4876 cmd = cmdlist[i].value;
4877 break;
4878 }
4879 }
4880
4881 argptr = buffer + cmdlen + 1;
4882
4883 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4884 {
4885 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4886 return PR_ABEND;
4887 }
4888
4889 switch(cmd)
4890 {
4891 case CMD_UNKNOWN:
4892 fprintf(outfile, "** Unknown command: %s", buffer);
4893 break;
4894
4895 case CMD_FORBID_UTF:
4896 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4897 break;
4898
4899 case CMD_PERLTEST:
4900 restrict_for_perl_test = TRUE;
4901 break;
4902
4903 /* Set default pattern modifiers */
4904
4905 case CMD_PATTERN:
4906 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4907 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4908 def_patctl.jit = JIT_DEFAULT;
4909 break;
4910
4911 /* Set default subject modifiers */
4912
4913 case CMD_SUBJECT:
4914 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4915 break;
4916
4917 /* Check the default newline, and if not one of those listed, set up the
4918 first one to be forced. An empty list unsets. */
4919
4920 case CMD_NEWLINE_DEFAULT:
4921 local_newline_default = 0; /* Unset */
4922 first_listed_newline = 0;
4923 for (;;)
4924 {
4925 while (isspace(*argptr)) argptr++;
4926 if (*argptr == 0) break;
4927 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4928 {
4929 size_t nlen = strlen(newlines[i]);
4930 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4931 isspace(argptr[nlen]))
4932 {
4933 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4934 if (first_listed_newline == 0) first_listed_newline = i;
4935 }
4936 }
4937 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4938 }
4939 local_newline_default = first_listed_newline;
4940 break;
4941
4942 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4943 the compiled pattern (e.g. to give information) are permitted. The default
4944 pattern modifiers are ignored. */
4945
4946 case CMD_POP:
4947 case CMD_POPCOPY:
4948 if (patstacknext <= 0)
4949 {
4950 fprintf(outfile, "** Can't pop off an empty stack\n");
4951 return PR_SKIP;
4952 }
4953 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4954 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4955 return PR_SKIP;
4956
4957 if (cmd == CMD_POP)
4958 {
4959 SET(compiled_code, patstack[--patstacknext]);
4960 }
4961 else
4962 {
4963 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4964 }
4965
4966 if (pat_patctl.jit != 0)
4967 {
4968 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4969 }
4970 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4971 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4972 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4973 {
4974 rc = show_pattern_info();
4975 if (rc != PR_OK) return rc;
4976 }
4977 break;
4978
4979 /* Save the stack of compiled patterns to a file, then empty the stack. */
4980
4981 case CMD_SAVE:
4982 if (patstacknext <= 0)
4983 {
4984 fprintf(outfile, "** No stacked patterns to save\n");
4985 return PR_OK;
4986 }
4987
4988 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
4989 if (rc != PR_OK) return rc;
4990
4991 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4992 general_context);
4993 if (rc < 0)
4994 {
4995 fclose(f);
4996 if (!serial_error(rc, "Serialization")) return PR_ABEND;
4997 break;
4998 }
4999
5000 /* Write the length at the start of the file to make it straightforward to
5001 get the right memory when re-loading. This saves having to read the file size
5002 in different operating systems. To allow for different endianness (even
5003 though reloading with the opposite endianness does not work), write the
5004 length byte-by-byte. */
5005
5006 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5007 if (fwrite(serial, 1, serial_size, f) != serial_size)
5008 {
5009 fprintf(outfile, "** Wrong return from fwrite()\n");
5010 fclose(f);
5011 return PR_ABEND;
5012 }
5013
5014 fclose(f);
5015 PCRE2_SERIALIZE_FREE(serial);
5016 while(patstacknext > 0)
5017 {
5018 SET(compiled_code, patstack[--patstacknext]);
5019 SUB1(pcre2_code_free, compiled_code);
5020 }
5021 SET(compiled_code, NULL);
5022 break;
5023
5024 /* Load a set of compiled patterns from a file onto the stack */
5025
5026 case CMD_LOAD:
5027 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5028 if (rc != PR_OK) return rc;
5029
5030 serial_size = 0;
5031 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5032
5033 serial = malloc(serial_size);
5034 if (serial == NULL)
5035 {
5036 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5037 serial_size);
5038 fclose(f);
5039 return PR_ABEND;
5040 }
5041
5042 i = fread(serial, 1, serial_size, f);
5043 fclose(f);
5044
5045 if (i != serial_size)
5046 {
5047 fprintf(outfile, "** Wrong return from fread()\n");
5048 yield = PR_ABEND;
5049 }
5050 else
5051 {
5052 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5053 if (rc < 0)
5054 {
5055 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5056 }
5057 else
5058 {
5059 if (rc + patstacknext > PATSTACKSIZE)
5060 {
5061 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5062 rc, (rc == 1)? "" : "s");
5063 rc = PATSTACKSIZE - patstacknext;
5064 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5065 (rc == 1)? "" : "s");
5066 }
5067 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5068 general_context);
5069 if (rc < 0)
5070 {
5071 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5072 }
5073 else patstacknext += rc;
5074 }
5075 }
5076
5077 free(serial);
5078 break;
5079
5080 /* Load a set of binary tables into tables3. */
5081
5082 case CMD_LOADTABLES:
5083 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5084 if (rc != PR_OK) return rc;
5085
5086 if (tables3 == NULL)
5087 {
5088 (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5089 tables3 = malloc(loadtables_length);
5090 }
5091
5092 if (tables3 == NULL)
5093 {
5094 fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5095 yield = PR_ABEND;
5096 }
5097 else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5098 {
5099 fprintf(outfile, "** Wrong return from fread()\n");
5100 yield = PR_ABEND;
5101 }
5102
5103 fclose(f);
5104 break;
5105 }
5106
5107 return yield;
5108 }
5109
5110
5111
5112 /*************************************************
5113 * Process pattern line *
5114 *************************************************/
5115
5116 /* This function is called when the input buffer contains the start of a
5117 pattern. The first character is known to be a valid delimiter. The pattern is
5118 read, modifiers are interpreted, and a suitable local context is set up for
5119 this test. The pattern is then compiled.
5120
5121 Arguments: none
5122
5123 Returns: PR_OK continue processing next line
5124 PR_SKIP skip to a blank line
5125 PR_ABEND abort the pcre2test run
5126 */
5127
/* Summary of the stages in process_pattern(): (1) find the closing delimiter
in "buffer", reading continuation lines as needed; (2) decode and validate the
modifiers; (3) copy the pattern into pbuffer8, decoding hex pairs or expanding
repeats if requested; (4) select the character tables; (5) compile, either via
the POSIX wrapper or via the native interface (with optional pattern
conversion, timing, and JIT compilation); (6) record information about the
compiled pattern and stack it if a "push" control was given. PR_OK is returned
on success; failures return PR_SKIP or PR_ABEND, and a non-PR_OK result from
show_pattern_info() is passed straight back. */

static int
process_pattern(void)
{
BOOL utf;
uint32_t k;
uint8_t *p = buffer;
unsigned int delimiter = *p++;   /* First byte of the line is the delimiter */
int errorcode;
void *use_pat_context;
uint32_t use_forbid_utf = forbid_utf;
PCRE2_SIZE patlen;
PCRE2_SIZE valgrind_access_length;
PCRE2_SIZE erroroffset;

/* The perltest.sh script supports only / as a delimiter. */

if (restrict_for_perl_test && delimiter != '/')
  {
  fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
  return PR_ABEND;
  }

/* Initialize the context and pattern/data controls for this test from the
defaults. */

PATCTXCPY(pat_context, default_pat_context);
memcpy(&pat_patctl, &def_patctl, sizeof(patctl));

/* Find the end of the pattern, reading more lines if necessary. A backslash
causes the following character to be skipped, so an escaped delimiter does not
end the pattern. */

for(;;)
  {
  while (*p != 0)
    {
    if (*p == '\\' && p[1] != 0) p++;
    else if (*p == delimiter) break;
    p++;
    }
  if (*p != 0) break;
  if ((p = extend_inputline(infile, p, " > ")) == NULL)
    {
    fprintf(outfile, "** Unexpected EOF\n");
    return PR_ABEND;
    }
  if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
  }

/* If the first character after the delimiter is backslash, make the pattern
end with backslash. This is purely to provide a way of testing for the error
message when a pattern ends with backslash. */

if (p[1] == '\\') *p++ = '\\';

/* Terminate the pattern at the delimiter, and compute the length. At this
point p is one past the terminating zero and buffer[0] is the opening
delimiter, which is why 2 is subtracted. */

*p++ = 0;
patlen = p - buffer - 2;

/* Look for modifiers and options after the final delimiter. */

if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;

/* Note that the match_invalid_utf option also sets utf when passed to
pcre2_compile(). */

utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;

/* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
exclusive with the utf modifier. */

if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
  {
  if (test_mode == PCRE8_MODE)
    {
    fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
    return PR_SKIP;
    }
  if (utf)
    {
    fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
    return PR_SKIP;
    }
  }

/* The convert and posix modifiers are mutually exclusive. */

if (pat_patctl.convert_type != CONVERT_UNSET &&
    (pat_patctl.control & CTL_POSIX) != 0)
  {
  fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
  return PR_SKIP;
  }

/* Check for mutually exclusive control modifiers. At present, these are all in
the first control word. The expression c & (~c+1) isolates the lowest set bit
of c, so c differs from it exactly when more than one bit of an exclusive group
is set. */

for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
  {
  uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
  if (c != 0 && c != (c & (~c+1)))
    {
    show_controls(c, 0, "** Not allowed together:");
    fprintf(outfile, "\n");
    return PR_SKIP;
    }
  }

/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
specified. */

if (pat_patctl.jit == 0 &&
    (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
  pat_patctl.jit = JIT_DEFAULT;

/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
in callouts. Convert from hex if requested (literal strings in quotes may be
present within the hexadecimal pairs). The result must necessarily be fewer
characters so will always fit in pbuffer8. */

if ((pat_patctl.control & CTL_HEXPAT) != 0)
  {
  uint8_t *pp, *pt;
  uint32_t c, d;

  pt = pbuffer8;
  for (pp = buffer + 1; *pp != 0; pp++)
    {
    if (isspace(*pp)) continue;
    c = *pp++;

    /* Handle a literal substring. The opening quote character is held in c;
    bytes are copied unchanged until the same quote character is seen again. */

    if (c == '\'' || c == '"')
      {
      uint8_t *pq = pp;
      for (;; pp++)
        {
        d = *pp;
        if (d == 0)
          {
          fprintf(outfile, "** Missing closing quote in hex pattern: "
            "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
          return PR_SKIP;
          }
        if (d == c) break;
        *pt++ = d;
        }
      }

    /* Expect a hex pair */

    else
      {
      if (!isxdigit(c))
        {
        fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
          PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
        return PR_SKIP;
        }
      if (*pp == 0)
        {
        fprintf(outfile, "** Odd number of digits in hex pattern\n");
        return PR_SKIP;
        }
      d = *pp;
      if (!isxdigit(d))
        {
        fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
          PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
        return PR_SKIP;
        }

      /* Fold both digits to upper case so that one 'A'-based computation
      handles either case, then combine the two nibbles into a byte. */

      c = toupper(c);
      d = toupper(d);
      *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
               (isdigit(d)? (d - '0') : (d - 'A' + 10));
      }
    }
  *pt = 0;
  patlen = pt - pbuffer8;
  }

/* If not a hex string, process for repetition expansion if requested. */

else if ((pat_patctl.control & CTL_EXPAND) != 0)
  {
  uint8_t *pp, *pt;

  pt = pbuffer8;
  for (pp = buffer + 1; *pp != 0; pp++)
    {
    uint8_t *pc = pp;
    uint32_t count = 1;
    size_t length = 1;

    /* Check for replication syntax; if not found, the defaults just set will
    prevail and one character will be copied. The syntax recognized here is
    \[text]{count}. */

    if (pp[0] == '\\' && pp[1] == '[')
      {
      uint8_t *pe;
      for (pe = pp + 2; *pe != 0; pe++)
        {
        if (pe[0] == ']' && pe[1] == '{')
          {
          uint32_t clen = pe - pc - 2;   /* Length of text between \[ and ] */
          uint32_t i = 0;
          unsigned long uli;
          char *endptr;

          pe += 2;
          uli = strtoul((const char *)pe, &endptr, 10);
          if (U32OVERFLOW(uli))
            {
            fprintf(outfile, "** Pattern repeat count too large\n");
            return PR_SKIP;
            }

          i = (uint32_t)uli;
          pe = (uint8_t *)endptr;
          if (*pe == '}')
            {
            if (i == 0)
              {
              fprintf(outfile, "** Zero repeat not allowed\n");
              return PR_SKIP;
              }
            pc += 2;       /* Step over \[ to the text to be replicated */
            count = i;
            length = clen;
            pp = pe;       /* At the '}'; the loop increment moves past it */
            break;
            }
          }
        }
      }

    /* Add to output. If the buffer is too small expand it. The function for
    expanding buffers always keeps buffer and pbuffer8 in step as far as their
    size goes. The pointers are saved as offsets and rebuilt afterwards, which
    keeps them valid should the expansion change the buffer addresses. */

    while (pt + count * length > pbuffer8 + pbuffer8_size)
      {
      size_t pc_offset = pc - buffer;
      size_t pp_offset = pp - buffer;
      size_t pt_offset = pt - pbuffer8;
      expand_input_buffers();
      pc = buffer + pc_offset;
      pp = buffer + pp_offset;
      pt = pbuffer8 + pt_offset;
      }

    for (; count > 0; count--)
      {
      memcpy(pt, pc, length);
      pt += length;
      }
    }

  *pt = 0;
  patlen = pt - pbuffer8;

  if ((pat_patctl.control & CTL_INFO) != 0)
    fprintf(outfile, "Expanded: %s\n", pbuffer8);
  }

/* Neither hex nor expanded, just copy the input verbatim. The count is
patlen + 1 so that the terminating zero written above is copied as well. */

else
  {
  strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
  }

/* Sort out character tables */

if (pat_patctl.locale[0] != 0)
  {
  if (pat_patctl.tables_id != 0)
    {
    fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
    return PR_SKIP;
    }
  if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
    {
    fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
    return PR_SKIP;
    }

  /* Tables are rebuilt only when the locale name differs from the one that
  was remembered in locale_name. */

  if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
    {
    strcpy((char *)locale_name, (char *)pat_patctl.locale);
    if (locale_tables != NULL) free((void *)locale_tables);
    PCRE2_MAKETABLES(locale_tables);
    }
  use_tables = locale_tables;
  }

else switch (pat_patctl.tables_id)
  {
  case 0: use_tables = NULL; break;
  case 1: use_tables = tables1; break;
  case 2: use_tables = tables2; break;

  case 3:
  if (tables3 == NULL)
    {
    fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
      "been loaded\n");
    return PR_SKIP;
    }
  use_tables = tables3;
  break;

  default:
  fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
  return PR_SKIP;
  }

PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);

/* Set up for the stackguard test. */

if (pat_patctl.stackguard_test != 0)
  {
  PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
  }

/* Handle compiling via the POSIX interface, which doesn't support the
timing, showing, or debugging options, nor the ability to pass over
local character tables. Neither does it have 16-bit or 32-bit support. */

if ((pat_patctl.control & CTL_POSIX) != 0)
  {
#ifdef SUPPORT_PCRE2_8
  int rc;
  int cflags = 0;
  const char *msg = "** Ignored with POSIX interface:";
#endif

  if (test_mode != PCRE8_MODE)
    {
    fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
    return PR_SKIP;
    }

#ifdef SUPPORT_PCRE2_8
  /* Check for features that the POSIX interface does not support. */

  if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
  if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
  if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
  if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
  if (timeit > 0) prmsg(&msg, "timing");
  if (pat_patctl.jit != 0) prmsg(&msg, "JIT");

  if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
    {
    show_compile_options(
      pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
    msg = "";
    }

  if ((FLD(pat_context, extra_options) &
       ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
    {
    show_compile_extra_options(
      FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
        msg, "");
    msg = "";
    }

  if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
      (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
    {
    show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
      pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
    msg = "";
    }

  if (local_newline_default != 0) prmsg(&msg, "#newline_default");
  if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
    prmsg(&msg, "max_pattern_length");
  if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
    prmsg(&msg, "parens_nest_limit");

  /* msg is set to an empty string above once the heading has been output
  (and presumably also inside prmsg(), which is not visible here - confirm),
  so an empty msg means something was printed and the line must be ended. */

  if (msg[0] == 0) fprintf(outfile, "\n");

  /* Translate PCRE2 options to POSIX options and then compile. */

  if (utf) cflags |= REG_UTF;
  if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
  if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
  if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
  if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
  if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
  if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
  if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;

  /* For hex patterns or use_length, pass an explicit end pointer instead of
  relying on a terminating zero. */

  if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
    {
    preg.re_endp = (char *)pbuffer8 + patlen;
    cflags |= REG_PEND;
    }

  rc = regcomp(&preg, (char *)pbuffer8, cflags);

  /* Compiling failed */

  if (rc != 0)
    {
    size_t bsize, usize;
    int psize;

    preg.re_pcre2_code = NULL;     /* In case something was left in there */
    preg.re_match_data = NULL;

    /* When there is room, plant a sentinel just past the nominal end of the
    error buffer so that an overrun by regerror() can be detected below. */

    bsize = (pat_patctl.regerror_buffsize != 0)?
      pat_patctl.regerror_buffsize : pbuffer8_size;
    if (bsize + 8 < pbuffer8_size)
      memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
    usize = regerror(rc, &preg, (char *)pbuffer8, bsize);

    /* Inside regerror(), snprintf() is used. If the buffer is too small, some
    versions of snprintf() put a zero byte at the end, but others do not.
    Therefore, we print a maximum of one less than the size of the buffer. */

    psize = (int)bsize - 1;
    fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
    if (usize > bsize)
      {
      fprintf(outfile, "** regerror() message truncated\n");
      if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
        fprintf(outfile, "** regerror() buffer overflow\n");
      }
    return PR_SKIP;
    }

  /* Compiling succeeded. Check that the values in the preg block are sensible.
  It can happen that pcre2test is accidentally linked with a different POSIX
  library which succeeds, but of course puts different things into preg. In
  this situation, calling regfree() may cause a segfault (or invalid free() in
  valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
  calling of regfree() on exit. */

  if (preg.re_pcre2_code == NULL ||
      ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
      ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
      preg.re_match_data == NULL ||
      preg.re_cflags != cflags)
    {
    fprintf(outfile,
      "** The regcomp() function returned zero (success), but the values set\n"
      "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
      "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
      "** some other POSIX regex library.\n**\n");
    preg.re_pcre2_code = NULL;
    return PR_ABEND;
    }

  return PR_OK;
#endif  /* SUPPORT_PCRE2_8 */
  }

/* Handle compiling via the native interface. Controls that act later are
ignored with "push". Replacements are locked out. */

if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
  {
  if (pat_patctl.replacement[0] != 0)
    {
    fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
    return PR_OK;
    }
  if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
      (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
    {
    show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
                  pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
      "** Ignored when compiled pattern is stacked with 'push':");
    fprintf(outfile, "\n");
    }
  if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
      (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
    {
    show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
                  pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
      "** Applies only to compile when pattern is stacked with 'push':");
    fprintf(outfile, "\n");
    }
  }

/* Convert the input in non-8-bit modes. The conversion functions update
patlen through the pointer they are given and return a negative code on
failure, which is decoded by the switch below. */

errorcode = 0;

#ifdef SUPPORT_PCRE2_16
if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
#endif

#ifdef SUPPORT_PCRE2_32
if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
#endif

switch(errorcode)
  {
  case -1:
  fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
    "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
  return PR_SKIP;

  case -2:
  fprintf(outfile, "** Failed: character value greater than 0x10ffff "
    "cannot be converted to UTF\n");
  return PR_SKIP;

  case -3:
  fprintf(outfile, "** Failed: character value greater than 0xffff "
    "cannot be converted to 16-bit in non-UTF mode\n");
  return PR_SKIP;

  default:
  break;
  }

/* The pattern is now in pbuffer[8|16|32], with the length in code units in
patlen. If it is to be converted, copy the result back afterwards so that it
ends up back in the usual place. */

if (pat_patctl.convert_type != CONVERT_UNSET)
  {
  int rc;
  int convert_return = PR_OK;
  uint32_t convert_options = pat_patctl.convert_type;
  void *converted_pattern;
  PCRE2_SIZE converted_length;

  /* A non-zero convert_length means that this function supplies the output
  buffer itself (and must free() it below); otherwise the library allocates. */

  if (pat_patctl.convert_length != 0)
    {
    converted_length = pat_patctl.convert_length;
    converted_pattern = malloc(converted_length * code_unit_size);
    if (converted_pattern == NULL)
      {
      fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
      return PR_SKIP;
      }
    }
  else converted_pattern = NULL;  /* Let the library allocate */

  if (utf) convert_options |= PCRE2_CONVERT_UTF;
  if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
    convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;

  CONCTXCPY(con_context, default_con_context);

  /* The character '0' given as the glob escape is translated to a value of
  zero before being passed on. */

  if (pat_patctl.convert_glob_escape != 0)
    {
    uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
      pat_patctl.convert_glob_escape;
    PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
    if (rc != 0)
      {
      fprintf(outfile, "** Invalid glob escape '%c'\n",
        pat_patctl.convert_glob_escape);
      convert_return = PR_SKIP;
      goto CONVERT_FINISH;
      }
    }

  if (pat_patctl.convert_glob_separator != 0)
    {
    PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
    if (rc != 0)
      {
      fprintf(outfile, "** Invalid glob separator '%c'\n",
        pat_patctl.convert_glob_separator);
      convert_return = PR_SKIP;
      goto CONVERT_FINISH;
      }
    }

  PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
    &converted_pattern, &converted_length, con_context);

  /* On failure, the value left in converted_length is reported as the error
  offset. */

  if (rc != 0)
    {
    fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
      converted_length);
    convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
    }

  /* Output the converted pattern, then copy it. */

  else
    {
    PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
    fprintf(outfile, "\n");
    patlen = converted_length;
    CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
    }

  /* Free the converted pattern. The release must match whichever allocation
  was chosen above. */

  CONVERT_FINISH:
  if (pat_patctl.convert_length != 0)
    free(converted_pattern);
  else
    PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);

  /* Return if conversion was unsuccessful. */

  if (convert_return != PR_OK) return convert_return;
  }

/* By default we pass a zero-terminated pattern, but a length is passed if
"use_length" was specified or this is a hex pattern (which might contain binary
zeros). When valgrind is supported, arrange for the unused part of the buffer
to be marked as no access. */

valgrind_access_length = patlen;
if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
  {
  patlen = PCRE2_ZERO_TERMINATED;
  valgrind_access_length += 1;  /* For the terminating zero */
  }

#ifdef SUPPORT_VALGRIND
#ifdef SUPPORT_PCRE2_8
if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
  {
  VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
    pbuffer8_size - valgrind_access_length);
  }
#endif
#ifdef SUPPORT_PCRE2_16
if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
  {
  VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
    pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
  }
#endif
#ifdef SUPPORT_PCRE2_32
if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
  {
  VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
    pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
  }
#endif
#else  /* Valgrind not supported */
(void)valgrind_access_length;  /* Avoid compiler warning */
#endif

/* If #newline_default has been used and the library was not compiled with an
appropriate default newline setting, local_newline_default will be non-zero. We
use this if there is no explicit newline modifier. */

if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
  {
  SETFLD(pat_context, newline_convention, local_newline_default);
  }

/* The null_context modifier is used to test calling pcre2_compile() with a
NULL context. */

use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
  NULL : PTR(pat_context);

/* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
and PCRE2_NEVER_UCP are invalid with it. */

if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;

/* Compile many times when timing. Each compiled result is freed at once; only
the elapsed clock() time is kept. */

if (timeit > 0)
  {
  int i;
  clock_t time_taken = 0;
  for (i = 0; i < timeit; i++)
    {
    clock_t start_time = clock();
    PCRE2_COMPILE(compiled_code, pbuffer, patlen,
      pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
        use_pat_context);
    time_taken += clock() - start_time;
    if (TEST(compiled_code, !=, NULL))
      { SUB1(pcre2_code_free, compiled_code); }
    }
  total_compile_time += time_taken;
  fprintf(outfile, "Compile time %.4f milliseconds\n",
    (((double)time_taken * 1000.0) / (double)timeit) /
      (double)CLOCKS_PER_SEC);
  }

/* A final compile that is used "for real". */

PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
  &errorcode, &erroroffset, use_pat_context);

/* Call the JIT compiler if requested. When timing, we must free and recompile
the pattern each time because that is the only way to free the JIT compiled
code. We know that compilation will always succeed. Only the JIT compile call
itself lies between the two clock() readings. */

if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
  {
  if (timeit > 0)
    {
    int i;
    clock_t time_taken = 0;

    for (i = 0; i < timeit; i++)
      {
      clock_t start_time;
      SUB1(pcre2_code_free, compiled_code);
      PCRE2_COMPILE(compiled_code, pbuffer, patlen,
        pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
        use_pat_context);
      start_time = clock();
      PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
      time_taken += clock() - start_time;
      }
    total_jit_compile_time += time_taken;
    fprintf(outfile, "JIT compile %.4f milliseconds\n",
      (((double)time_taken * 1000.0) / (double)timeit) /
        (double)CLOCKS_PER_SEC);
    }
  else
    {
    PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
    }
  }

/* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
and 32-bit buffers can be marked completely undefined, but we must leave the
pattern in the 8-bit buffer defined because it may be read from a callout
during matching. */

#ifdef SUPPORT_VALGRIND
#ifdef SUPPORT_PCRE2_8
if (test_mode == PCRE8_MODE)
  {
  VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
    pbuffer8_size - valgrind_access_length);
  }
#endif
#ifdef SUPPORT_PCRE2_16
if (test_mode == PCRE16_MODE)
  {
  VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
  }
#endif
#ifdef SUPPORT_PCRE2_32
if (test_mode == PCRE32_MODE)
  {
  VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
  }
#endif
#endif

/* Compilation failed; go back for another re, skipping to blank line
if non-interactive. */

if (TEST(compiled_code, ==, NULL))
  {
  fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
    (int)erroroffset);
  if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
  return PR_SKIP;
  }

/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
locked out at compile time, but we must also check for occurrences of \P, \p,
and \X, which are only supported when Unicode is supported. */

if (forbid_utf != 0)
  {
  if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
    {
    fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
      "#forbid_utf command\n");
    return PR_SKIP;
    }
  }

/* Remember the maximum lookbehind, for partial matching. */

if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
  return PR_ABEND;

/* Remember the number of captures. */

if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
  return PR_ABEND;

/* If an explicit newline modifier was given, set the information flag in the
pattern so that it is preserved over push/pop. */

if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
  {
  SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
  }

/* Output code size and other information if requested. */

if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
if ((pat_patctl.control & CTL_ANYINFO) != 0)
  {
  int rc = show_pattern_info();
  if (rc != PR_OK) return rc;
  }

/* The "push" control requests that the compiled pattern be remembered on a
stack. This is mainly for testing the serialization functionality. The pointer
is then held only in patstack, so compiled_code is cleared. */

if ((pat_patctl.control & CTL_PUSH) != 0)
  {
  if (patstacknext >= PATSTACKSIZE)
    {
    fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
    return PR_ABEND;
    }
  patstack[patstacknext++] = PTR(compiled_code);
  SET(compiled_code, NULL);
  }

/* The "pushcopy" and "pushtablescopy" controls are similar, but push a
copy of the pattern, the latter with a copy of its character tables. This tests
the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */

if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
  {
  if (patstacknext >= PATSTACKSIZE)
    {
    fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
    return PR_ABEND;
    }
  if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
    {
    PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
    }
  else
    {
    PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
      compiled_code); }
  }

return PR_OK;
}
5974
5975
5976
5977 /*************************************************
5978 * Check heap, match or depth limit *
5979 *************************************************/
5980
5981 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5982 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5983
5984 Arguments:
5985 pp the subject string
5986 ulen length of subject or PCRE2_ZERO_TERMINATED
5987 errnumber defines which limit to test
5988 msg string to include in final message
5989
5990 Returns: the return from the final match function call
5991 */
5992
5993 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5994 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5995 {
5996 int capcount;
5997 uint32_t min = 0;
5998 uint32_t mid = 64;
5999 uint32_t max = UINT32_MAX;
6000
6001 PCRE2_SET_MATCH_LIMIT(dat_context, max);
6002 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6003 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6004
6005 for (;;)
6006 {
6007 uint32_t stack_start = 0;
6008
6009 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6010 {
6011 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6012 }
6013 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6014 {
6015 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6016 }
6017 else
6018 {
6019 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6020 }
6021
6022 if ((dat_datctl.control & CTL_DFA) != 0)
6023 {
6024 stack_start = DFA_START_RWS_SIZE/1024;
6025 if (dfa_workspace == NULL)
6026 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6027 if (dfa_matched++ == 0)
6028 dfa_workspace[0] = -1; /* To catch bad restart */
6029 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6030 dat_datctl.options, match_data,
6031 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6032 }
6033
6034 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6035 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6036 dat_datctl.options, match_data, PTR(dat_context));
6037
6038 else
6039 {
6040 stack_start = START_FRAMES_SIZE/1024;
6041 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6042 dat_datctl.options, match_data, PTR(dat_context));
6043 }
6044
6045 if (capcount == errnumber)
6046 {
6047 if ((mid & 0x80000000u) != 0)
6048 {
6049 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6050 "restriction\n", msg);
6051 break;
6052 }
6053
6054 min = mid;
6055 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6056 }
6057 else if (capcount >= 0 ||
6058 capcount == PCRE2_ERROR_NOMATCH ||
6059 capcount == PCRE2_ERROR_PARTIAL)
6060 {
6061 /* If we've not hit the error with a heap limit less than the size of the
6062 initial stack frame vector (for pcre2_match()) or the initial stack
6063 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6064 the minimum limit is zero; there's no need to go on. The other limits are
6065 always greater than zero. */
6066
6067 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6068 {
6069 fprintf(outfile, "Minimum %s limit = 0\n", msg);
6070 break;
6071 }
6072 if (mid == min + 1)
6073 {
6074 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6075 break;
6076 }
6077 max = mid;
6078 mid = (min + max)/2;
6079 }
6080 else break; /* Some other error */
6081 }
6082
6083 return capcount;
6084 }
6085
6086
6087
6088 /*************************************************
6089 * Substitute callout function *
6090 *************************************************/
6091
6092 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6093 Print out the data that is passed back. The substitute callout block is
6094 identical for all code unit widths, so we just pick one.
6095
6096 Arguments:
6097 scb pointer to substitute callout block
6098 data_ptr callout data
6099
Returns:      0 normally, -1 when this substitution is reported as STOPPED,
                or +1 when it is reported as SKIPPED
6101 */
6102
6103 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6104 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6105 void *data_ptr)
6106 {
6107 int yield = 0;
6108 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6109 (void)data_ptr; /* Not used */
6110
6111 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6112 scb->subscount, scb->oveccount,
6113 scb->ovector[0], scb->ovector[1]);
6114
6115 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6116 utf, outfile);
6117
6118 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6119 scb->output_offsets[0], scb->output_offsets[1]);
6120
6121 PCHARSV(scb->output, scb->output_offsets[0],
6122 scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6123
6124 if (scb->subscount == dat_datctl.substitute_stop)
6125 {
6126 yield = -1;
6127 fprintf(outfile, " STOPPED");
6128 }
6129 else if (scb->subscount == dat_datctl.substitute_skip)
6130 {
6131 yield = +1;
6132 fprintf(outfile, " SKIPPED");
6133 }
6134
6135 fprintf(outfile, "\"\n");
6136 return yield;
6137 }
6138
6139
6140 /*************************************************
6141 * Callout function *
6142 *************************************************/
6143
6144 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6145 we are in the match (unless suppressed). Yield zero unless more callouts than
6146 the fail count, or the callout data is not zero. The only differences in the
6147 callout block for different code unit widths are that the pointers to the
6148 subject, the most recent MARK, and a callout argument string point to strings
6149 of the appropriate width. Casts can be used to deal with this.
6150
6151 Arguments:
6152 cb a pointer to a callout block
6153 callout_data_ptr the provided callout data
6154
6155 Returns: 0 or 1 or an error, as determined by settings
6156 */
6157
6158 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6159 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6160 {
6161 FILE *f, *fdefault;
6162 uint32_t i, pre_start, post_start, subject_length;
6163 PCRE2_SIZE current_position;
6164 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6165 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6166 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6167
6168 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6169 happens only once in simple cases, but we want to repeat after any additional
6170 output caused by CALLOUT_EXTRA. */
6171
6172 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6173 NULL : outfile;
6174
6175 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6176 {
6177 f = outfile;
6178 switch (cb->callout_flags)
6179 {
6180 case PCRE2_CALLOUT_BACKTRACK:
6181 fprintf(f, "Backtrack\n");
6182 break;
6183
6184 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6185 fprintf(f, "Backtrack\nNo other matching paths\n");
6186 /* Fall through */
6187
6188 case PCRE2_CALLOUT_STARTMATCH:
6189 fprintf(f, "New match attempt\n");
6190 break;
6191
6192 default:
6193 f = fdefault;
6194 break;
6195 }
6196 }
6197 else f = fdefault;
6198
6199 /* For a callout with a string argument, show the string first because there
6200 isn't a tidy way to fit it in the rest of the data. */
6201
6202 if (cb->callout_string != NULL)
6203 {
6204 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6205 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6206 cb->callout_string_offset, delimiter);
6207 PCHARSV(cb->callout_string, 0,
6208 cb->callout_string_length, utf, outfile);
6209 for (i = 0; callout_start_delims[i] != 0; i++)
6210 if (delimiter == callout_start_delims[i])
6211 {
6212 delimiter = callout_end_delims[i];
6213 break;
6214 }
6215 fprintf(outfile, "%c", delimiter);
6216 if (!callout_capture) fprintf(outfile, "\n");
6217 }
6218
6219 /* Show captured strings if required */
6220
6221 if (callout_capture)
6222 {
6223 if (cb->callout_string == NULL)
6224 fprintf(outfile, "Callout %d:", cb->callout_number);
6225 fprintf(outfile, " last capture = %d\n", cb->capture_last);
6226 for (i = 2; i < cb->capture_top * 2; i += 2)
6227 {
6228 fprintf(outfile, "%2d: ", i/2);
6229 if (cb->offset_vector[i] == PCRE2_UNSET)
6230 fprintf(outfile, "<unset>");
6231 else
6232 {
6233 PCHARSV(cb->subject, cb->offset_vector[i],
6234 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6235 }
6236 fprintf(outfile, "\n");
6237 }
6238 }
6239
6240 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6241 non-printing characters), the first time, or if giving full details. On
6242 subsequent calls in the same match, we use PCHARS() just to find the printed
6243 lengths of the substrings. */
6244
6245 if (callout_where)
6246 {
6247 if (f != NULL) fprintf(f, "--->");
6248
6249 /* The subject before the match start. */
6250
6251 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6252
6253 /* If a lookbehind is involved, the current position may be earlier than the
6254 match start. If so, use the match start instead. */
6255
6256 current_position = (cb->current_position >= cb->start_match)?
6257 cb->current_position : cb->start_match;
6258
6259 /* The subject between the match start and the current position. */
6260
6261 PCHARS(post_start, cb->subject, cb->start_match,
6262 current_position - cb->start_match, utf, f);
6263
6264 /* Print from the current position to the end. */
6265
6266 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6267 utf, f);
6268
6269 /* Calculate the total subject printed length (no print). */
6270
6271 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6272
6273 if (f != NULL) fprintf(f, "\n");
6274
6275 /* For automatic callouts, show the pattern offset. Otherwise, for a
6276 numerical callout whose number has not already been shown with captured
6277 strings, show the number here. A callout with a string argument has been
6278 displayed above. */
6279
6280 if (cb->callout_number == 255)
6281 {
6282 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6283 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6284 }
6285 else
6286 {
6287 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6288 else fprintf(outfile, "%3d ", cb->callout_number);
6289 }
6290
6291 /* Now show position indicators */
6292
6293 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6294 fprintf(outfile, "^");
6295
6296 if (post_start > 0)
6297 {
6298 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6299 fprintf(outfile, "^");
6300 }
6301
6302 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6303 fprintf(outfile, " ");
6304
6305 if (cb->next_item_length != 0)
6306 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6307 pbuffer8 + cb->pattern_position);
6308 else
6309 fprintf(outfile, "End of pattern");
6310
6311 fprintf(outfile, "\n");
6312 }
6313
6314 first_callout = FALSE;
6315
6316 /* Show any mark info */
6317
6318 if (cb->mark != last_callout_mark)
6319 {
6320 if (cb->mark == NULL)
6321 fprintf(outfile, "Latest Mark: <unset>\n");
6322 else
6323 {
6324 fprintf(outfile, "Latest Mark: ");
6325 PCHARSV(cb->mark, -1, -1, utf, outfile);
6326 putc('\n', outfile);
6327 }
6328 last_callout_mark = cb->mark;
6329 }
6330
6331 /* Show callout data */
6332
6333 if (callout_data_ptr != NULL)
6334 {
6335 int callout_data = *((int32_t *)callout_data_ptr);
6336 if (callout_data != 0)
6337 {
6338 fprintf(outfile, "Callout data = %d\n", callout_data);
6339 return callout_data;
6340 }
6341 }
6342
6343 /* Keep count and give the appropriate return code */
6344
6345 callout_count++;
6346
6347 if (cb->callout_number == dat_datctl.cerror[0] &&
6348 callout_count >= dat_datctl.cerror[1])
6349 return PCRE2_ERROR_CALLOUT;
6350
6351 if (cb->callout_number == dat_datctl.cfail[0] &&
6352 callout_count >= dat_datctl.cfail[1])
6353 return 1;
6354
6355 return 0;
6356 }
6357
6358
6359
6360 /*************************************************
6361 * Handle *MARK and copy/get tests *
6362 *************************************************/
6363
6364 /* This function is called after complete and partial matches. It runs the
6365 tests for substring extraction.
6366
6367 Arguments:
6368 utf TRUE for utf
6369 capcount return from pcre2_match()
6370
6371 Returns: FALSE if print_error_message() fails
6372 */
6373
6374 static BOOL
copy_and_get(BOOL utf,int capcount)6375 copy_and_get(BOOL utf, int capcount)
6376 {
6377 int i;
6378 uint8_t *nptr;
6379
6380 /* Test copy strings by number */
6381
6382 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6383 {
6384 int rc;
6385 PCRE2_SIZE length, length2;
6386 uint32_t copybuffer[256];
6387 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6388 length = sizeof(copybuffer)/code_unit_size;
6389 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6390 if (rc < 0)
6391 {
6392 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6393 if (!print_error_message(rc, "", "\n")) return FALSE;
6394 }
6395 else
6396 {
6397 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6398 if (rc < 0)
6399 {
6400 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6401 if (!print_error_message(rc, "", "\n")) return FALSE;
6402 }
6403 else if (length2 != length)
6404 {
6405 fprintf(outfile, "Mismatched substring lengths: %"
6406 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6407 }
6408 fprintf(outfile, "%2dC ", n);
6409 PCHARSV(copybuffer, 0, length, utf, outfile);
6410 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6411 }
6412 }
6413
6414 /* Test copy strings by name */
6415
6416 nptr = dat_datctl.copy_names;
6417 for (;;)
6418 {
6419 int rc;
6420 int groupnumber;
6421 PCRE2_SIZE length, length2;
6422 uint32_t copybuffer[256];
6423 int namelen = strlen((const char *)nptr);
6424 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6425 PCRE2_SIZE cnl = namelen;
6426 #endif
6427 if (namelen == 0) break;
6428
6429 #ifdef SUPPORT_PCRE2_8
6430 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6431 #endif
6432 #ifdef SUPPORT_PCRE2_16
6433 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6434 #endif
6435 #ifdef SUPPORT_PCRE2_32
6436 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6437 #endif
6438
6439 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6440 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6441 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6442
6443 length = sizeof(copybuffer)/code_unit_size;
6444 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6445 if (rc < 0)
6446 {
6447 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6448 if (!print_error_message(rc, "", "\n")) return FALSE;
6449 }
6450 else
6451 {
6452 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6453 if (rc < 0)
6454 {
6455 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6456 if (!print_error_message(rc, "", "\n")) return FALSE;
6457 }
6458 else if (length2 != length)
6459 {
6460 fprintf(outfile, "Mismatched substring lengths: %"
6461 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6462 }
6463 fprintf(outfile, " C ");
6464 PCHARSV(copybuffer, 0, length, utf, outfile);
6465 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6466 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6467 else fprintf(outfile, " (non-unique)\n");
6468 }
6469 nptr += namelen + 1;
6470 }
6471
6472 /* Test get strings by number */
6473
6474 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6475 {
6476 int rc;
6477 PCRE2_SIZE length;
6478 void *gotbuffer;
6479 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6480 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6481 if (rc < 0)
6482 {
6483 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6484 if (!print_error_message(rc, "", "\n")) return FALSE;
6485 }
6486 else
6487 {
6488 fprintf(outfile, "%2dG ", n);
6489 PCHARSV(gotbuffer, 0, length, utf, outfile);
6490 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6491 PCRE2_SUBSTRING_FREE(gotbuffer);
6492 }
6493 }
6494
6495 /* Test get strings by name */
6496
6497 nptr = dat_datctl.get_names;
6498 for (;;)
6499 {
6500 PCRE2_SIZE length;
6501 void *gotbuffer;
6502 int rc;
6503 int groupnumber;
6504 int namelen = strlen((const char *)nptr);
6505 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6506 PCRE2_SIZE cnl = namelen;
6507 #endif
6508 if (namelen == 0) break;
6509
6510 #ifdef SUPPORT_PCRE2_8
6511 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6512 #endif
6513 #ifdef SUPPORT_PCRE2_16
6514 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6515 #endif
6516 #ifdef SUPPORT_PCRE2_32
6517 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6518 #endif
6519
6520 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6521 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6522 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6523
6524 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6525 if (rc < 0)
6526 {
6527 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6528 if (!print_error_message(rc, "", "\n")) return FALSE;
6529 }
6530 else
6531 {
6532 fprintf(outfile, " G ");
6533 PCHARSV(gotbuffer, 0, length, utf, outfile);
6534 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6535 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6536 else fprintf(outfile, " (non-unique)\n");
6537 PCRE2_SUBSTRING_FREE(gotbuffer);
6538 }
6539 nptr += namelen + 1;
6540 }
6541
6542 /* Test getting the complete list of captured strings. */
6543
6544 if ((dat_datctl.control & CTL_GETALL) != 0)
6545 {
6546 int rc;
6547 void **stringlist;
6548 PCRE2_SIZE *lengths;
6549 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6550 if (rc < 0)
6551 {
6552 fprintf(outfile, "get substring list failed (%d): ", rc);
6553 if (!print_error_message(rc, "", "\n")) return FALSE;
6554 }
6555 else
6556 {
6557 for (i = 0; i < capcount; i++)
6558 {
6559 fprintf(outfile, "%2dL ", i);
6560 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6561 putc('\n', outfile);
6562 }
6563 if (stringlist[i] != NULL)
6564 fprintf(outfile, "string list not terminated by NULL\n");
6565 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6566 }
6567 }
6568
6569 return TRUE;
6570 }
6571
6572
6573
6574 /*************************************************
6575 * Show an entire ovector *
6576 *************************************************/
6577
6578 /* This function is called after partial matching or match failure, when the
6579 "allvector" modifier is set. It is a means of checking the contents of the
6580 entire ovector, to ensure no modification of fields that should be unchanged.
6581
6582 Arguments:
6583 ovector points to the ovector
6584 oveccount number of pairs
6585
6586 Returns: nothing
6587 */
6588
6589 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6590 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6591 {
6592 uint32_t i;
6593 for (i = 0; i < 2*oveccount; i += 2)
6594 {
6595 PCRE2_SIZE start = ovector[i];
6596 PCRE2_SIZE end = ovector[i+1];
6597
6598 fprintf(outfile, "%2d: ", i/2);
6599 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6600 fprintf(outfile, "<unset>\n");
6601 else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6602 fprintf(outfile, "<unchanged>\n");
6603 else
6604 fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6605 (unsigned long int)end);
6606 }
6607 }
6608
6609
6610 /*************************************************
6611 * Process a data line *
6612 *************************************************/
6613
6614 /* The line is in buffer; it will not be empty.
6615
6616 Arguments: none
6617
6618 Returns: PR_OK continue processing next line
6619 PR_SKIP skip to a blank line
6620 PR_ABEND abort the pcre2test run
6621 */
6622
6623 static int
process_data(void)6624 process_data(void)
6625 {
6626 PCRE2_SIZE len, ulen, arg_ulen;
6627 uint32_t gmatched;
6628 uint32_t c, k;
6629 uint32_t g_notempty = 0;
6630 uint8_t *p, *pp, *start_rep;
6631 size_t needlen;
6632 void *use_dat_context;
6633 BOOL utf;
6634 BOOL subject_literal;
6635
6636 PCRE2_SIZE *ovector;
6637 PCRE2_SIZE ovecsave[3];
6638 uint32_t oveccount;
6639
6640 #ifdef SUPPORT_PCRE2_8
6641 uint8_t *q8 = NULL;
6642 #endif
6643 #ifdef SUPPORT_PCRE2_16
6644 uint16_t *q16 = NULL;
6645 #endif
6646 #ifdef SUPPORT_PCRE2_32
6647 uint32_t *q32 = NULL;
6648 #endif
6649
6650 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6651
6652 /* Copy the default context and data control blocks to the active ones. Then
6653 copy from the pattern the controls that can be set in either the pattern or the
6654 data. This allows them to be overridden in the data line. We do not do this for
6655 options because those that are common apply separately to compiling and
6656 matching. */
6657
6658 DATCTXCPY(dat_context, default_dat_context);
6659 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6660 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6661 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6662 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6663 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6664
6665 if (dat_datctl.substitute_skip == 0)
6666 dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6667 if (dat_datctl.substitute_stop == 0)
6668 dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6669
6670 /* Initialize for scanning the data line. */
6671
6672 #ifdef SUPPORT_PCRE2_8
6673 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6674 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6675 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6676 #else
6677 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6678 #endif
6679
6680 start_rep = NULL;
6681 len = strlen((const char *)buffer);
6682 while (len > 0 && isspace(buffer[len-1])) len--;
6683 buffer[len] = 0;
6684 p = buffer;
6685 while (isspace(*p)) p++;
6686
6687 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6688 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6689
6690 if (utf)
6691 {
6692 uint8_t *q;
6693 uint32_t cc;
6694 int n = 1;
6695 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6696 if (n <= 0)
6697 {
6698 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6699 "in UTF mode\n");
6700 return PR_OK;
6701 }
6702 }
6703
6704 #ifdef SUPPORT_VALGRIND
6705 /* Mark the dbuffer as addressable but undefined again. */
6706 if (dbuffer != NULL)
6707 {
6708 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6709 }
6710 #endif
6711
6712 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6713 the number of code units that will be needed (though the buffer may have to be
6714 extended if replication is involved). */
6715
6716 needlen = (size_t)((len+1) * code_unit_size);
6717 if (dbuffer == NULL || needlen >= dbuffer_size)
6718 {
6719 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6720 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6721 if (dbuffer == NULL)
6722 {
6723 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6724 exit(1);
6725 }
6726 }
6727 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6728
6729 /* Scan the data line, interpreting data escapes, and put the result into a
6730 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6731 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6732 */
6733
6734 while ((c = *p++) != 0)
6735 {
6736 int32_t i = 0;
6737 size_t replen;
6738
6739 /* ] may mark the end of a replicated sequence */
6740
6741 if (c == ']' && start_rep != NULL)
6742 {
6743 long li;
6744 char *endptr;
6745 size_t qoffset = CAST8VAR(q) - dbuffer;
6746 size_t rep_offset = start_rep - dbuffer;
6747
6748 if (*p++ != '{')
6749 {
6750 fprintf(outfile, "** Expected '{' after \\[....]\n");
6751 return PR_OK;
6752 }
6753
6754 li = strtol((const char *)p, &endptr, 10);
6755 if (S32OVERFLOW(li))
6756 {
6757 fprintf(outfile, "** Repeat count too large\n");
6758 return PR_OK;
6759 }
6760
6761 p = (uint8_t *)endptr;
6762 if (*p++ != '}')
6763 {
6764 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6765 return PR_OK;
6766 }
6767
6768 i = (int32_t)li;
6769 if (i-- <= 0)
6770 {
6771 fprintf(outfile, "** Zero or negative repeat not allowed\n");
6772 return PR_OK;
6773 }
6774
6775 replen = CAST8VAR(q) - start_rep;
6776 needlen += replen * i;
6777
6778 if (needlen >= dbuffer_size)
6779 {
6780 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6781 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6782 if (dbuffer == NULL)
6783 {
6784 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6785 exit(1);
6786 }
6787 SETCASTPTR(q, dbuffer + qoffset);
6788 start_rep = dbuffer + rep_offset;
6789 }
6790
6791 while (i-- > 0)
6792 {
6793 memcpy(CAST8VAR(q), start_rep, replen);
6794 SETPLUS(q, replen/code_unit_size);
6795 }
6796
6797 start_rep = NULL;
6798 continue;
6799 }
6800
6801 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6802 set, do the fudge for setting the top bit. */
6803
6804 if (c != '\\' || subject_literal)
6805 {
6806 uint32_t topbit = 0;
6807 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6808 {
6809 topbit = 0x80000000;
6810 c = *p++;
6811 }
6812 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6813 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6814 c |= topbit;
6815 }
6816
6817 /* Handle backslash escapes */
6818
6819 else switch ((c = *p++))
6820 {
6821 case '\\': break;
6822 case 'a': c = CHAR_BEL; break;
6823 case 'b': c = '\b'; break;
6824 case 'e': c = CHAR_ESC; break;
6825 case 'f': c = '\f'; break;
6826 case 'n': c = '\n'; break;
6827 case 'r': c = '\r'; break;
6828 case 't': c = '\t'; break;
6829 case 'v': c = '\v'; break;
6830
6831 case '0': case '1': case '2': case '3':
6832 case '4': case '5': case '6': case '7':
6833 c -= '0';
6834 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6835 c = c * 8 + *p++ - '0';
6836 break;
6837
6838 case 'o':
6839 if (*p == '{')
6840 {
6841 uint8_t *pt = p;
6842 c = 0;
6843 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6844 {
6845 if (++i == 12)
6846 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6847 "using only the first twelve.\n");
6848 else c = c * 8 + *pt - '0';
6849 }
6850 if (*pt == '}') p = pt + 1;
6851 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6852 }
6853 break;
6854
6855 case 'x':
6856 if (*p == '{')
6857 {
6858 uint8_t *pt = p;
6859 c = 0;
6860
6861 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6862 when isxdigit() is a macro that refers to its argument more than
6863 once. This is banned by the C Standard, but apparently happens in at
6864 least one MacOS environment. */
6865
6866 for (pt++; isxdigit(*pt); pt++)
6867 {
6868 if (++i == 9)
6869 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6870 "using only the first eight.\n");
6871 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6872 }
6873 if (*pt == '}')
6874 {
6875 p = pt + 1;
6876 break;
6877 }
6878 /* Not correct form for \x{...}; fall through */
6879 }
6880
6881 /* \x without {} always defines just one byte in 8-bit mode. This
6882 allows UTF-8 characters to be constructed byte by byte, and also allows
6883 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6884 Otherwise, pass it down as data. */
6885
6886 c = 0;
6887 while (i++ < 2 && isxdigit(*p))
6888 {
6889 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6890 p++;
6891 }
6892 #if defined SUPPORT_PCRE2_8
6893 if (utf && (test_mode == PCRE8_MODE))
6894 {
6895 *q8++ = c;
6896 continue;
6897 }
6898 #endif
6899 break;
6900
6901 case 0: /* \ followed by EOF allows for an empty line */
6902 p--;
6903 continue;
6904
6905 case '=': /* \= terminates the data, starts modifiers */
6906 goto ENDSTRING;
6907
6908 case '[': /* \[ introduces a replicated character sequence */
6909 if (start_rep != NULL)
6910 {
6911 fprintf(outfile, "** Nested replication is not supported\n");
6912 return PR_OK;
6913 }
6914 start_rep = CAST8VAR(q);
6915 continue;
6916
6917 default:
6918 if (isalnum(c))
6919 {
6920 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6921 return PR_OK;
6922 }
6923 }
6924
6925 /* We now have a character value in c that may be greater than 255.
6926 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6927 than 127 in UTF mode must have come from \x{...} or octal constructs
6928 because values from \x.. get this far only in non-UTF mode. */
6929
6930 #ifdef SUPPORT_PCRE2_8
6931 if (test_mode == PCRE8_MODE)
6932 {
6933 if (utf)
6934 {
6935 if (c > 0x7fffffff)
6936 {
6937 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6938 "and so cannot be converted to UTF-8\n", c);
6939 return PR_OK;
6940 }
6941 q8 += ord2utf8(c, q8);
6942 }
6943 else
6944 {
6945 if (c > 0xffu)
6946 {
6947 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6948 "and UTF-8 mode is not enabled.\n", c);
6949 fprintf(outfile, "** Truncation will probably give the wrong "
6950 "result.\n");
6951 }
6952 *q8++ = (uint8_t)c;
6953 }
6954 }
6955 #endif
6956 #ifdef SUPPORT_PCRE2_16
6957 if (test_mode == PCRE16_MODE)
6958 {
6959 if (utf)
6960 {
6961 if (c > 0x10ffffu)
6962 {
6963 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6964 "0x10ffff and so cannot be converted to UTF-16\n", c);
6965 return PR_OK;
6966 }
6967 else if (c >= 0x10000u)
6968 {
6969 c-= 0x10000u;
6970 *q16++ = 0xD800 | (c >> 10);
6971 *q16++ = 0xDC00 | (c & 0x3ff);
6972 }
6973 else
6974 *q16++ = c;
6975 }
6976 else
6977 {
6978 if (c > 0xffffu)
6979 {
6980 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6981 "and UTF-16 mode is not enabled.\n", c);
6982 fprintf(outfile, "** Truncation will probably give the wrong "
6983 "result.\n");
6984 }
6985
6986 *q16++ = (uint16_t)c;
6987 }
6988 }
6989 #endif
6990 #ifdef SUPPORT_PCRE2_32
6991 if (test_mode == PCRE32_MODE)
6992 {
6993 *q32++ = c;
6994 }
6995 #endif
6996 }
6997
6998 ENDSTRING:
6999 SET(*q, 0);
7000 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
7001 ulen = len/code_unit_size; /* Length in code units */
7002 arg_ulen = ulen; /* Value to use in match arg */
7003
7004 /* If the string was terminated by \= we must now interpret modifiers. */
7005
7006 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7007 return PR_OK;
7008
/* Setting substitute_{skip,stop} implies a substitute callout. */
7010
7011 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7012 dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7013
7014 /* Check for mutually exclusive modifiers. At present, these are all in the
7015 first control word. */
7016
7017 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7018 {
7019 c = dat_datctl.control & exclusive_dat_controls[k];
7020 if (c != 0 && c != (c & (~c+1)))
7021 {
7022 show_controls(c, 0, "** Not allowed together:");
7023 fprintf(outfile, "\n");
7024 return PR_OK;
7025 }
7026 }
7027
7028 if (pat_patctl.replacement[0] != 0)
7029 {
7030 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7031 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7032 {
7033 fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7034 return PR_OK;
7035 }
7036
7037 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7038 fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7039 }
7040
7041 /* Warn for modifiers that are ignored for DFA. */
7042
7043 if ((dat_datctl.control & CTL_DFA) != 0)
7044 {
7045 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7046 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7047 }
7048
7049 /* We now have the subject in dbuffer, with len containing the byte length, and
7050 ulen containing the code unit length, with a copy in arg_ulen for use in match
7051 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7052 zero_terminate modifier is present).
7053
7054 Move the data to the end of the buffer so that a read over the end can be
7055 caught by valgrind or other means. If we have explicit valgrind support, mark
7056 the unused start of the buffer unaddressable. If we are using the POSIX
7057 interface, or testing zero-termination, we must include the terminating zero in
7058 the usable data. */
7059
7060 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7061 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7062 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7063 #ifdef SUPPORT_VALGRIND
7064 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7065 #endif
7066
7067 /* Now pp points to the subject string. POSIX matching is only possible in
7068 8-bit mode, and it does not support timing or other fancy features. Some were
7069 checked at compile time, but we need to check the match-time settings here. */
7070
7071 #ifdef SUPPORT_PCRE2_8
7072 if ((pat_patctl.control & CTL_POSIX) != 0)
7073 {
7074 int rc;
7075 int eflags = 0;
7076 regmatch_t *pmatch = NULL;
7077 const char *msg = "** Ignored with POSIX interface:";
7078
7079 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7080 prmsg(&msg, "callout_error");
7081 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7082 prmsg(&msg, "callout_fail");
7083 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7084 prmsg(&msg, "copy");
7085 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7086 prmsg(&msg, "get");
7087 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7088 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7089
7090 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7091 {
7092 fprintf(outfile, "%s", msg);
7093 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7094 msg = "";
7095 }
7096 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7097 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7098 {
7099 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7100 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7101 msg = "";
7102 }
7103
7104 if (msg[0] == 0) fprintf(outfile, "\n");
7105
7106 if (dat_datctl.oveccount > 0)
7107 {
7108 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7109 if (pmatch == NULL)
7110 {
7111 fprintf(outfile, "** Failed to get memory for recording matching "
7112 "information (size set = %du)\n", dat_datctl.oveccount);
7113 return PR_OK;
7114 }
7115 }
7116
7117 if (dat_datctl.startend[0] != CFORE_UNSET)
7118 {
7119 pmatch[0].rm_so = dat_datctl.startend[0];
7120 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7121 dat_datctl.startend[1] : len;
7122 eflags |= REG_STARTEND;
7123 }
7124
7125 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7126 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7127 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7128
7129 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7130 if (rc != 0)
7131 {
7132 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7133 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7134 }
7135 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7136 fprintf(outfile, "Matched with REG_NOSUB\n");
7137 else if (dat_datctl.oveccount == 0)
7138 fprintf(outfile, "Matched without capture\n");
7139 else
7140 {
7141 size_t i, j;
7142 size_t last_printed = (size_t)dat_datctl.oveccount;
7143 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7144 {
7145 if (pmatch[i].rm_so >= 0)
7146 {
7147 PCRE2_SIZE start = pmatch[i].rm_so;
7148 PCRE2_SIZE end = pmatch[i].rm_eo;
7149 for (j = last_printed + 1; j < i; j++)
7150 fprintf(outfile, "%2d: <unset>\n", (int)j);
7151 last_printed = i;
7152 if (start > end)
7153 {
7154 start = pmatch[i].rm_eo;
7155 end = pmatch[i].rm_so;
7156 fprintf(outfile, "Start of matched string is beyond its end - "
7157 "displaying from end to start.\n");
7158 }
7159 fprintf(outfile, "%2d: ", (int)i);
7160 PCHARSV(pp, start, end - start, utf, outfile);
7161 fprintf(outfile, "\n");
7162
7163 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7164 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7165 {
7166 fprintf(outfile, "%2d+ ", (int)i);
7167 /* Note: don't use the start/end variables here because we want to
7168 show the text from what is reported as the end. */
7169 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7170 fprintf(outfile, "\n"); }
7171 }
7172 }
7173 }
7174 free(pmatch);
7175 return PR_OK;
7176 }
7177 #endif /* SUPPORT_PCRE2_8 */
7178
7179 /* Handle matching via the native interface. Check for consistency of
7180 modifiers. */
7181
7182 if (dat_datctl.startend[0] != CFORE_UNSET)
7183 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7184
7185 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7186 matching, even if the JIT compiler was used. */
7187
7188 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7189 FLD(compiled_code, executable_jit) != NULL)
7190 {
7191 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7192 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7193 }
7194
7195 /* Handle passing the subject as zero-terminated. */
7196
7197 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7198 arg_ulen = PCRE2_ZERO_TERMINATED;
7199
7200 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7201 NULL context. */
7202
7203 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7204 NULL : PTR(dat_context);
7205
7206 /* Enable display of malloc/free if wanted. We can do this only if either the
7207 pattern or the subject is processed with a context. */
7208
7209 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7210
7211 if (show_memory &&
7212 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7213 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7214 "context: ignored\n");
7215
7216 /* Create and assign a JIT stack if requested. */
7217
7218 if (dat_datctl.jitstack != 0)
7219 {
7220 if (dat_datctl.jitstack != jit_stack_size)
7221 {
7222 PCRE2_JIT_STACK_FREE(jit_stack);
7223 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7224 jit_stack_size = dat_datctl.jitstack;
7225 }
7226 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7227 }
7228
7229 /* Or de-assign */
7230
7231 else if (jit_stack != NULL)
7232 {
7233 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7234 PCRE2_JIT_STACK_FREE(jit_stack);
7235 jit_stack = NULL;
7236 jit_stack_size = 0;
7237 }
7238
7239 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7240 if we want to verify that JIT was actually used. */
7241
7242 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7243 {
7244 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7245 }
7246
7247 /* Adjust match_data according to size of offsets required. A size of zero
7248 causes a new match data block to be obtained that exactly fits the pattern. */
7249
7250 if (dat_datctl.oveccount == 0)
7251 {
7252 PCRE2_MATCH_DATA_FREE(match_data);
7253 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7254 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7255 }
7256 else if (dat_datctl.oveccount <= max_oveccount)
7257 {
7258 SETFLD(match_data, oveccount, dat_datctl.oveccount);
7259 }
7260 else
7261 {
7262 max_oveccount = dat_datctl.oveccount;
7263 PCRE2_MATCH_DATA_FREE(match_data);
7264 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7265 }
7266
7267 if (CASTVAR(void *, match_data) == NULL)
7268 {
7269 fprintf(outfile, "** Failed to get memory for recording matching "
7270 "information (size requested: %d)\n", dat_datctl.oveccount);
7271 max_oveccount = 0;
7272 return PR_OK;
7273 }
7274
7275 ovector = FLD(match_data, ovector);
7276 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7277
7278 /* Replacement processing is ignored for DFA matching. */
7279
7280 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7281 {
7282 fprintf(outfile, "** Ignored for DFA matching: replace\n");
7283 dat_datctl.replacement[0] = 0;
7284 }
7285
7286 /* If a replacement string is provided, call pcre2_substitute() instead of one
7287 of the matching functions. First we have to convert the replacement string to
7288 the appropriate width. */
7289
7290 if (dat_datctl.replacement[0] != 0)
7291 {
7292 int rc;
7293 uint8_t *pr;
7294 uint8_t rbuffer[REPLACE_BUFFSIZE];
7295 uint8_t nbuffer[REPLACE_BUFFSIZE];
7296 uint32_t xoptions;
7297 uint32_t emoption; /* External match option */
7298 PCRE2_SIZE j, rlen, nsize, erroroffset;
7299 BOOL badutf = FALSE;
7300
7301 #ifdef SUPPORT_PCRE2_8
7302 uint8_t *r8 = NULL;
7303 #endif
7304 #ifdef SUPPORT_PCRE2_16
7305 uint16_t *r16 = NULL;
7306 #endif
7307 #ifdef SUPPORT_PCRE2_32
7308 uint32_t *r32 = NULL;
7309 #endif
7310
7311 /* Fill the ovector with junk to detect elements that do not get set
7312 when they should be (relevant only when "allvector" is specified). */
7313
7314 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7315
7316 if (timeitm)
7317 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7318
7319 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7320 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7321
7322 /* Check for a test that does substitution after an initial external match.
7323 If this is set, we run the external match, but leave the interpretation of
7324 its output to pcre2_substitute(). */
7325
7326 emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7327 PCRE2_SUBSTITUTE_MATCHED;
7328
7329 if (emoption != 0)
7330 {
7331 PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7332 dat_datctl.options, match_data, use_dat_context);
7333 }
7334
7335 xoptions = emoption |
7336 (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7337 PCRE2_SUBSTITUTE_GLOBAL) |
7338 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7339 PCRE2_SUBSTITUTE_EXTENDED) |
7340 (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7341 PCRE2_SUBSTITUTE_LITERAL) |
7342 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7343 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7344 (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7345 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7346 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7347 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7348 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7349 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7350
7351 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7352 pr = dat_datctl.replacement;
7353
7354 /* If the replacement starts with '[<number>]' we interpret that as length
7355 value for the replacement buffer. */
7356
7357 nsize = REPLACE_BUFFSIZE/code_unit_size;
7358 if (*pr == '[')
7359 {
7360 PCRE2_SIZE n = 0;
7361 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7362 if (*pr++ != ']')
7363 {
7364 fprintf(outfile, "Bad buffer size in replacement string\n");
7365 return PR_OK;
7366 }
7367 if (n > nsize)
7368 {
7369 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7370 "large (max %" SIZ_FORM ")\n", n, nsize);
7371 return PR_OK;
7372 }
7373 nsize = n;
7374 }
7375
7376 /* Now copy the replacement string to a buffer of the appropriate width. No
7377 escape processing is done for replacements. In UTF mode, check for an invalid
7378 UTF-8 input string, and if it is invalid, just copy its code units without
7379 UTF interpretation. This provides a means of checking that an invalid string
7380 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7381 replacement. */
7382
7383 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7384
7385 /* Not UTF or invalid UTF-8: just copy the code units. */
7386
7387 if (!utf || badutf)
7388 {
7389 while ((c = *pr++) != 0)
7390 {
7391 #ifdef SUPPORT_PCRE2_8
7392 if (test_mode == PCRE8_MODE) *r8++ = c;
7393 #endif
7394 #ifdef SUPPORT_PCRE2_16
7395 if (test_mode == PCRE16_MODE) *r16++ = c;
7396 #endif
7397 #ifdef SUPPORT_PCRE2_32
7398 if (test_mode == PCRE32_MODE) *r32++ = c;
7399 #endif
7400 }
7401 }
7402
7403 /* Valid UTF-8 replacement string */
7404
7405 else while ((c = *pr++) != 0)
7406 {
7407 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7408
7409 #ifdef SUPPORT_PCRE2_8
7410 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7411 #endif
7412
7413 #ifdef SUPPORT_PCRE2_16
7414 if (test_mode == PCRE16_MODE)
7415 {
7416 if (c >= 0x10000u)
7417 {
7418 c-= 0x10000u;
7419 *r16++ = 0xD800 | (c >> 10);
7420 *r16++ = 0xDC00 | (c & 0x3ff);
7421 }
7422 else *r16++ = c;
7423 }
7424 #endif
7425
7426 #ifdef SUPPORT_PCRE2_32
7427 if (test_mode == PCRE32_MODE) *r32++ = c;
7428 #endif
7429 }
7430
7431 SET(*r, 0);
7432 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7433 rlen = PCRE2_ZERO_TERMINATED;
7434 else
7435 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7436
7437 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7438 {
7439 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7440 }
7441 else
7442 {
7443 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
7444 }
7445
7446 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7447 dat_datctl.options|xoptions, match_data, use_dat_context,
7448 rbuffer, rlen, nbuffer, &nsize);
7449
7450 if (rc < 0)
7451 {
7452 fprintf(outfile, "Failed: error %d", rc);
7453 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7454 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7455 fprintf(outfile, ": ");
7456 if (!print_error_message(rc, "", "")) return PR_ABEND;
7457 if (rc == PCRE2_ERROR_NOMEMORY &&
7458 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7459 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7460 }
7461 else
7462 {
7463 fprintf(outfile, "%2d: ", rc);
7464 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7465 }
7466
7467 fprintf(outfile, "\n");
7468 show_memory = FALSE;
7469
7470 /* Show final ovector contents if requested. */
7471
7472 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7473 show_ovector(ovector, oveccount);
7474
7475 return PR_OK;
7476 } /* End of substitution handling */
7477
7478 /* When a replacement string is not provided, run a loop for global matching
7479 with one of the basic matching functions. For altglobal (or first time round
7480 the loop), set an "unset" value for the previous match info. */
7481
7482 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7483
7484 for (gmatched = 0;; gmatched++)
7485 {
7486 PCRE2_SIZE j;
7487 int capcount;
7488
7489 /* Fill the ovector with junk to detect elements that do not get set
7490 when they should be. */
7491
7492 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7493
7494 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7495 stack callback function. */
7496
7497 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7498
7499 /* Do timing if required. */
7500
7501 if (timeitm > 0)
7502 {
7503 int i;
7504 clock_t start_time, time_taken;
7505
7506 if ((dat_datctl.control & CTL_DFA) != 0)
7507 {
7508 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7509 {
7510 fprintf(outfile, "Timing DFA restarts is not supported\n");
7511 return PR_OK;
7512 }
7513 if (dfa_workspace == NULL)
7514 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7515 start_time = clock();
7516 for (i = 0; i < timeitm; i++)
7517 {
7518 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7519 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7520 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7521 }
7522 }
7523
7524 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7525 {
7526 start_time = clock();
7527 for (i = 0; i < timeitm; i++)
7528 {
7529 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7530 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7531 use_dat_context);
7532 }
7533 }
7534
7535 else
7536 {
7537 start_time = clock();
7538 for (i = 0; i < timeitm; i++)
7539 {
7540 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7541 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7542 use_dat_context);
7543 }
7544 }
7545 total_match_time += (time_taken = clock() - start_time);
7546 fprintf(outfile, "Match time %.4f milliseconds\n",
7547 (((double)time_taken * 1000.0) / (double)timeitm) /
7548 (double)CLOCKS_PER_SEC);
7549 }
7550
7551 /* Find the heap, match and depth limits if requested. The depth and heap
7552 limits are not relevant for JIT. The return from check_match_limit() is the
7553 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7554
7555 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7556 {
7557 capcount = 0; /* This stops compiler warnings */
7558
7559 if (FLD(compiled_code, executable_jit) == NULL ||
7560 (dat_datctl.options & PCRE2_NO_JIT) != 0)
7561 {
7562 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7563 }
7564
7565 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7566 "match");
7567
7568 if (FLD(compiled_code, executable_jit) == NULL ||
7569 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7570 (dat_datctl.control & CTL_DFA) != 0)
7571 {
7572 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7573 "depth");
7574 }
7575
7576 if (capcount == 0)
7577 {
7578 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7579 capcount = dat_datctl.oveccount;
7580 }
7581 }
7582
7583 /* Otherwise just run a single match, setting up a callout if required (the
7584 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7585
7586 else
7587 {
7588 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7589 {
7590 PCRE2_SET_CALLOUT(dat_context, callout_function,
7591 (void *)(&dat_datctl.callout_data));
7592 first_callout = TRUE;
7593 last_callout_mark = NULL;
7594 callout_count = 0;
7595 }
7596 else
7597 {
7598 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7599 }
7600
7601 /* Run a single DFA or NFA match. */
7602
7603 if ((dat_datctl.control & CTL_DFA) != 0)
7604 {
7605 if (dfa_workspace == NULL)
7606 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7607 if (dfa_matched++ == 0)
7608 dfa_workspace[0] = -1; /* To catch bad restart */
7609 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7610 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7611 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7612 if (capcount == 0)
7613 {
7614 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7615 capcount = dat_datctl.oveccount;
7616 }
7617 }
7618 else
7619 {
7620 if ((pat_patctl.control & CTL_JITFAST) != 0)
7621 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7622 dat_datctl.options | g_notempty, match_data, use_dat_context);
7623 else
7624 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7625 dat_datctl.options | g_notempty, match_data, use_dat_context);
7626 if (capcount == 0)
7627 {
7628 fprintf(outfile, "Matched, but too many substrings\n");
7629 capcount = dat_datctl.oveccount;
7630 }
7631 }
7632 }
7633
7634 /* The result of the match is now in capcount. First handle a successful
7635 match. */
7636
7637 if (capcount >= 0)
7638 {
7639 int i;
7640
7641 if (capcount > (int)oveccount) /* Check for lunatic return value */
7642 {
7643 fprintf(outfile,
7644 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7645 capcount, oveccount);
7646 capcount = oveccount;
7647 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7648 {
7649 fprintf(outfile, "** Global loop abandoned\n");
7650 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7651 }
7652 }
7653
7654 /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7655 should be, but not for fast JIT, where it isn't supported. */
7656
7657 if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7658 (pat_patctl.control & CTL_JITFAST) == 0)
7659 {
7660 if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7661 fprintf(outfile,
7662 "** PCRE2 error: flag not set after copy_matched_subject\n");
7663
7664 if (CASTFLD(void *, match_data, subject) == pp)
7665 fprintf(outfile,
7666 "** PCRE2 error: copy_matched_subject has not copied\n");
7667
7668 if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7669 fprintf(outfile,
7670 "** PCRE2 error: copy_matched_subject mismatch\n");
7671 }
7672
7673 /* If this is not the first time round a global loop, check that the
7674 returned string has changed. If it has not, check for an empty string match
7675 at different starting offset from the previous match. This is a failed test
7676 retry for null-matching patterns that don't match at their starting offset,
7677 for example /(?<=\G.)/. A repeated match at the same point is not such a
7678 pattern, and must be discarded, and we then proceed to seek a non-null
7679 match at the current point. For any other repeated match, there is a bug
7680 somewhere and we must break the loop because it will go on for ever. We
7681 know that there are always at least two elements in the ovector. */
7682
7683 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7684 {
7685 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7686 {
7687 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7688 ovecsave[2] = dat_datctl.offset;
7689 continue; /* Back to the top of the loop */
7690 }
7691 fprintf(outfile,
7692 "** PCRE2 error: global repeat returned the same string as previous\n");
7693 fprintf(outfile, "** Global loop abandoned\n");
7694 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7695 }
7696
7697 /* "allcaptures" requests showing of all captures in the pattern, to check
7698 unset ones at the end. It may be set on the pattern or the data. Implement
7699 by setting capcount to the maximum. This is not relevant for DFA matching,
7700 so ignore it (warning given above). */
7701
7702 if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7703 {
7704 capcount = maxcapcount + 1; /* Allow for full match */
7705 if (capcount > (int)oveccount) capcount = oveccount;
7706 }
7707
7708 /* "allvector" request showing the entire ovector. */
7709
7710 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7711
7712 /* Output the captured substrings. Note that, for the matched string,
7713 the use of \K in an assertion can make the start later than the end. */
7714
7715 for (i = 0; i < 2*capcount; i += 2)
7716 {
7717 PCRE2_SIZE lleft, lmiddle, lright;
7718 PCRE2_SIZE start = ovector[i];
7719 PCRE2_SIZE end = ovector[i+1];
7720
7721 if (start > end)
7722 {
7723 start = ovector[i+1];
7724 end = ovector[i];
7725 fprintf(outfile, "Start of matched string is beyond its end - "
7726 "displaying from end to start.\n");
7727 }
7728
7729 fprintf(outfile, "%2d: ", i/2);
7730
7731 /* Check for an unset group */
7732
7733 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7734 {
7735 fprintf(outfile, "<unset>\n");
7736 continue;
7737 }
7738
7739 /* Check for silly offsets, in particular, values that have not been
7740 set when they should have been. However, if we are past the end of the
7741 captures for this pattern ("allvector" causes this), or if we are DFA
7742 matching, it isn't an error if the entry is unchanged. */
7743
7744 if (start > ulen || end > ulen)
7745 {
7746 if (((dat_datctl.control & CTL_DFA) != 0 ||
7747 i >= (int)(2*maxcapcount + 2)) &&
7748 start == JUNK_OFFSET && end == JUNK_OFFSET)
7749 fprintf(outfile, "<unchanged>\n");
7750 else
7751 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7752 (unsigned long int)start, (unsigned long int)end);
7753 continue;
7754 }
7755
7756 /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with
7757 JIT, it is disabled above, with a comment.) When the match is done by the
7758 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7759 set, and if the leftmost consulted character is before the start of the
7760 match or the rightmost consulted character is past the end of the match,
7761 we want to show all consulted characters for the main matched string, and
7762 indicate which were lookarounds. */
7763
7764 if (i == 0)
7765 {
7766 BOOL showallused;
7767 PCRE2_SIZE leftchar, rightchar;
7768
7769 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7770 {
7771 leftchar = FLD(match_data, leftchar);
7772 rightchar = FLD(match_data, rightchar);
7773 showallused = i == 0 && (leftchar < start || rightchar > end);
7774 }
7775 else showallused = FALSE;
7776
7777 if (showallused)
7778 {
7779 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7780 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7781 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7782 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7783 fprintf(outfile, " (JIT)");
7784 fprintf(outfile, "\n ");
7785 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7786 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7787 for (j = 0; j < lright; j++) fprintf(outfile, ">");
7788 }
7789
7790 /* When a pattern contains \K, the start of match position may be
7791 different to the start of the matched string. When this is the case,
7792 show it when requested. */
7793
7794 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7795 {
7796 PCRE2_SIZE startchar;
7797 PCRE2_GET_STARTCHAR(startchar, match_data);
7798 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7799 PCHARSV(pp, start, end - start, utf, outfile);
7800 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7801 fprintf(outfile, " (JIT)");
7802 if (startchar != start)
7803 {
7804 fprintf(outfile, "\n ");
7805 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7806 }
7807 }
7808
7809 /* Otherwise, just show the matched string. */
7810
7811 else
7812 {
7813 PCHARSV(pp, start, end - start, utf, outfile);
7814 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7815 fprintf(outfile, " (JIT)");
7816 }
7817 }
7818
7819 /* Not the main matched string. Just show it unadorned. */
7820
7821 else
7822 {
7823 PCHARSV(pp, start, end - start, utf, outfile);
7824 }
7825
7826 fprintf(outfile, "\n");
7827
7828 /* Note: don't use the start/end variables here because we want to
7829 show the text from what is reported as the end. */
7830
7831 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7832 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7833 {
7834 fprintf(outfile, "%2d+ ", i/2);
7835 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7836 fprintf(outfile, "\n");
7837 }
7838 }
7839
7840 /* Output (*MARK) data if requested */
7841
7842 if ((dat_datctl.control & CTL_MARK) != 0 &&
7843 TESTFLD(match_data, mark, !=, NULL))
7844 {
7845 fprintf(outfile, "MK: ");
7846 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7847 fprintf(outfile, "\n");
7848 }
7849
7850 /* Process copy/get strings */
7851
7852 if (!copy_and_get(utf, capcount)) return PR_ABEND;
7853
7854 } /* End of handling a successful match */
7855
7856 /* There was a partial match. The value of ovector[0] is the bumpalong point,
7857 that is, startchar, not any \K point that might have been passed. When JIT is
7858 not in use, "allusedtext" may be set, in which case we indicate the leftmost
7859 consulted character. */
7860
7861 else if (capcount == PCRE2_ERROR_PARTIAL)
7862 {
7863 PCRE2_SIZE leftchar;
7864 int backlength;
7865 int rubriclength = 0;
7866
7867 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7868 {
7869 leftchar = FLD(match_data, leftchar);
7870 }
7871 else leftchar = ovector[0];
7872
7873 fprintf(outfile, "Partial match");
7874 if ((dat_datctl.control & CTL_MARK) != 0 &&
7875 TESTFLD(match_data, mark, !=, NULL))
7876 {
7877 fprintf(outfile, ", mark=");
7878 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7879 outfile);
7880 rubriclength += 7;
7881 }
7882 fprintf(outfile, ": ");
7883 rubriclength += 15;
7884
7885 PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7886 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7887
7888 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7889 fprintf(outfile, " (JIT)");
7890 fprintf(outfile, "\n");
7891
7892 if (backlength != 0)
7893 {
7894 int i;
7895 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7896 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7897 fprintf(outfile, "\n");
7898 }
7899
7900 if (ulen != ovector[1])
7901 fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7902 "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7903
7904 /* Process copy/get strings */
7905
7906 if (!copy_and_get(utf, 1)) return PR_ABEND;
7907
7908 /* "allvector" outputs the entire vector */
7909
7910 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7911 show_ovector(ovector, oveccount);
7912
7913 break; /* Out of the /g loop */
7914 } /* End of handling partial match */
7915
7916 /* Failed to match. If this is a /g or /G loop, we might previously have
7917 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7918 If that is the case, this is not necessarily the end. We want to advance the
7919 start offset, and continue. We won't be at the end of the string - that was
7920 checked before setting g_notempty. We achieve the effect by pretending that a
7921 single character was matched.
7922
7923 Complication arises in the case when the newline convention is "any", "crlf",
7924 or "anycrlf". If the previous match was at the end of a line terminated by
7925 CRLF, an advance of one character just passes the CR, whereas we should
7926 prefer the longer newline sequence, as does the code in pcre2_match().
7927
7928 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7929 character, not one byte. */
7930
7931 else if (g_notempty != 0) /* There was a previous null match */
7932 {
7933 uint16_t nl = FLD(compiled_code, newline_convention);
7934 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
7935 PCRE2_SIZE end_offset = start_offset + 1;
7936
7937 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7938 nl == PCRE2_NEWLINE_ANYCRLF) &&
7939 start_offset < ulen - 1 &&
7940 CODE_UNIT(pp, start_offset) == '\r' &&
7941 CODE_UNIT(pp, end_offset) == '\n')
7942 end_offset++;
7943
7944 else if (utf && test_mode != PCRE32_MODE)
7945 {
7946 if (test_mode == PCRE8_MODE)
7947 {
7948 for (; end_offset < ulen; end_offset++)
7949 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7950 }
7951 else /* 16-bit mode */
7952 {
7953 for (; end_offset < ulen; end_offset++)
7954 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7955 }
7956 }
7957
7958 SETFLDVEC(match_data, ovector, 0, start_offset);
7959 SETFLDVEC(match_data, ovector, 1, end_offset);
7960 } /* End of handling null match in a global loop */
7961
7962 /* A "normal" match failure. There will be a negative error number in
7963 capcount. */
7964
7965 else
7966 {
7967 switch(capcount)
7968 {
7969 case PCRE2_ERROR_NOMATCH:
7970 if (gmatched == 0)
7971 {
7972 fprintf(outfile, "No match");
7973 if ((dat_datctl.control & CTL_MARK) != 0 &&
7974 TESTFLD(match_data, mark, !=, NULL))
7975 {
7976 fprintf(outfile, ", mark = ");
7977 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7978 }
7979 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7980 fprintf(outfile, " (JIT)");
7981 fprintf(outfile, "\n");
7982
7983 /* "allvector" outputs the entire vector */
7984
7985 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7986 show_ovector(ovector, oveccount);
7987 }
7988 break;
7989
7990 case PCRE2_ERROR_BADUTFOFFSET:
7991 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7992 break;
7993
7994 default:
7995 fprintf(outfile, "Failed: error %d: ", capcount);
7996 if (!print_error_message(capcount, "", "")) return PR_ABEND;
7997 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7998 capcount >= PCRE2_ERROR_UTF32_ERR2)
7999 {
8000 PCRE2_SIZE startchar;
8001 PCRE2_GET_STARTCHAR(startchar, match_data);
8002 fprintf(outfile, " at offset %" SIZ_FORM, startchar);
8003 }
8004 fprintf(outfile, "\n");
8005 break;
8006 }
8007
8008 break; /* Out of the /g loop */
8009 } /* End of failed match handling */
8010
8011 /* Control reaches here in two circumstances: (a) after a match, and (b)
8012 after a non-match that immediately followed a match on an empty string when
8013 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8014 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8015 of one character. So effectively we get here only after a match. If we
8016 are not doing a global search, we are done. */
8017
8018 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8019 {
8020 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8021 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8022
8023 /* We must now set up for the next iteration of a global search. If we have
8024 matched an empty string, first check to see if we are at the end of the
8025 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8026 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8027 at the same point. If this fails it will be picked up above, where a fake
8028 match is set up so that at this point we advance to the next character.
8029
8030 However, in order to cope with patterns that never match at their starting
8031 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8032 than the starting offset. This means there will be a retry with the
8033 starting offset at the match offset. If this returns the same match again,
8034 it is picked up above and ignored, and the special action is then taken. */
8035
8036 if (match_offset == end_offset)
8037 {
8038 if (end_offset == ulen) break; /* End of subject */
8039 if (match_offset <= dat_datctl.offset)
8040 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8041 }
8042
8043 /* However, even after matching a non-empty string, there is still one
8044 tricky case. If a pattern contains \K within a lookbehind assertion at the
8045 start, the end of the matched string can be at the offset where the match
8046 started. In the case of a normal /g iteration without special action, this
8047 leads to a loop that keeps on returning the same substring. The loop would
8048 be caught above, but we really want to move on to the next match. */
8049
8050 else
8051 {
8052 g_notempty = 0; /* Set for a "normal" repeat */
8053 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8054 {
8055 PCRE2_SIZE startchar;
8056 PCRE2_GET_STARTCHAR(startchar, match_data);
8057 if (end_offset <= startchar)
8058 {
8059 if (startchar >= ulen) break; /* End of subject */
8060 end_offset = startchar + 1;
8061 if (utf && test_mode != PCRE32_MODE)
8062 {
8063 if (test_mode == PCRE8_MODE)
8064 {
8065 for (; end_offset < ulen; end_offset++)
8066 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8067 }
8068 else /* 16-bit mode */
8069 {
8070 for (; end_offset < ulen; end_offset++)
8071 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8072 }
8073 }
8074 }
8075 }
8076 }
8077
8078 /* For a normal global (/g) iteration, save the current ovector[0,1] and
8079 the starting offset so that we can check that they do change each time.
8080 Otherwise a matching bug that returns the same string causes an infinite
8081 loop. It has happened! Then update the start offset, leaving other
8082 parameters alone. */
8083
8084 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8085 {
8086 ovecsave[0] = ovector[0];
8087 ovecsave[1] = ovector[1];
8088 ovecsave[2] = dat_datctl.offset;
8089 dat_datctl.offset = end_offset;
8090 }
8091
8092 /* For altglobal, just update the pointer and length. */
8093
8094 else
8095 {
8096 pp += end_offset * code_unit_size;
8097 len -= end_offset * code_unit_size;
8098 ulen -= end_offset;
8099 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8100 }
8101 }
8102 } /* End of global loop */
8103
8104 show_memory = FALSE;
8105 return PR_OK;
8106 }
8107
8108
8109
8110
8111 /*************************************************
8112 * Print PCRE2 version *
8113 *************************************************/
8114
8115 static void
print_version(FILE * f)8116 print_version(FILE *f)
8117 {
8118 VERSION_TYPE *vp;
8119 fprintf(f, "PCRE2 version ");
8120 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8121 fprintf(f, "\n");
8122 }
8123
8124
8125
8126 /*************************************************
8127 * Print Unicode version *
8128 *************************************************/
8129
8130 static void
print_unicode_version(FILE * f)8131 print_unicode_version(FILE *f)
8132 {
8133 VERSION_TYPE *vp;
8134 fprintf(f, "Unicode version ");
8135 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8136 }
8137
8138
8139
8140 /*************************************************
8141 * Print JIT target *
8142 *************************************************/
8143
8144 static void
print_jit_target(FILE * f)8145 print_jit_target(FILE *f)
8146 {
8147 VERSION_TYPE *vp;
8148 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8149 }
8150
8151
8152
8153 /*************************************************
8154 * Print newline configuration *
8155 *************************************************/
8156
8157 /* Output is always to stdout.
8158
8159 Arguments:
8160 rc the return code from PCRE2_CONFIG_NEWLINE
8161 isc TRUE if called from "-C newline"
8162 Returns: nothing
8163 */
8164
8165 static void
print_newline_config(uint32_t optval,BOOL isc)8166 print_newline_config(uint32_t optval, BOOL isc)
8167 {
8168 if (!isc) printf(" Default newline sequence is ");
8169 if (optval < sizeof(newlines)/sizeof(char *))
8170 printf("%s\n", newlines[optval]);
8171 else
8172 printf("a non-standard value: %d\n", optval);
8173 }
8174
8175
8176
8177 /*************************************************
8178 * Usage function *
8179 *************************************************/
8180
/* Writes the command line help text to stdout. The text varies with the
build: the readline remark depends on SUPPORT_LIBREADLINE/SUPPORT_LIBEDIT, and
each of -8, -16 and -32 is listed only when the corresponding SUPPORT_PCRE2_xx
macro is defined. None of the text contains a '%', so it is written with
fputs() rather than printf().

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
fputs("Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE2_8
fputs(" -8 use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

fputs(" -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"
      " -AC as -ac, but also set subject 'callout_extra' modifier\n"
      " -b set default pattern modifier 'fullbincode'\n"
      " -C show PCRE2 compile-time options and exit\n"
      " -C arg show a specific compile-time option and exit with its\n"
      " value if numeric (else 0). The arg can be:\n", stdout);

/* The values accepted by "-C arg". */

fputs(" backslash-C use of \\C is enabled [0, 1]\n"
      " bsr \\R type [ANYCRLF, ANY]\n"
      " ebcdic compiled for EBCDIC character code [0,1]\n"
      " ebcdic-nl NL code if compiled for EBCDIC\n"
      " jit just-in-time compiler supported [0, 1]\n"
      " linksize internal link size [2, 3, 4]\n"
      " newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
      " pcre2-8 8 bit library support enabled [0, 1]\n"
      " pcre2-16 16 bit library support enabled [0, 1]\n"
      " pcre2-32 32 bit library support enabled [0, 1]\n"
      " unicode Unicode and UTF support enabled [0, 1]\n", stdout);

fputs(" -d set default pattern modifier 'debug'\n"
      " -dfa set default subject modifier 'dfa'\n"
      " -error <n,m,..> show messages for error numbers, then exit\n"
      " -help show usage information\n"
      " -i set default pattern modifier 'info'\n"
      " -jit set default pattern modifier 'jit'\n"
      " -jitfast set default pattern modifier 'jitfast'\n"
      " -jitverify set default pattern modifier 'jitverify'\n"
      " -LM list pattern and subject modifiers, then exit\n"
      " -q quiet: do not output PCRE2 version number at start\n", stdout);

fputs(" -pattern <s> set default pattern modifier fields\n"
      " -subject <s> set default subject modifier fields\n"
      " -S <n> set stack size to <n> mebibytes\n"
      " -t [<n>] time compilation and execution, repeating <n> times\n"
      " -tm [<n>] time execution (matching) only, repeating <n> times\n"
      " -T same as -t, but show total times at the end\n"
      " -TM same as -tm, but show total time at the end\n"
      " -version show PCRE2 version and exit\n", stdout);
}
8237
8238
8239
8240 /*************************************************
8241 * Handle -C option *
8242 *************************************************/
8243
8244 /* This option outputs configuration options and sets an appropriate return
8245 code when asked for a single option. The code is abstracted into a separate
8246 function because of its size. Use whichever pcre2_config() function is
8247 available.
8248
8249 Argument: an option name or NULL
8250 Returns: the return code
8251 */
8252
8253 static int
c_option(const char * arg)8254 c_option(const char *arg)
8255 {
8256 uint32_t optval;
8257 unsigned int i = COPTLISTCOUNT;
8258 int yield = 0;
8259
8260 if (arg != NULL && arg[0] != CHAR_MINUS)
8261 {
8262 for (i = 0; i < COPTLISTCOUNT; i++)
8263 if (strcmp(arg, coptlist[i].name) == 0) break;
8264
8265 if (i >= COPTLISTCOUNT)
8266 {
8267 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8268 return 0;
8269 }
8270
8271 switch (coptlist[i].type)
8272 {
8273 case CONF_BSR:
8274 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8275 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8276 break;
8277
8278 case CONF_FIX:
8279 yield = coptlist[i].value;
8280 printf("%d\n", yield);
8281 break;
8282
8283 case CONF_FIZ:
8284 optval = coptlist[i].value;
8285 printf("%d\n", optval);
8286 break;
8287
8288 case CONF_INT:
8289 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8290 printf("%d\n", yield);
8291 break;
8292
8293 case CONF_NL:
8294 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8295 print_newline_config(optval, TRUE);
8296 break;
8297 }
8298
8299 /* For VMS, return the value by setting a symbol, for certain values only. This
8300 is contributed code which the PCRE2 developers have no means of testing. */
8301
8302 #ifdef __VMS
8303
8304 /* This is the original code provided by the first VMS contributor. */
8305 #ifdef NEVER
8306 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8307 {
8308 char ucname[16];
8309 strcpy(ucname, coptlist[i].name);
8310 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8311 vms_setsymbol(ucname, 0, optval);
8312 }
8313 #endif
8314
8315 /* This is the new code, provided by a second VMS contributor. */
8316
8317 if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8318 {
8319 char nam_buf[22], val_buf[4];
8320 $DESCRIPTOR(nam, nam_buf);
8321 $DESCRIPTOR(val, val_buf);
8322
8323 strcpy(nam_buf, coptlist[i].name);
8324 nam.dsc$w_length = strlen(nam_buf);
8325 sprintf(val_buf, "%d", yield);
8326 val.dsc$w_length = strlen(val_buf);
8327 lib$set_symbol(&nam, &val);
8328 }
8329 #endif /* __VMS */
8330
8331 return yield;
8332 }
8333
8334 /* No argument for -C: output all configuration information. */
8335
8336 print_version(stdout);
8337 printf("Compiled with\n");
8338
8339 #ifdef EBCDIC
8340 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8341 #if defined NATIVE_ZOS
8342 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
8343 #endif
8344 #endif
8345
8346 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8347 if (optval & 1) printf(" 8-bit support\n");
8348 if (optval & 2) printf(" 16-bit support\n");
8349 if (optval & 4) printf(" 32-bit support\n");
8350
8351 #ifdef SUPPORT_VALGRIND
8352 printf(" Valgrind support\n");
8353 #endif
8354
8355 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8356 if (optval != 0)
8357 {
8358 printf(" UTF and UCP support (");
8359 print_unicode_version(stdout);
8360 printf(")\n");
8361 }
8362 else printf(" No Unicode support\n");
8363
8364 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8365 if (optval != 0)
8366 {
8367 printf(" Just-in-time compiler support: ");
8368 print_jit_target(stdout);
8369 printf("\n");
8370 }
8371 else
8372 {
8373 printf(" No just-in-time compiler support\n");
8374 }
8375
8376 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8377 print_newline_config(optval, FALSE);
8378 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8379 printf(" \\R matches %s\n",
8380 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8381 "all Unicode newlines");
8382 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8383 printf(" \\C is %ssupported\n", optval? "not ":"");
8384 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8385 printf(" Internal link size = %d\n", optval);
8386 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8387 printf(" Parentheses nest limit = %d\n", optval);
8388 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8389 printf(" Default heap limit = %d kibibytes\n", optval);
8390 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8391 printf(" Default match limit = %d\n", optval);
8392 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8393 printf(" Default depth limit = %d\n", optval);
8394
8395 #if defined SUPPORT_LIBREADLINE
8396 printf(" pcre2test has libreadline support\n");
8397 #elif defined SUPPORT_LIBEDIT
8398 printf(" pcre2test has libedit support\n");
8399 #else
8400 printf(" pcre2test has neither libreadline nor libedit support\n");
8401 #endif
8402
8403 return 0;
8404 }
8405
8406
8407
8408 /*************************************************
8409 * Display one modifier *
8410 *************************************************/
8411
8412 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8413 display_one_modifier(modstruct *m, BOOL for_pattern)
8414 {
8415 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8416 '*' : ' ';
8417 printf("%c%s", c, m->name);
8418 }
8419
8420
8421
8422 /*************************************************
8423 * Display pattern or subject modifiers *
8424 *************************************************/
8425
8426 /* In order to print in two columns, first scan without printing to get a list
8427 of the modifiers that are required.
8428
8429 Arguments:
8430 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8431 title string to be used in title
8432
8433 Returns: nothing
8434 */
8435
8436 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8437 display_selected_modifiers(BOOL for_pattern, const char *title)
8438 {
8439 uint32_t i, j;
8440 uint32_t n = 0;
8441 uint32_t list[MODLISTCOUNT];
8442
8443 for (i = 0; i < MODLISTCOUNT; i++)
8444 {
8445 BOOL is_pattern = TRUE;
8446 modstruct *m = modlist + i;
8447
8448 switch (m->which)
8449 {
8450 case MOD_CTC: /* Compile context */
8451 case MOD_PAT: /* Pattern */
8452 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8453 break;
8454
8455 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8456 subjects, but can be given with a pattern. We list them as subject
8457 modifiers, but marked with an asterisk.*/
8458
8459 case MOD_CTM: /* Match context */
8460 case MOD_DAT: /* Subject line */
8461 case MOD_PND: /* As PD, but not default pattern */
8462 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8463 is_pattern = FALSE;
8464 break;
8465
8466 default: printf("** Unknown type for modifier '%s'\n", m->name);
8467 /* Fall through */
8468 case MOD_PD: /* Pattern or subject */
8469 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8470 is_pattern = for_pattern;
8471 break;
8472 }
8473
8474 if (for_pattern == is_pattern) list[n++] = i;
8475 }
8476
8477 /* Now print from the list in two columns. */
8478
8479 printf("-------------- %s MODIFIERS --------------\n", title);
8480
8481 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8482 {
8483 modstruct *m = modlist + list[i];
8484 display_one_modifier(m, for_pattern);
8485 if (j < n)
8486 {
8487 uint32_t k = 27 - strlen(m->name);
8488 while (k-- > 0) printf(" ");
8489 display_one_modifier(modlist + list[j], for_pattern);
8490 }
8491 printf("\n");
8492 }
8493 }
8494
8495
8496
8497 /*************************************************
8498 * Display the list of modifiers *
8499 *************************************************/
8500
8501 static void
display_modifiers(void)8502 display_modifiers(void)
8503 {
8504 printf(
8505 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8506 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8507 "that are listed for both patterns and subjects have different effects in\n"
8508 "each case.\n\n");
8509 display_selected_modifiers(TRUE, "PATTERN");
8510 printf("\n");
8511 display_selected_modifiers(FALSE, "SUBJECT");
8512 }
8513
8514
8515
8516 /*************************************************
8517 * Main Program *
8518 *************************************************/
8519
8520 int
main(int argc,char ** argv)8521 main(int argc, char **argv)
8522 {
8523 uint32_t temp;
8524 uint32_t yield = 0;
8525 uint32_t op = 1;
8526 BOOL notdone = TRUE;
8527 BOOL quiet = FALSE;
8528 BOOL showtotaltimes = FALSE;
8529 BOOL skipping = FALSE;
8530 char *arg_subject = NULL;
8531 char *arg_pattern = NULL;
8532 char *arg_error = NULL;
8533
8534 /* The offsets to the options and control bits fields of the pattern and data
8535 control blocks must be the same so that common options and controls such as
8536 "anchored" or "memory" can work for either of them from a single table entry.
8537 We cannot test this till runtime because "offsetof" does not work in the
8538 preprocessor. */
8539
8540 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8541 PO(control2) != DO(control2))
8542 {
8543 fprintf(stderr, "** Coding error: "
8544 "options and control offsets for pattern and data must be the same.\n");
8545 return 1;
8546 }
8547
8548 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8549 same time checking that a request for the length gives the same answer. Also
8550 check lengths for non-string items. */
8551
8552 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8553 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8554
8555 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8556 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8557
8558 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8559 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8560
8561 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8562 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8563 {
8564 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8565 return 1;
8566 }
8567
8568 /* Check that bad options are diagnosed. */
8569
8570 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8571 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8572 {
8573 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8574 return 1;
8575 }
8576
8577 /* This configuration option is now obsolete, but running a quick check ensures
8578 that its code is covered. */
8579
8580 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8581
8582 /* Get buffers from malloc() so that valgrind will check their misuse when
8583 debugging. They grow automatically when very long lines are read. The 16-
8584 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8585
8586 buffer = (uint8_t *)malloc(pbuffer8_size);
8587 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8588
8589 /* The following _setmode() stuff is some Windows magic that tells its runtime
8590 library to translate CRLF into a single LF character. At least, that's what
8591 I've been told: never having used Windows I take this all on trust. Originally
8592 it set 0x8000, but then I was advised that _O_BINARY was better. */
8593
8594 #if defined(_WIN32) || defined(WIN32)
8595 _setmode( _fileno( stdout ), _O_BINARY );
8596 #endif
8597
8598 /* Initialization that does not depend on the running mode. */
8599
8600 locale_name[0] = 0;
8601
8602 memset(&def_patctl, 0, sizeof(patctl));
8603 def_patctl.convert_type = CONVERT_UNSET;
8604
8605 memset(&def_datctl, 0, sizeof(datctl));
8606 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8607 def_datctl.copy_numbers[0] = -1;
8608 def_datctl.get_numbers[0] = -1;
8609 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8610 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8611 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8612
8613 /* Scan command line options. */
8614
8615 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8616 {
8617 char *endptr;
8618 char *arg = argv[op];
8619 unsigned long uli;
8620
8621 /* List modifiers and exit. */
8622
8623 if (strcmp(arg, "-LM") == 0)
8624 {
8625 display_modifiers();
8626 goto EXIT;
8627 }
8628
8629 /* Display and/or set return code for configuration options. */
8630
8631 if (strcmp(arg, "-C") == 0)
8632 {
8633 yield = c_option(argv[op + 1]);
8634 goto EXIT;
8635 }
8636
8637 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8638 and 32-bit modes because that won't happen naturally when 8-bit is also
8639 configured. Also call some other functions that are not otherwise used. This
8640 means that a coverage report won't claim there are uncalled functions. */
8641
8642 if (strcmp(arg, "-8") == 0)
8643 {
8644 #ifdef SUPPORT_PCRE2_8
8645 test_mode = PCRE8_MODE;
8646 (void)pcre2_set_bsr_8(pat_context8, 999);
8647 (void)pcre2_set_newline_8(pat_context8, 999);
8648 #else
8649 fprintf(stderr,
8650 "** This version of PCRE2 was built without 8-bit support\n");
8651 exit(1);
8652 #endif
8653 }
8654
8655 else if (strcmp(arg, "-16") == 0)
8656 {
8657 #ifdef SUPPORT_PCRE2_16
8658 test_mode = PCRE16_MODE;
8659 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8660 (void)pcre2_set_bsr_16(pat_context16, 999);
8661 (void)pcre2_set_newline_16(pat_context16, 999);
8662 #else
8663 fprintf(stderr,
8664 "** This version of PCRE2 was built without 16-bit support\n");
8665 exit(1);
8666 #endif
8667 }
8668
8669 else if (strcmp(arg, "-32") == 0)
8670 {
8671 #ifdef SUPPORT_PCRE2_32
8672 test_mode = PCRE32_MODE;
8673 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8674 (void)pcre2_set_bsr_32(pat_context32, 999);
8675 (void)pcre2_set_newline_32(pat_context32, 999);
8676 #else
8677 fprintf(stderr,
8678 "** This version of PCRE2 was built without 32-bit support\n");
8679 exit(1);
8680 #endif
8681 }
8682
8683 /* Set quiet (no version verification) */
8684
8685 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8686
8687 /* Set system stack size */
8688
8689 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8690 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8691 {
8692 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8693 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8694 exit(1);
8695 #else
8696 int rc;
8697 uint32_t stack_size;
8698 struct rlimit rlim;
8699 if (U32OVERFLOW(uli))
8700 {
8701 fprintf(stderr, "** Argument for -S is too big\n");
8702 exit(1);
8703 }
8704 stack_size = (uint32_t)uli;
8705 getrlimit(RLIMIT_STACK, &rlim);
8706 rlim.rlim_cur = stack_size * 1024 * 1024;
8707 if (rlim.rlim_cur > rlim.rlim_max)
8708 {
8709 fprintf(stderr,
8710 "pcre2test: requested stack size %luMiB is greater than hard limit "
8711 "%luMiB\n", (unsigned long int)stack_size,
8712 (unsigned long int)(rlim.rlim_max));
8713 exit(1);
8714 }
8715 rc = setrlimit(RLIMIT_STACK, &rlim);
8716 if (rc != 0)
8717 {
8718 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8719 (unsigned long int)stack_size, strerror(errno));
8720 exit(1);
8721 }
8722 op++;
8723 argc--;
8724 #endif
8725 }
8726
8727 /* Set some common pattern and subject controls */
8728
8729 else if (strcmp(arg, "-AC") == 0)
8730 {
8731 def_patctl.options |= PCRE2_AUTO_CALLOUT;
8732 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8733 }
8734 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
8735 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
8736 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
8737 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8738 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
8739 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
8740 strcmp(arg, "-jitfast") == 0)
8741 {
8742 if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
8743 else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
8744 def_patctl.jit = JIT_DEFAULT; /* full & partial */
8745 #ifndef SUPPORT_JIT
8746 fprintf(stderr, "** Warning: JIT support is not available: "
8747 "-jit[fast|verify] calls functions that do nothing.\n");
8748 #endif
8749 }
8750
8751 /* Set timing parameters */
8752
8753 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8754 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8755 {
8756 int both = arg[2] == 0;
8757 showtotaltimes = arg[1] == 'T';
8758 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8759 {
8760 if (uli == 0)
8761 {
8762 fprintf(stderr, "** Argument for %s must not be zero\n", arg);
8763 exit(1);
8764 }
8765 if (U32OVERFLOW(uli))
8766 {
8767 fprintf(stderr, "** Argument for %s is too big\n", arg);
8768 exit(1);
8769 }
8770 timeitm = (int)uli;
8771 op++;
8772 argc--;
8773 }
8774 else timeitm = LOOPREPEAT;
8775 if (both) timeit = timeitm;
8776 }
8777
8778 /* Give help */
8779
8780 else if (strcmp(arg, "-help") == 0 ||
8781 strcmp(arg, "--help") == 0)
8782 {
8783 usage();
8784 goto EXIT;
8785 }
8786
8787 /* Show version */
8788
8789 else if (strcmp(arg, "-version") == 0 ||
8790 strcmp(arg, "--version") == 0)
8791 {
8792 print_version(stdout);
8793 goto EXIT;
8794 }
8795
8796 /* The following options save their data for processing once we know what
8797 the running mode is. */
8798
8799 else if (strcmp(arg, "-error") == 0)
8800 {
8801 arg_error = argv[op+1];
8802 goto CHECK_VALUE_EXISTS;
8803 }
8804
8805 else if (strcmp(arg, "-subject") == 0)
8806 {
8807 arg_subject = argv[op+1];
8808 goto CHECK_VALUE_EXISTS;
8809 }
8810
8811 else if (strcmp(arg, "-pattern") == 0)
8812 {
8813 arg_pattern = argv[op+1];
8814 CHECK_VALUE_EXISTS:
8815 if (argc <= 2)
8816 {
8817 fprintf(stderr, "** Missing value for %s\n", arg);
8818 yield = 1;
8819 goto EXIT;
8820 }
8821 op++;
8822 argc--;
8823 }
8824
8825 /* Unrecognized option */
8826
8827 else
8828 {
8829 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8830 usage();
8831 yield = 1;
8832 goto EXIT;
8833 }
8834 op++;
8835 argc--;
8836 }
8837
8838 /* If -error was present, get the error numbers, show the messages, and exit.
8839 We wait to do this until we know which mode we are in. */
8840
8841 if (arg_error != NULL)
8842 {
8843 int len;
8844 int errcode;
8845 char *endptr;
8846
8847 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8848 least 128 code units, because it is used for retrieving error messages. */
8849
8850 #ifdef SUPPORT_PCRE2_16
8851 if (test_mode == PCRE16_MODE)
8852 {
8853 pbuffer16_size = 256;
8854 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8855 if (pbuffer16 == NULL)
8856 {
8857 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8858 pbuffer16_size);
8859 yield = 1;
8860 goto EXIT;
8861 }
8862 }
8863 #endif
8864
8865 #ifdef SUPPORT_PCRE2_32
8866 if (test_mode == PCRE32_MODE)
8867 {
8868 pbuffer32_size = 512;
8869 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8870 if (pbuffer32 == NULL)
8871 {
8872 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8873 pbuffer32_size);
8874 yield = 1;
8875 goto EXIT;
8876 }
8877 }
8878 #endif
8879
8880 /* Loop along a list of error numbers. */
8881
8882 for (;;)
8883 {
8884 errcode = strtol(arg_error, &endptr, 10);
8885 if (*endptr != 0 && *endptr != CHAR_COMMA)
8886 {
8887 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8888 yield = 1;
8889 goto EXIT;
8890 }
8891 printf("Error %d: ", errcode);
8892 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8893 if (len < 0)
8894 {
8895 switch (len)
8896 {
8897 case PCRE2_ERROR_BADDATA:
8898 printf("PCRE2_ERROR_BADDATA (unknown error number)");
8899 break;
8900
8901 case PCRE2_ERROR_NOMEMORY:
8902 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8903 break;
8904
8905 default:
8906 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8907 break;
8908 }
8909 }
8910 else
8911 {
8912 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8913 }
8914 printf("\n");
8915 if (*endptr == 0) goto EXIT;
8916 arg_error = endptr + 1;
8917 }
8918 /* Control never reaches here */
8919 } /* End of -error handling */
8920
8921 /* Initialize things that cannot be done until we know which test mode we are
8922 running in. Exercise the general context copying and match data size functions,
8923 which are not otherwise used. */
8924
8925 code_unit_size = test_mode/8;
8926 max_oveccount = DEFAULT_OVECCOUNT;
8927
8928 /* Use macros to save a lot of duplication. */
8929
8930 #define CREATECONTEXTS \
8931 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8932 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8933 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8934 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8935 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8936 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8937 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8938 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8939 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8940
8941 #define CONTEXTTESTS \
8942 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8943 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8944 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8945 (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL); \
8946 (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
8947
8948
8949 /* Call the appropriate functions for the current mode, and exercise some
8950 functions that are not otherwise called. */
8951
8952 #ifdef SUPPORT_PCRE2_8
8953 #undef BITS
8954 #define BITS 8
8955 if (test_mode == PCRE8_MODE)
8956 {
8957 CREATECONTEXTS;
8958 CONTEXTTESTS;
8959 }
8960 #endif
8961
8962 #ifdef SUPPORT_PCRE2_16
8963 #undef BITS
8964 #define BITS 16
8965 if (test_mode == PCRE16_MODE)
8966 {
8967 CREATECONTEXTS;
8968 CONTEXTTESTS;
8969 }
8970 #endif
8971
8972 #ifdef SUPPORT_PCRE2_32
8973 #undef BITS
8974 #define BITS 32
8975 if (test_mode == PCRE32_MODE)
8976 {
8977 CREATECONTEXTS;
8978 CONTEXTTESTS;
8979 }
8980 #endif
8981
8982 /* Set a default parentheses nest limit that is large enough to run the
8983 standard tests (this also exercises the function). */
8984
8985 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8986
8987 /* Handle command line modifier settings, sending any error messages to
8988 stderr. We need to know the mode before modifying the context, and it is tidier
8989 to do them all in the same way. */
8990
8991 outfile = stderr;
8992 if ((arg_pattern != NULL &&
8993 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8994 (arg_subject != NULL &&
8995 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8996 {
8997 yield = 1;
8998 goto EXIT;
8999 }
9000
9001 /* Sort out the input and output files, defaulting to stdin/stdout. */
9002
9003 infile = stdin;
9004 outfile = stdout;
9005
9006 if (argc > 1 && strcmp(argv[op], "-") != 0)
9007 {
9008 infile = fopen(argv[op], INPUT_MODE);
9009 if (infile == NULL)
9010 {
9011 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9012 yield = 1;
9013 goto EXIT;
9014 }
9015 }
9016
9017 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9018 if (INTERACTIVE(infile)) using_history();
9019 #endif
9020
9021 if (argc > 2)
9022 {
9023 outfile = fopen(argv[op+1], OUTPUT_MODE);
9024 if (outfile == NULL)
9025 {
9026 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9027 yield = 1;
9028 goto EXIT;
9029 }
9030 }
9031
9032 /* Output a heading line unless quiet, then process input lines. */
9033
9034 if (!quiet) print_version(outfile);
9035
9036 SET(compiled_code, NULL);
9037
9038 #ifdef SUPPORT_PCRE2_8
9039 preg.re_pcre2_code = NULL;
9040 preg.re_match_data = NULL;
9041 #endif
9042
9043 while (notdone)
9044 {
9045 uint8_t *p;
9046 int rc = PR_OK;
9047 BOOL expectdata = TEST(compiled_code, !=, NULL);
9048 #ifdef SUPPORT_PCRE2_8
9049 expectdata |= preg.re_pcre2_code != NULL;
9050 #endif
9051
9052 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
9053 break;
9054 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9055 fflush(outfile);
9056 p = buffer;
9057
9058 /* If we have a pattern set up for testing, or we are skipping after a
9059 compile failure, a blank line terminates this test. */
9060
9061 if (expectdata || skipping)
9062 {
9063 while (isspace(*p)) p++;
9064 if (*p == 0)
9065 {
9066 #ifdef SUPPORT_PCRE2_8
9067 if (preg.re_pcre2_code != NULL)
9068 {
9069 regfree(&preg);
9070 preg.re_pcre2_code = NULL;
9071 preg.re_match_data = NULL;
9072 }
9073 #endif /* SUPPORT_PCRE2_8 */
9074 if (TEST(compiled_code, !=, NULL))
9075 {
9076 SUB1(pcre2_code_free, compiled_code);
9077 SET(compiled_code, NULL);
9078 }
9079 skipping = FALSE;
9080 setlocale(LC_CTYPE, "C");
9081 }
9082
9083 /* Otherwise, if we are not skipping, and the line is not a data comment
9084 line starting with "\=", process a data line. */
9085
9086 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9087 {
9088 rc = process_data();
9089 }
9090 }
9091
9092 /* We do not have a pattern set up for testing. Lines starting with # are
9093 either comments or special commands. Blank lines are ignored. Otherwise, the
9094 line must start with a valid delimiter. It is then processed as a pattern
9095 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9096 valgrind, make the unused part of the buffer undefined, to catch overruns. */
9097
9098 else if (*p == '#')
9099 {
9100 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9101 rc = process_command();
9102 }
9103
9104 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9105 {
9106 rc = process_pattern();
9107 dfa_matched = 0;
9108 }
9109
9110 else
9111 {
9112 while (isspace(*p)) p++;
9113 if (*p != 0)
9114 {
9115 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9116 *buffer);
9117 rc = PR_SKIP;
9118 }
9119 }
9120
9121 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9122 else if (rc == PR_ABEND)
9123 {
9124 fprintf(outfile, "** pcre2test run abandoned\n");
9125 yield = 1;
9126 goto EXIT;
9127 }
9128 }
9129
9130 /* Finish off a normal run. */
9131
9132 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9133
9134 if (showtotaltimes)
9135 {
9136 const char *pad = "";
9137 fprintf(outfile, "--------------------------------------\n");
9138 if (timeit > 0)
9139 {
9140 fprintf(outfile, "Total compile time %.4f milliseconds\n",
9141 (((double)total_compile_time * 1000.0) / (double)timeit) /
9142 (double)CLOCKS_PER_SEC);
9143 if (total_jit_compile_time > 0)
9144 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
9145 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9146 (double)CLOCKS_PER_SEC);
9147 pad = " ";
9148 }
9149 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9150 (((double)total_match_time * 1000.0) / (double)timeitm) /
9151 (double)CLOCKS_PER_SEC);
9152 }
9153
9154
9155 EXIT:
9156
9157 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9158 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9159 #endif
9160
9161 if (infile != NULL && infile != stdin) fclose(infile);
9162 if (outfile != NULL && outfile != stdout) fclose(outfile);
9163
9164 free(buffer);
9165 free(dbuffer);
9166 free(pbuffer8);
9167 free(dfa_workspace);
9168 free((void *)locale_tables);
9169 free(tables3);
9170 PCRE2_MATCH_DATA_FREE(match_data);
9171 SUB1(pcre2_code_free, compiled_code);
9172
9173 while(patstacknext-- > 0)
9174 {
9175 SET(compiled_code, patstack[patstacknext]);
9176 SUB1(pcre2_code_free, compiled_code);
9177 }
9178
9179 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9180 if (jit_stack != NULL)
9181 {
9182 PCRE2_JIT_STACK_FREE(jit_stack);
9183 }
9184
9185 #define FREECONTEXTS \
9186 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9187 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9188 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9189 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9190 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9191 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9192 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9193 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9194
9195 #ifdef SUPPORT_PCRE2_8
9196 #undef BITS
9197 #define BITS 8
9198 if (preg.re_pcre2_code != NULL) regfree(&preg);
9199 FREECONTEXTS;
9200 #endif
9201
9202 #ifdef SUPPORT_PCRE2_16
9203 #undef BITS
9204 #define BITS 16
9205 free(pbuffer16);
9206 FREECONTEXTS;
9207 #endif
9208
9209 #ifdef SUPPORT_PCRE2_32
9210 #undef BITS
9211 #define BITS 32
9212 free(pbuffer32);
9213 FREECONTEXTS;
9214 #endif
9215
9216 #if defined(__VMS)
9217 yield = SS$_NORMAL; /* Return values via DCL symbols */
9218 #endif
9219
9220 return yield;
9221 }
9222
9223 /* End of pcre2test.c */
9224