1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2019 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Debugging code enabler */
82
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102
/* The check for interactive input lives in a single macro so that it can be
replaced in one place if a particular environment needs a different test. */

#define INTERACTIVE(f) isatty(fileno((f)))
107
108
109 /* ---------------------- System-specific definitions ---------------------- */
110
111 /* A number of things vary for Windows builds. Originally, pcretest opened its
112 input and output without "b"; then I was told that "b" was needed in some
113 environments, so it was added for release 5.0 to both the input and output. (It
114 makes no difference on Unix-like systems.) Later I was told that it is wrong
115 for the input on Windows. I've now abstracted the modes into macros that are
116 set here, to make it easier to fiddle with them, and removed "b" from the input
117 mode under Windows. The BINARY versions are used when saving/restoring compiled
118 patterns. */
119
#if defined(_WIN32) || defined(WIN32)

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

/* Windows: text input is opened without "b"; output is opened with it. */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these with a leading underscore, though some environments
already define the plain names, hence the guards. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#endif  /* Windows or not */

/* The BINARY modes are the same on every platform handled above. */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
159
160 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
161 user [2] provided alternative code which worked better for him. I have
162 commented out the original, but kept it around just in case. */
163
164 #ifdef __VMS
165 #include <ssdef.h>
166 /* These two includes came from [2]. */
167 #include descrip
168 #include lib$routines
169 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
170 #endif
171
/* printf() conversions for pointer differences and sizes, plus a cast to apply
to size arguments. The C99 forms "%td" and "%zu" are used only when the
compiler is not VC, declares C99 or later via __STDC_VERSION__, and has not
been marked with DISABLE_PERCENT_ZT (for compilers that claim C99 but do not
support them). In every other case "%lu" is used and sizes are cast to
unsigned long int. */

#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && \
    __STDC_VERSION__ >= 199901L && !defined(DISABLE_PERCENT_ZT)
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#define SIZ_CAST
#else
#define PTR_FORM "lu"
#define SIZ_FORM "lu"
#define SIZ_CAST (unsigned long int)
#endif
184
185 /* ------------------End of system-specific definitions -------------------- */
186
/* Glueing macros that are used in several places below. glue() pastes its two
arguments together exactly as written; G() goes through one more level of
macro call so that arguments which are themselves macros are expanded before
being pasted. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
191
192 /* Miscellaneous parameters and manifests */
193
/* Supply CLOCKS_PER_SEC if it has not been defined already: take it from
CLK_TCK when that exists, otherwise fall back to 100. */

#ifdef CLOCKS_PER_SEC
/* Already defined; nothing to do. */
#elif defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
201
#define CFORE_UNSET          UINT32_MAX  /* Unset value for startend/cfail/cerror fields */
#define CONVERT_UNSET        UINT32_MAX  /* Unset value for convert_type field */
#define DFA_WS_DIMENSION     1000        /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT    15          /* Default ovector count */
#define JUNK_OFFSET          0xdeadbeef  /* For initializing ovector */
#define LOCALESIZE           32          /* Size of locale name */
#define LOOPREPEAT           500000      /* Default loop count for timing */
#define MALLOCLISTSIZE       20          /* For remembering mallocs */
#define PARENS_NEST_DEFAULT  220         /* Default parentheses nest limit */
#define PATSTACKSIZE         20          /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE      100         /* Field for reading 8-bit replacement */
#define VERSION_SIZE         64          /* Size of buffer for the version strings */

/* The buffer into which replacement strings are copied must be big enough to
hold them as 32-bit code units. Note that this size is a count of bytes. */

#define REPLACE_BUFFSIZE     1024

/* Execution modes: the value is the code unit width in bits. */

#define PCRE8_MODE    8
#define PCRE16_MODE   16
#define PCRE32_MODE   32

/* Processing returns */

enum {
  PR_OK,
  PR_SKIP,
  PR_ABEND
};
229
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */
234
/* PRINTABLE(c) is a fixed-range test that does not consult the locale: 64 to
254 inclusive when EBCDIC is defined, otherwise 32 to 126 inclusive. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) uses isprint() when use_tables is not NULL and c is less than
256; in all other cases it uses PRINTABLE(). The argument is parenthesized in
the range check so that an expression argument (for example a ?: or |
expression) is compared as a whole rather than being split by operator
precedence. Note that c is evaluated more than once, so it must not have side
effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
242
243 /* We have to include some of the library source files because we need
244 to use some of the macros, internal structure definitions, and other internal
245 values - pcre2test has "inside information" compared to an application program
246 that strictly follows the PCRE2 API.
247
248 Before including pcre2_internal.h we define PRIV so that it does not get
249 defined therein. This ensures that PRIV names in the included files do not
250 clash with those in the libraries. Also, although pcre2_internal.h does itself
251 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
252 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
253 for building the library. */
254
255 #define PRIV(name) name
256 #define PCRE2_CODE_UNIT_WIDTH 0
257 #include "pcre2.h"
258 #include "pcre2posix.h"
259 #include "pcre2_internal.h"
260
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
264
265 #define PCRE2_PCRE2TEST
266 #include "pcre2_tables.c"
267 #include "pcre2_ucd.c"
268
269 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
270 check needed for overflow depends on whether long ints are in fact longer than
271 ints. They are defined not to be shorter. */
272
/* U32OVERFLOW(x) checks an unsigned long value for being out of range for an
unsigned 32-bit integer. When unsigned long is wider than 32 bits, anything
above UINT32_MAX is an overflow. When it is not wider, the value UINT32_MAX
itself is treated as an overflow. The argument is parenthesized so that an
expression argument is tested as a whole. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif
278
/* S32OVERFLOW(x) checks a long value for being out of range for a signed
32-bit integer. When long is wider than 32 bits, anything outside
INT32_MIN..INT32_MAX is an overflow. When it is not wider, the two extreme
values themselves are treated as overflows. The argument is parenthesized so
that an expression argument is tested as a whole; it is evaluated twice. */

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
284
285 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
286 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
287 defined. We can now include it for each supported code unit width. Because
288 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
289 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
290 while including these files, and then restore it to a no-op. Because LINK_SIZE
291 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
292 these inclusions should not be changed. */
293
294 #undef PCRE2_SUFFIX
295 #undef PCRE2_CODE_UNIT_WIDTH
296
297 #ifdef SUPPORT_PCRE2_8
298 #define PCRE2_CODE_UNIT_WIDTH 8
299 #define PCRE2_SUFFIX(a) G(a,8)
300 #include "pcre2_intmodedep.h"
301 #include "pcre2_printint.c"
302 #undef PCRE2_CODE_UNIT_WIDTH
303 #undef PCRE2_SUFFIX
304 #endif /* SUPPORT_PCRE2_8 */
305
306 #ifdef SUPPORT_PCRE2_16
307 #define PCRE2_CODE_UNIT_WIDTH 16
308 #define PCRE2_SUFFIX(a) G(a,16)
309 #include "pcre2_intmodedep.h"
310 #include "pcre2_printint.c"
311 #undef PCRE2_CODE_UNIT_WIDTH
312 #undef PCRE2_SUFFIX
313 #endif /* SUPPORT_PCRE2_16 */
314
315 #ifdef SUPPORT_PCRE2_32
316 #define PCRE2_CODE_UNIT_WIDTH 32
317 #define PCRE2_SUFFIX(a) G(a,32)
318 #include "pcre2_intmodedep.h"
319 #include "pcre2_printint.c"
320 #undef PCRE2_CODE_UNIT_WIDTH
321 #undef PCRE2_SUFFIX
322 #endif /* SUPPORT_PCRE2_32 */
323
324 #define PCRE2_SUFFIX(a) a
325
326 /* We need to be able to check input text for UTF-8 validity, whatever code
327 widths are actually available, because the input to pcre2test is always in
328 8-bit code units. So we include the UTF validity checking function for 8-bit
329 code units. */
330
331 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
332
333 #define PCRE2_CODE_UNIT_WIDTH 8
334 #undef PCRE2_SPTR
335 #define PCRE2_SPTR PCRE2_SPTR8
336 #include "pcre2_valid_utf.c"
337 #undef PCRE2_CODE_UNIT_WIDTH
338 #undef PCRE2_SPTR
339
/* Choose the default test mode and the width-specific names used for things
that are the same in all modes (the config function, JIT stack, contexts, and
version string). The first supported width, taken in the order 8, 16, 32,
wins: 8-bit is the default whenever it is available, and 16-bit or 32-bit can
then be selected by a command-line option. Without 8-bit support there must be
16-bit or 32-bit support, so one of those becomes the default. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE            PCRE8_MODE
#define VERSION_TYPE                 PCRE2_UCHAR8
#define PCRE2_CONFIG                 pcre2_config_8
#define PCRE2_JIT_STACK              pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT   pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT   pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT   pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT     pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE            PCRE16_MODE
#define VERSION_TYPE                 PCRE2_UCHAR16
#define PCRE2_CONFIG                 pcre2_config_16
#define PCRE2_JIT_STACK              pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT   pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT   pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT   pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT     pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE            PCRE32_MODE
#define VERSION_TYPE                 PCRE2_UCHAR32
#define PCRE2_CONFIG                 pcre2_config_32
#define PCRE2_JIT_STACK              pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT   pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT   pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT   pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT     pcre2_real_match_context_32

#endif
376
/* ------------- Structure and table for handling #-commands ------------- */

/* One entry of the command table: a command name and its CMD_xxx value. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Command identifiers. CMD_UNKNOWN comes last and has no entry in the table
that follows. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

/* Number of entries in cmdlist. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
399
400 /* ------------- Structures and tables for handling modifiers -------------- */
401
/* Names of the newline types, one per line. This table must be kept in step
with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
407
408 /* Structure and table for handling pattern conversion types. */
409
410 typedef struct convertstruct {
411 const char *name;
412 uint32_t option;
413 } convertstruct;
414
415 static convertstruct convertlist[] = {
416 { "glob", PCRE2_CONVERT_GLOB },
417 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
418 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
419 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
420 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
421 { "unset", CONVERT_UNSET }};
422
423 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
424
/* Modifier types and applicability. The first group says where a modifier may
be used; the second group says what kind of value it takes. All the values
share one enumeration. */

enum {
  /* Applicability */
  MOD_CTC,    /* Applies to a compile context */
  MOD_CTM,    /* Applies to a match context */
  MOD_PAT,    /* Applies to a pattern */
  MOD_PATP,   /* Ditto, OK for Perl test */
  MOD_DAT,    /* Applies to a data line */
  MOD_PD,     /* Applies to a pattern or a data line */
  MOD_PDP,    /* As MOD_PD, OK for Perl test */
  MOD_PND,    /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,   /* As MOD_PND, OK for Perl test */

  /* Value type */
  MOD_CHR,    /* Is a single character */
  MOD_CON,    /* Is a "convert" type/options list */
  MOD_CTL,    /* Is a control bit */
  MOD_BSR,    /* Is a BSR value */
  MOD_IN2,    /* Is one or two unsigned integers */
  MOD_INS,    /* Is a signed integer */
  MOD_INT,    /* Is an unsigned integer */
  MOD_IND,    /* Is an unsigned integer, but no value => default */
  MOD_NL,     /* Is a newline value */
  MOD_NN,     /* Is a number or a name; more than one may occur */
  MOD_OPT,    /* Is an option bit */
  MOD_SIZ,    /* Is a PCRE2_SIZE value */
  MOD_STR     /* Is a string */
};
449
/* Control bits. Some apply to compiling and some to matching, but because some
can be set either on a pattern or on a data line, every bit must be distinct.
There are so many that they are split across two fields; this is the first
one, and it uses all 32 bits. */

#define CTL_AFTERTEXT        0x00000001u
#define CTL_ALLAFTERTEXT     0x00000002u
#define CTL_ALLCAPTURES      0x00000004u
#define CTL_ALLUSEDTEXT      0x00000008u
#define CTL_ALTGLOBAL        0x00000010u
#define CTL_BINCODE          0x00000020u
#define CTL_CALLOUT_CAPTURE  0x00000040u
#define CTL_CALLOUT_INFO     0x00000080u
#define CTL_CALLOUT_NONE     0x00000100u
#define CTL_DFA              0x00000200u
#define CTL_EXPAND           0x00000400u
#define CTL_FINDLIMITS       0x00000800u
#define CTL_FRAMESIZE        0x00001000u
#define CTL_FULLBINCODE      0x00002000u
#define CTL_GETALL           0x00004000u
#define CTL_GLOBAL           0x00008000u
#define CTL_HEXPAT           0x00010000u  /* Same word as USE_LENGTH */
#define CTL_INFO             0x00020000u
#define CTL_JITFAST          0x00040000u
#define CTL_JITVERIFY        0x00080000u
#define CTL_MARK             0x00100000u
#define CTL_MEMORY           0x00200000u
#define CTL_NULLCONTEXT      0x00400000u
#define CTL_POSIX            0x00800000u
#define CTL_POSIX_NOSUB      0x01000000u
#define CTL_PUSH             0x02000000u  /* These three must be */
#define CTL_PUSHCOPY         0x04000000u  /*   all in the same */
#define CTL_PUSHTABLESCOPY   0x08000000u  /*     word. */
#define CTL_STARTCHAR        0x10000000u
#define CTL_USE_LENGTH       0x20000000u  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT       0x40000000u
#define CTL_ZERO_TERMINATE   0x80000000u

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE|CTL_INFO)  /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL|CTL_GLOBAL)
492
/* Second control word */

#define CTL2_SUBSTITUTE_CALLOUT          0x00000001u
#define CTL2_SUBSTITUTE_EXTENDED         0x00000002u
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  0x00000004u
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    0x00000008u
#define CTL2_SUBSTITUTE_UNSET_EMPTY      0x00000010u
#define CTL2_SUBJECT_LITERAL             0x00000020u
#define CTL2_CALLOUT_NO_WHERE            0x00000040u
#define CTL2_CALLOUT_EXTRA               0x00000080u
#define CTL2_ALLVECTOR                   0x00000100u

/* The two top bits of the second word are informational. */

#define CTL2_NL_SET                      0x40000000u
#define CTL2_BSR_SET                     0x80000000u
507
/* The matching controls that may be set either on a pattern or on a data
line. They are copied from the pattern controls as the initial settings for
the data line controls. CTL_MEMORY is deliberately left out, because it does
different things in the two cases. */

#define CTL_ALLPD ( \
  CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | CTL_ALLUSEDTEXT | \
  CTL_ALTGLOBAL | CTL_GLOBAL | CTL_MARK | CTL_STARTCHAR | CTL_UTF8_INPUT)

#define CTL2_ALLPD ( \
  CTL2_SUBSTITUTE_CALLOUT | CTL2_SUBSTITUTE_EXTENDED | \
  CTL2_SUBSTITUTE_OVERFLOW_LENGTH | CTL2_SUBSTITUTE_UNKNOWN_UNSET | \
  CTL2_SUBSTITUTE_UNSET_EMPTY | CTL2_ALLVECTOR)
529
530 /* Structures for holding modifier information for patterns and subject strings
531 (data). Fields containing modifiers that can be set either for a pattern or a
532 subject must be at the start and in the same order in both cases so that the
533 same offset in the big table below works for both. */
534
535 typedef struct patctl { /* Structure for pattern modifiers. */
536 uint32_t options; /* Must be in same position as datctl */
537 uint32_t control; /* Must be in same position as datctl */
538 uint32_t control2; /* Must be in same position as datctl */
539 uint32_t jitstack; /* Must be in same position as datctl */
540 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
541 uint32_t substitute_skip; /* Must be in same position as patctl */
542 uint32_t substitute_stop; /* Must be in same position as patctl */
543 uint32_t jit;
544 uint32_t stackguard_test;
545 uint32_t tables_id;
546 uint32_t convert_type;
547 uint32_t convert_length;
548 uint32_t convert_glob_escape;
549 uint32_t convert_glob_separator;
550 uint32_t regerror_buffsize;
551 uint8_t locale[LOCALESIZE];
552 } patctl;
553
554 #define MAXCPYGET 10
555 #define LENCPYGET 64
556
557 typedef struct datctl { /* Structure for data line modifiers. */
558 uint32_t options; /* Must be in same position as patctl */
559 uint32_t control; /* Must be in same position as patctl */
560 uint32_t control2; /* Must be in same position as patctl */
561 uint32_t jitstack; /* Must be in same position as patctl */
562 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
563 uint32_t substitute_skip; /* Must be in same position as patctl */
564 uint32_t substitute_stop; /* Must be in same position as patctl */
565 uint32_t startend[2];
566 uint32_t cerror[2];
567 uint32_t cfail[2];
568 int32_t callout_data;
569 int32_t copy_numbers[MAXCPYGET];
570 int32_t get_numbers[MAXCPYGET];
571 uint32_t oveccount;
572 uint32_t offset;
573 uint8_t copy_names[LENCPYGET];
574 uint8_t get_names[LENCPYGET];
575 } datctl;
576
/* Ids for which context to modify. */

enum {
  CTX_PAT,       /* Active pattern context */
  CTX_POPPAT,    /* Ditto, for a popped pattern */
  CTX_DEFPAT,    /* Default pattern context */
  CTX_DAT,       /* Active data (match) context */
  CTX_DEFDAT     /* Default data (match) context */
};
584
/* Macros to simplify the big table below. Each one yields the offset of a
field within a structure: CO in the compile context, MO in the match context,
PO in patctl, and DO in datctl. PD is the same as PO; the table uses it for
fields that are common to patctl and datctl. */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)
#define PO(field) offsetof(patctl, field)
#define PD(field) PO(field)
#define DO(field) offsetof(datctl, field)
592
593 /* Table of all long-form modifiers. Must be in collating sequence of modifier
594 name because it is searched by binary chop. */
595
596 typedef struct modstruct {
597 const char *name;
598 uint16_t which;
599 uint16_t type;
600 uint32_t value;
601 PCRE2_SIZE offset;
602 } modstruct;
603
604 static modstruct modlist[] = {
605 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
606 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
607 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
608 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
609 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
610 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
611 { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
612 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
613 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
614 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
615 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
616 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
617 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
618 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
619 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
620 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
621 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
622 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
623 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
624 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
625 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
626 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
627 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
628 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
629 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
630 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
631 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
632 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
633 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
634 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
635 { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
636 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
637 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
638 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
639 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
640 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
641 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
642 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
643 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
644 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
645 { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
646 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
647 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
648 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
649 { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
650 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
651 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
652 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
653 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
654 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
655 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
656 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
657 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
658 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
659 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
660 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
661 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
662 { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
663 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
664 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
665 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
666 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
667 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
668 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
669 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
670 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
671 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
672 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
673 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
674 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
675 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
676 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
677 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
678 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
679 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
680 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
681 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
682 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
683 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
684 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
685 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
686 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
687 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
688 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
689 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
690 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
691 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
692 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
693 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
694 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
695 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
696 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
697 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
698 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
699 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
700 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
701 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
702 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
703 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
704 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
705 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
706 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
707 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
708 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
709 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
710 { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
711 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
712 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
713 { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
714 { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
715 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
716 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
717 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
718 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
719 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
720 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
721 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
722 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
723 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
724 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
725 };
726
/* Number of entries in modlist. The expansion is parenthesized so that it
behaves as a single operand wherever it appears in an expression (for example
on the right of a % or * operator). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
728
/* Bit masks of the options and controls that may be used together with the
POSIX interface. There is a separate mask for each option or control word;
the masks defined as (0) have no bits set. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS  | \
  PCRE2_DOTALL    | \
  PCRE2_LITERAL   | \
  PCRE2_MULTILINE | \
  PCRE2_UCP       | \
  PCRE2_UTF       | \
  PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_EXPAND       | \
  CTL_HEXPAT       | \
  CTL_POSIX        | \
  CTL_POSIX_NOSUB  | \
  CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL   | \
  PCRE2_NOTEMPTY | \
  PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS ( \
  CTL_AFTERTEXT | \
  CTL_ALLAFTERTEXT)

#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
748
/* Masks of the control bits that are not ignored when 'push' is used, one
mask per control word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE        | \
  CTL_CALLOUT_INFO   | \
  CTL_FULLBINCODE    | \
  CTL_HEXPAT         | \
  CTL_INFO           | \
  CTL_JITVERIFY      | \
  CTL_MEMORY         | \
  CTL_FRAMESIZE      | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 ( \
  CTL2_BSR_SET | \
  CTL2_NL_SET)

/* Masks of the controls that, with 'push', apply at compile time only. */

#define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)
762
/* Mask of the controls that may not be given with #pop or #popcopy. */

#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT         | \
  CTL_POSIX          | \
  CTL_POSIX_NOSUB    | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)
767
768 /* Pattern controls that are mutually exclusive. At present these are all in
769 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
770 CTL_POSIX, so it doesn't need its own entries. */
771
772 static uint32_t exclusive_pat_controls[] = {
773 CTL_POSIX | CTL_PUSH,
774 CTL_POSIX | CTL_PUSHCOPY,
775 CTL_POSIX | CTL_PUSHTABLESCOPY,
776 CTL_PUSH | CTL_PUSHCOPY,
777 CTL_PUSH | CTL_PUSHTABLESCOPY,
778 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
779 CTL_EXPAND | CTL_HEXPAT };
780
781 /* Data controls that are mutually exclusive. At present these are all in the
782 first control word. */
783
784 static uint32_t exclusive_dat_controls[] = {
785 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
786 CTL_FINDLIMITS | CTL_NULLCONTEXT };
787
/* Table of single-character abbreviated modifiers. Each index field starts
out as -1; the first time the modifier is encountered it is replaced by the
index of the full entry in modlist, which saves repeated searching when
multiple test items are processed. This short list is searched serially, so
the order of its entries does not matter. */

typedef struct c1modstruct {
  const char *fullname;   /* Name of the full-length modifier */
  uint32_t    onechar;    /* The single-character abbreviation */
  int         index;      /* Cached modlist index, -1 until first used */
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B', -1 },
  { "info",            'I', -1 },
  { "global",          'g', -1 },
  { "caseless",        'i', -1 },
  { "multiline",       'm', -1 },
  { "no_auto_capture", 'n', -1 },
  { "dotall",          's', -1 },
  { "extended",        'x', -1 }
};

/* Number of entries in c1modlist. The expansion is parenthesized so that it
is a single operand in any surrounding expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
812
/* Table of arguments for the -C command line option. A few helper macros make
the table itself easier to read. Each SUPPORT_n macro becomes 1 when the
corresponding SUPPORT_PCRE2_n is defined; otherwise it is given the value 0
unless it already has a definition. */

#if defined SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#if defined SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#if defined SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined SUPPORT_32
#define SUPPORT_32 0
#endif
835
/* SUPPORT_EBCDIC is 1 and EBCDIC_NL is CHAR_LF when EBCDIC is defined; both
are 0 otherwise. */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif
843
/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined, in which case it is
0. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
849
/* One entry in the table of -C arguments. */

struct coptstruct {
  const char *name;    /* Text of the argument */
  uint32_t    type;    /* One of the CONF_xxx values */
  uint32_t    value;   /* Value associated with the argument */
};

typedef struct coptstruct coptstruct;
855
/* Values for the type field of a coptstruct entry. */

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
862
863 static coptstruct coptlist[] = {
864 { "backslash-C", CONF_FIX, BACKSLASH_C },
865 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
866 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
867 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
868 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
869 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
870 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
871 { "pcre2-16", CONF_FIX, SUPPORT_16 },
872 { "pcre2-32", CONF_FIX, SUPPORT_32 },
873 { "pcre2-8", CONF_FIX, SUPPORT_8 },
874 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
875 };
876
877 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
878
879 #undef SUPPORT_8
880 #undef SUPPORT_16
881 #undef SUPPORT_32
882 #undef SUPPORT_EBCDIC
883
884
885 /* ----------------------- Static variables ------------------------ */
886
887 static FILE *infile;
888 static FILE *outfile;
889
890 static const void *last_callout_mark;
891 static PCRE2_JIT_STACK *jit_stack = NULL;
892 static size_t jit_stack_size = 0;
893
894 static BOOL first_callout;
895 static BOOL jit_was_used;
896 static BOOL restrict_for_perl_test = FALSE;
897 static BOOL show_memory = FALSE;
898
899 static int code_unit_size; /* Bytes */
900 static int jitrc; /* Return from JIT compile */
901 static int test_mode = DEFAULT_TEST_MODE;
902 static int timeit = 0;
903 static int timeitm = 0;
904
/* Accumulated clock totals. These are given internal linkage like the other
variables in this section, so that they do not add names to the program's
external namespace. */

static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
908
/* Unsigned counters and limits. Only forbid_utf and local_newline_default
have explicit initializers; the others rely on static zero-initialization. */

static uint32_t callout_count;
static uint32_t dfa_matched;
static uint32_t forbid_utf = 0;
static uint32_t maxcapcount;
static uint32_t maxlookbehind;
static uint32_t max_oveccount;

static uint16_t local_newline_default = 0;
917
918 static VERSION_TYPE jittarget[VERSION_SIZE];
919 static VERSION_TYPE version[VERSION_SIZE];
920 static VERSION_TYPE uversion[VERSION_SIZE];
921
922 static patctl def_patctl;
923 static patctl pat_patctl;
924 static datctl def_datctl;
925 static datctl dat_datctl;
926
927 static void *patstack[PATSTACKSIZE];
928 static int patstacknext = 0;
929
930 static void *malloclist[MALLOCLISTSIZE];
931 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
932 static uint32_t malloclistptr = 0;
933
/* The regex_t block exists only when the 8-bit library is supported. */

#ifdef SUPPORT_PCRE2_8
static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
#endif

/* Pointers that start out NULL, and a 32-byte buffer for a locale name. */

static uint8_t locale_name[32];
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static int *dfa_workspace = NULL;
942
/* Buffers are needed for building 16-bit and 32-bit strings. 8-bit strings
need no rebuilding, but the same naming scheme is set up for them so that the
macros can treat all widths alike. All input lines are read into "buffer",
which has the same size as pbuffer8. Pattern lines are always copied to
pbuffer8 for use in callouts, even when they are actually compiled from
pbuffer16 or pbuffer32. */

static uint8_t *buffer = NULL;
static uint8_t *pbuffer8 = NULL;
static size_t pbuffer8_size = 50000;      /* Initial size, bytes */
952
/* All processed data lines are put into dbuffer. In non-8-bit modes it is
cast as needed, and it grows as necessary for long data lines. */

static uint8_t *dbuffer = NULL;
static size_t dbuffer_size = 16384u;      /* Initial size, bytes (1u << 14) */
958
959
960 /* ---------------- Mode-dependent variables -------------------*/
961
/* Variables used when the 8-bit library is supported. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_convert_context_8 *con_context8;
static pcre2_convert_context_8 *default_con_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif
970
/* Variables used when the 16-bit library is supported. */

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static uint16_t                 *pbuffer16 = NULL;
static PCRE2_SIZE                pbuffer16_size = 0;   /* Set only when needed */
#endif
981
/* Variables used when the 32-bit library is supported. */

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static uint32_t                 *pbuffer32 = NULL;
static PCRE2_SIZE                pbuffer32_size = 0;   /* Set only when needed */
#endif
992
993
/* ---------------- Macros that work in all modes ----------------- */

/* CAST8VAR casts the mode-selected variant of a variable to (uint8_t *) by
way of CASTVAR. SET and SETPLUS apply "=" and "+=" to the mode-selected
variant by way of SETOP. strlen8 calls strlen() on a string that is not held
as plain char; its argument is parenthesized so that the cast applies to the
whole argument expression rather than only to its first operand. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8(x) strlen((char *)(x))
1000
1001
1002 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1003
1004 /* Define macros for variables and functions that must be selected dynamically
1005 depending on the mode setting (8, 16, 32). These are dependent on which modes
1006 are supported. */
1007
1008 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1009 defined (SUPPORT_PCRE2_32)) >= 2
1010
1011 /* ----- All three modes supported ----- */
1012
1013 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1014
/* CASTFLD yields a field of the structure selected by test_mode, cast to the
given type. */

#define CASTFLD(type,base,field) ( \
  (test_mode == PCRE8_MODE)?  (type)(G(base,8)->field) : \
  (test_mode == PCRE16_MODE)? (type)(G(base,16)->field) : \
                              (type)(G(base,32)->field))

/* CASTVAR yields the variable selected by test_mode, cast to the given
type. */

#define CASTVAR(type,var) ( \
  (test_mode == PCRE8_MODE)?  (type)G(var,8) : \
  (test_mode == PCRE16_MODE)? (type)G(var,16) : \
                              (type)G(var,32))

/* CODE_UNIT fetches element idx of ptr, treating ptr as a string of the code
unit width selected by test_mode, and returns it as a uint32_t. */

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == PCRE8_MODE)? \
    (uint32_t)(((PCRE2_SPTR8)(ptr))[idx]) : \
  (test_mode == PCRE16_MODE)? \
    (uint32_t)(((PCRE2_SPTR16)(ptr))[idx]) : \
    (uint32_t)(((PCRE2_SPTR32)(ptr))[idx]))
1026
/* Copy one convert context over another, for the width selected by
test_mode. */

#define CONCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_convert_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_convert_context_32))
1033
/* Copy count code units from src into the buffer selected by test_mode. The
byte length passed to memcpy is count scaled by the code unit size. Note that
the last branch tests explicitly for PCRE32_MODE, so nothing is copied if
test_mode matches none of the three modes. */

#define CONVERT_COPY(dst,src,count) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), (char *)src, count); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), (char *)src, (count)*2); \
  else if (test_mode == PCRE32_MODE) \
    memcpy(G(dst,32), (char *)src, (count)*4)
1041
/* Copy one match context over another, for the width selected by
test_mode. */

#define DATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_match_context_32))
1048
/* FLD yields a field of the structure selected by test_mode. */

#define FLD(base,field) ( \
  (test_mode == PCRE8_MODE)?  G(base,8)->field : \
  (test_mode == PCRE16_MODE)? G(base,16)->field : \
                              G(base,32)->field)
1051
/* Copy one compile context over another, for the width selected by
test_mode. */

#define PATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_compile_context_32))
1058
/* Call the pchars function for the width selected by test_mode on the string
that starts at ptr plus off code units. PCHARS stores the function's result
in result; PCHARSV discards it. */

#define PCHARS(result, ptr, off, len, utf, fp) \
  if (test_mode == PCRE32_MODE) \
    result = pchars32((PCRE2_SPTR32)(ptr)+off, len, utf, fp); \
  else if (test_mode == PCRE16_MODE) \
    result = pchars16((PCRE2_SPTR16)(ptr)+off, len, utf, fp); \
  else \
    result = pchars8((PCRE2_SPTR8)(ptr)+off, len, utf, fp)

#define PCHARSV(ptr, off, len, utf, fp) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(ptr)+off, len, utf, fp); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(ptr)+off, len, utf, fp); \
  else \
    (void)pchars8((PCRE2_SPTR8)(ptr)+off, len, utf, fp)
1074
/* Call pcre2_callout_enumerate for the current mode's compiled_code, casting
the callback fn to the function pointer type that width expects. The return
value is stored in rc. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn, data); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_callout_enumerate_16(compiled_code16, \
      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))fn, data); \
  else \
    rc = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))fn, data)
1085
/* Copy the code given by src and store the copy in the mode-selected
variable dst. */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == PCRE8_MODE) \
    G(dst,8) = pcre2_code_copy_8(src); \
  else if (test_mode == PCRE16_MODE) \
    G(dst,16) = pcre2_code_copy_16(src); \
  else \
    G(dst,32) = pcre2_code_copy_32(src)

/* Copy the mode-selected code src and store the copy in dst as a void
pointer. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_8(G(src,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_16(G(src,16)); \
  else \
    dst = (void *)pcre2_code_copy_32(G(src,32))

/* As PCRE2_CODE_COPY_TO_VOID, but calls pcre2_code_copy_with_tables. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_8(G(src,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_with_tables_16(G(src,16)); \
  else \
    dst = (void *)pcre2_code_copy_with_tables_32(G(src,32))
1109
/* Call pcre2_compile for the width selected by test_mode. Both the result
variable and the pattern variable are mode-selected; the remaining arguments
are passed through unchanged. */

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroffset,cctx) \
  if (test_mode == PCRE8_MODE) \
    G(code,8) = pcre2_compile_8(G(pat,8), len, opts, errcode, erroffset, cctx); \
  else if (test_mode == PCRE16_MODE) \
    G(code,16) = pcre2_compile_16(G(pat,16), len, opts, errcode, erroffset, cctx); \
  else \
    G(code,32) = pcre2_compile_32(G(pat,32), len, opts, errcode, erroffset, cctx)
1117
/* Call pcre2_converted_pattern_free for the width selected by test_mode,
casting pat to that width's character pointer type. */

#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pat); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pat); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)pat)
1122
/* Call pcre2_dfa_match for the width selected by test_mode and store the
return value in rc. The code and match data arguments are mode-selected
variables, and the subject is cast to that width's string pointer type. */

#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,ws,wscount) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_dfa_match_8(G(code,8), (PCRE2_SPTR8)subject, len, start, opts, \
      G(mdata,8), mctx, ws, wscount); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_dfa_match_16(G(code,16), (PCRE2_SPTR16)subject, len, start, opts, \
      G(mdata,16), mctx, ws, wscount); \
  else \
    rc = pcre2_dfa_match_32(G(code,32), (PCRE2_SPTR32)subject, len, start, opts, \
      G(mdata,32), mctx, ws, wscount)
1130
/* Call pcre2_get_error_message for the width selected by test_mode and store
the return value in rc. The buffer is the mode-selected variable buf, and the
length argument is that buffer's _size variable divided by the code unit size
in bytes. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_error_message_8(errnum, G(buf,8), G(G(buf,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_error_message_16(errnum, G(buf,16), G(G(buf,16),_size)/2); \
  else \
    rc = pcre2_get_error_message_32(errnum, G(buf,32), G(G(buf,32),_size)/4)
1138
/* Store in result the value of pcre2_get_ovector_count for the mode-selected
match data. */

#define PCRE2_GET_OVECTOR_COUNT(result,mdata) \
  if (test_mode == PCRE8_MODE) \
    result = pcre2_get_ovector_count_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    result = pcre2_get_ovector_count_16(G(mdata,16)); \
  else \
    result = pcre2_get_ovector_count_32(G(mdata,32))

/* Store in result the value of pcre2_get_startchar for the mode-selected
match data. */

#define PCRE2_GET_STARTCHAR(result,mdata) \
  if (test_mode == PCRE8_MODE) \
    result = pcre2_get_startchar_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    result = pcre2_get_startchar_16(G(mdata,16)); \
  else \
    result = pcre2_get_startchar_32(G(mdata,32))
1154
/* Call pcre2_jit_compile on the mode-selected code and store the return
value in rc. */

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_compile_8(G(code,8), opts); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_compile_16(G(code,16), opts); \
  else \
    rc = pcre2_jit_compile_32(G(code,32), opts)
1159
/* Call pcre2_jit_free_unused_memory on the mode-selected context. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(ctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(ctx,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(ctx,32))
1164
/* Call pcre2_jit_match for the width selected by test_mode and store the
return value in rc. The code and match data arguments are mode-selected
variables, and the subject is cast to that width's string pointer type. */

#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_match_8(G(code,8), (PCRE2_SPTR8)subject, len, start, opts, \
      G(mdata,8), mctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_match_16(G(code,16), (PCRE2_SPTR16)subject, len, start, opts, \
      G(mdata,16), mctx); \
  else \
    rc = pcre2_jit_match_32(G(code,32), (PCRE2_SPTR32)subject, len, start, opts, \
      G(mdata,32), mctx)
1172
/* JIT stack handling. NOTE: each of these three expansions ends with its own
semicolon, so an invocation followed by ";" produces an additional empty
statement. */

/* Create a JIT stack with the function for the selected width and store it,
cast to (PCRE2_JIT_STACK *), in stack. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  if (test_mode == PCRE8_MODE) \
    stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(startsize, maxsize, gctx); \
  else if (test_mode == PCRE16_MODE) \
    stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(startsize, maxsize, gctx); \
  else \
    stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(startsize, maxsize, gctx);

/* Call pcre2_jit_stack_assign on the mode-selected context, casting callback
to the callback type of the selected width. */

#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(mctx,8), (pcre2_jit_callback_8)callback, data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(mctx,16), (pcre2_jit_callback_16)callback, data); \
  else \
    pcre2_jit_stack_assign_32(G(mctx,32), (pcre2_jit_callback_32)callback, data);

/* Free a JIT stack, casting stack to the stack type of the selected width. */

#define PCRE2_JIT_STACK_FREE(stack) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)stack); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)stack); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)stack);
1196
/* Call pcre2_maketables for the selected width with a NULL argument and
store the result in tables. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == PCRE8_MODE) \
    tables = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    tables = pcre2_maketables_16(NULL); \
  else \
    tables = pcre2_maketables_32(NULL)
1201
/* Call pcre2_match for the width selected by test_mode and store the return
value in rc. The code and match data arguments are mode-selected variables,
and the subject is cast to that width's string pointer type. */

#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_match_8(G(code,8), (PCRE2_SPTR8)subject, len, start, opts, \
      G(mdata,8), mctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_match_16(G(code,16), (PCRE2_SPTR16)subject, len, start, opts, \
      G(mdata,16), mctx); \
  else \
    rc = pcre2_match_32(G(code,32), (PCRE2_SPTR32)subject, len, start, opts, \
      G(mdata,32), mctx)
1209
/* Create a match data block and store it in the mode-selected variable
mdata. */

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_8(ovecsize, gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_16(ovecsize, gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_32(ovecsize, gctx)

/* As above, but calls pcre2_match_data_create_from_pattern with the
mode-selected code. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8), gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16), gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_from_pattern_32(G(code,32), gctx)

/* Call pcre2_match_data_free on the mode-selected match data. */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(mdata,16)); \
  else \
    pcre2_match_data_free_32(G(mdata,32))
1233
/* Call pcre2_pattern_convert for the width selected by test_mode and store
the return value in rc. The pattern and the convert context are mode-selected
variables; bufptr is cast to that width's pointer-to-string-pointer type. */

#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,bufptr,blenptr,cvctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_pattern_convert_8(G(pat,8), len, opts, \
      (PCRE2_UCHAR8 **)bufptr, blenptr, G(cvctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_pattern_convert_16(G(pat,16), len, opts, \
      (PCRE2_UCHAR16 **)bufptr, blenptr, G(cvctx,16)); \
  else \
    rc = pcre2_pattern_convert_32(G(pat,32), len, opts, \
      (PCRE2_UCHAR32 **)bufptr, blenptr, G(cvctx,32))
1241
/* Call pcre2_pattern_info on the mode-selected code and store the return
value in rc. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_pattern_info_8(G(code,8), what, where); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_pattern_info_16(G(code,16), what, where); \
  else \
    rc = pcre2_pattern_info_32(G(code,32), what, where)
1249
/* Call pcre2_printint for the current mode's compiled_code, with outfile as
the second argument and arg as the third. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8, outfile, arg); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16, outfile, arg); \
  else \
    pcre2_printint_32(compiled_code32, outfile, arg)
1257
/* Call pcre2_serialize_decode for the selected width and store the return
value in rc. The codes argument is cast to that width's code pointer array
type and the context is a mode-selected variable. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes, count, bytes, \
      G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes, count, bytes, \
      G(gctx,16)); \
  else \
    rc = pcre2_serialize_decode_32((pcre2_code_32 **)codes, count, bytes, \
      G(gctx,32))

/* Call pcre2_serialize_encode for the selected width and store the return
value in rc. The codes argument is cast to that width's const code pointer
array type and the context is a mode-selected variable. */

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes, count, \
      bytesptr, sizeptr, G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes, count, \
      bytesptr, sizeptr, G(gctx,16)); \
  else \
    rc = pcre2_serialize_encode_32((const pcre2_code_32 **)codes, count, \
      bytesptr, sizeptr, G(gctx,32))

/* Call pcre2_serialize_free for the selected width. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(bytes); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(bytes); \
  else \
    pcre2_serialize_free_32(bytes)
1281
/* Call pcre2_serialize_get_number_of_codes for the selected width and store
the return value in rc. NOTE: the expansion ends with its own semicolon, which
is kept for the sake of existing invocations. The final line deliberately has
no line-continuation, so the definition ends here and does not depend on the
line that follows it. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_get_number_of_codes_8(bytes); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_get_number_of_codes_16(bytes); \
  else \
    rc = pcre2_serialize_get_number_of_codes_32(bytes);
1289
/* Call pcre2_set_callout on the mode-selected context, casting fn to the
callout function pointer type of the selected width. NOTE: the expansion ends
with its own semicolon. */

#define PCRE2_SET_CALLOUT(mctx,fn,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(mctx,8), \
      (int (*)(pcre2_callout_block_8 *, void *))fn, data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(mctx,16), \
      (int (*)(pcre2_callout_block_16 *, void *))fn, data); \
  else \
    pcre2_set_callout_32(G(mctx,32), \
      (int (*)(pcre2_callout_block_32 *, void *))fn, data);
1297
/* Call pcre2_set_character_tables on the mode-selected context. */

#define PCRE2_SET_CHARACTER_TABLES(cctx,tables) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(cctx,8), tables); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(cctx,16), tables); \
  else \
    pcre2_set_character_tables_32(G(cctx,32), tables)
1305
/* Call pcre2_set_compile_recursion_guard on the mode-selected context. */

#define PCRE2_SET_COMPILE_RECURSION_GUARD(cctx,guard,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(cctx,8), guard, data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(cctx,16), guard, data); \
  else \
    pcre2_set_compile_recursion_guard_32(G(cctx,32), guard, data)
1313
/* Context setters. Each macro calls the pcre2_set_xxx function of the width
selected by test_mode on the mode-selected context ctx. The two glob setters
also store the function's return value in rc. */

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_depth_limit_8(G(ctx,8), limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_depth_limit_16(G(ctx,16), limit); \
  else \
    pcre2_set_depth_limit_32(G(ctx,32), limit)

#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_set_glob_separator_8(G(ctx,8), ch); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_set_glob_separator_16(G(ctx,16), ch); \
  else \
    rc = pcre2_set_glob_separator_32(G(ctx,32), ch)

#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_set_glob_escape_8(G(ctx,8), ch); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_set_glob_escape_16(G(ctx,16), ch); \
  else \
    rc = pcre2_set_glob_escape_32(G(ctx,32), ch)

#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_heap_limit_8(G(ctx,8), limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_heap_limit_16(G(ctx,16), limit); \
  else \
    pcre2_set_heap_limit_32(G(ctx,32), limit)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(ctx,8), limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(ctx,16), limit); \
  else \
    pcre2_set_match_limit_32(G(ctx,32), limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(ctx,8), limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(ctx,16), limit); \
  else \
    pcre2_set_max_pattern_length_32(G(ctx,32), limit)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(ctx,8), limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(ctx,16), limit); \
  else \
    pcre2_set_offset_limit_32(G(ctx,32), limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(ctx,8), limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(ctx,16), limit); \
  else \
    pcre2_set_parens_nest_limit_32(G(ctx,32), limit)
1377
/* Call pcre2_set_substitute_callout on the mode-selected context, casting fn
to the substitute callout function pointer type of the selected width. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(mctx,fn,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(mctx,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))fn, data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(mctx,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))fn, data); \
  else \
    pcre2_set_substitute_callout_32(G(mctx,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))fn, data)
1388
/* Call pcre2_substitute for the width selected by test_mode and store the
return value in rc. The code and match data arguments are mode-selected
variables; the subject, the replacement and the output buffer are cast to the
string pointer types of the selected width. */

#define PCRE2_SUBSTITUTE(rc,code,subject,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substitute_8(G(code,8), (PCRE2_SPTR8)subject, len, start, \
      opts, G(mdata,8), mctx, (PCRE2_SPTR8)repl, rlen, \
      (PCRE2_UCHAR8 *)outbuf, outlenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substitute_16(G(code,16), (PCRE2_SPTR16)subject, len, start, \
      opts, G(mdata,16), mctx, (PCRE2_SPTR16)repl, rlen, \
      (PCRE2_UCHAR16 *)outbuf, outlenptr); \
  else \
    rc = pcre2_substitute_32(G(code,32), (PCRE2_SPTR32)subject, len, start, \
      opts, G(mdata,32), mctx, (PCRE2_SPTR32)repl, rlen, \
      (PCRE2_UCHAR32 *)outbuf, outlenptr)
1399
/* Call pcre2_substring_copy_byname with the mode-selected match data and
name, casting buf to the character pointer type of the selected width. The
return value is stored in rc. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_byname_8(G(mdata,8), G(name,8), \
      (PCRE2_UCHAR8 *)buf, lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_byname_16(G(mdata,16), G(name,16), \
      (PCRE2_UCHAR16 *)buf, lenptr); \
  else \
    rc = pcre2_substring_copy_byname_32(G(mdata,32), G(name,32), \
      (PCRE2_UCHAR32 *)buf, lenptr)

/* As above, but calls pcre2_substring_copy_bynumber, passing num through
unchanged. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_bynumber_8(G(mdata,8), num, \
      (PCRE2_UCHAR8 *)buf, lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_bynumber_16(G(mdata,16), num, \
      (PCRE2_UCHAR16 *)buf, lenptr); \
  else \
    rc = pcre2_substring_copy_bynumber_32(G(mdata,32), num, \
      (PCRE2_UCHAR32 *)buf, lenptr)
1415
/* Call pcre2_substring_free for the selected width, casting buf to that
width's character pointer type. */

#define PCRE2_SUBSTRING_FREE(buf) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)buf); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)buf); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)buf)
1421
/* Call pcre2_substring_get_byname with the mode-selected match data and
name, casting bufptr to the pointer-to-string-pointer type of the selected
width. The return value is stored in rc. */

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_byname_8(G(mdata,8), G(name,8), \
      (PCRE2_UCHAR8 **)bufptr, lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_byname_16(G(mdata,16), G(name,16), \
      (PCRE2_UCHAR16 **)bufptr, lenptr); \
  else \
    rc = pcre2_substring_get_byname_32(G(mdata,32), G(name,32), \
      (PCRE2_UCHAR32 **)bufptr, lenptr)

/* As above, but calls pcre2_substring_get_bynumber, passing num through
unchanged. */

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_bynumber_8(G(mdata,8), num, \
      (PCRE2_UCHAR8 **)bufptr, lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_bynumber_16(G(mdata,16), num, \
      (PCRE2_UCHAR16 **)bufptr, lenptr); \
  else \
    rc = pcre2_substring_get_bynumber_32(G(mdata,32), num, \
      (PCRE2_UCHAR32 **)bufptr, lenptr)
1437
/* Call pcre2_substring_length_byname with the mode-selected match data and
name, and store the return value in rc. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_byname_8(G(mdata,8), G(name,8), lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_byname_16(G(mdata,16), G(name,16), lenptr); \
  else \
    rc = pcre2_substring_length_byname_32(G(mdata,32), G(name,32), lenptr)

/* As above, but calls pcre2_substring_length_bynumber, passing num through
unchanged. */

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_bynumber_8(G(mdata,8), num, lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_bynumber_16(G(mdata,16), num, lenptr); \
  else \
    rc = pcre2_substring_length_bynumber_32(G(mdata,32), num, lenptr)
1453
/* Call pcre2_substring_list_get with the mode-selected match data, casting
listptr to the list pointer type of the selected width. The return value is
stored in rc. */

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_list_get_8(G(mdata,8), \
      (PCRE2_UCHAR8 ***)listptr, lensptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_list_get_16(G(mdata,16), \
      (PCRE2_UCHAR16 ***)listptr, lensptr); \
  else \
    rc = pcre2_substring_list_get_32(G(mdata,32), \
      (PCRE2_UCHAR32 ***)listptr, lensptr)

/* Call pcre2_substring_list_free for the selected width, casting list to
that width's string pointer array type. */

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)list); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)list); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)list)
1469
/* Call pcre2_substring_number_from_name with the mode-selected code and
name, and store the return value in num. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(num,code,name) \
  if (test_mode == PCRE8_MODE) \
    num = pcre2_substring_number_from_name_8(G(code,8), G(name,8)); \
  else if (test_mode == PCRE16_MODE) \
    num = pcre2_substring_number_from_name_16(G(code,16), G(name,16)); \
  else \
    num = pcre2_substring_number_from_name_32(G(code,32), G(name,32))
1477
/* PTR yields the mode-selected variable as a void pointer. */

#define PTR(var) ( \
  (test_mode == PCRE8_MODE)?  (void *)G(var,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(var,16) : \
                              (void *)G(var,32))
1482
/* SETFLD assigns value to a field of the mode-selected structure. */

#define SETFLD(base,field,value) \
  if (test_mode == PCRE8_MODE) \
    G(base,8)->field = value; \
  else if (test_mode == PCRE16_MODE) \
    G(base,16)->field = value; \
  else \
    G(base,32)->field = value

/* SETFLDVEC assigns value to element idx of a vector field of the
mode-selected structure. */

#define SETFLDVEC(base,field,idx,value) \
  if (test_mode == PCRE8_MODE) \
    G(base,8)->field[idx] = value; \
  else if (test_mode == PCRE16_MODE) \
    G(base,16)->field[idx] = value; \
  else \
    G(base,32)->field[idx] = value
1492
/* SETOP applies the assignment operator op, with right-hand side rhs, to the
mode-selected variable lhs. */

#define SETOP(lhs,rhs,op) \
  if (test_mode == PCRE8_MODE) \
    G(lhs,8) op rhs; \
  else if (test_mode == PCRE16_MODE) \
    G(lhs,16) op rhs; \
  else \
    G(lhs,32) op rhs
1497
/* SETCASTPTR assigns ptr to the mode-selected variable lhs after casting it
to a pointer to the unsigned integer type of the selected width. */

#define SETCASTPTR(lhs,ptr) \
  if (test_mode == PCRE8_MODE) \
    G(lhs,8) = (uint8_t *)(ptr); \
  else if (test_mode == PCRE16_MODE) \
    G(lhs,16) = (uint16_t *)(ptr); \
  else \
    G(lhs,32) = (uint32_t *)(ptr)
1505
/* STRLEN yields, as an int, the length of str as measured by strlen,
strlen16 or strlen32 according to test_mode. */

#define STRLEN(str) ( \
  (test_mode == PCRE8_MODE)? \
    ((int)strlen((char *)str)) : \
  (test_mode == PCRE16_MODE)? \
    ((int)strlen16((PCRE2_SPTR16)str)) : \
    ((int)strlen32((PCRE2_SPTR32)str)))
1509
/* SUB1 calls the mode-selected function fn with one mode-selected
argument. */

#define SUB1(fn,arg) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg,16)); \
  else \
    G(fn,32)(G(arg,32))

/* SUB2 calls the mode-selected function fn with two mode-selected
arguments. */

#define SUB2(fn,arg1,arg2) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg1,8), G(arg2,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg1,16), G(arg2,16)); \
  else \
    G(fn,32)(G(arg1,32), G(arg2,32))
1519
/* TEST is true when the mode-selected variable var stands in relation rel to
val. It is false if test_mode matches none of the three modes. */

#define TEST(var,rel,val) ( \
  (test_mode == PCRE8_MODE  && G(var,8) rel (val)) || \
  (test_mode == PCRE16_MODE && G(var,16) rel (val)) || \
  (test_mode == PCRE32_MODE && G(var,32) rel (val)))

/* TESTFLD is the same test applied to a field of the mode-selected
structure. */

#define TESTFLD(base,field,rel,val) ( \
  (test_mode == PCRE8_MODE  && G(base,8)->field rel (val)) || \
  (test_mode == PCRE16_MODE && G(base,16)->field rel (val)) || \
  (test_mode == PCRE32_MODE && G(base,32)->field rel (val)))
1529
1530
1531 /* ----- Two out of three modes are supported ----- */
1532
1533 #else
1534
1535 /* We can use some macro trickery to make a single set of definitions work in
1536 the three different cases. */
1537
/* Select the two code unit widths in use: BITONE is the wider of the pair
and BITTWO the narrower. */

#if defined(SUPPORT_PCRE2_32)
  #if defined(SUPPORT_PCRE2_16)     /* 32-bit and 16-bit but not 8-bit */
    #define BITONE 32
    #define BITTWO 16
  #elif defined(SUPPORT_PCRE2_8)    /* 32-bit and 8-bit but not 16-bit */
    #define BITONE 32
    #define BITTWO 8
  #else                             /* Fall back to 16-bit and 8-bit */
    #define BITONE 16
    #define BITTWO 8
  #endif
#else                               /* 16-bit and 8-bit but not 32-bit */
  #define BITONE 16
  #define BITTWO 8
#endif


/* ----- Common macros for two-mode cases ----- */

/* The two widths expressed in bytes. */

#define BYTEONE (BITONE/8)
#define BYTETWO (BITTWO/8)
1562
/* CASTFLD yields a field of the structure selected by test_mode, cast to the
given type. The BITTWO variant is used whenever test_mode is not the BITONE
mode. */

#define CASTFLD(type,base,field) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(base,BITONE)->field) : \
    (type)(G(base,BITTWO)->field))

/* CASTVAR yields the variable selected by test_mode, cast to the given
type. */

#define CASTVAR(type,var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : \
    (type)G(var,BITTWO))

/* CODE_UNIT fetches element idx of ptr, treating ptr as a string of the code
unit width selected by test_mode, and returns it as a uint32_t. */

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(ptr))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(ptr))[idx]))
1575
/* Copy a whole convert context: variant of dst <- variant of src. */

#define CONCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE), G(src,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO), G(src,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))

/* Copy count code units from src into the selected variant of dst; the byte
count is count times the code unit size of the current mode. This is an
expression, not an if/else statement. */

#define CONVERT_COPY(dst,src,count) \
  (test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? memcpy(G(dst,BITONE), (char *)src, (count)*BYTEONE) \
    : memcpy(G(dst,BITTWO), (char *)src, (count)*BYTETWO)

/* Copy a whole match context: variant of dst <- variant of src. */

#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE), G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO), G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))
1592
/* Member field of the structure the selected variant of var points to. */

#define FLD(var,field) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? G(var,BITONE)->field \
    : G(var,BITTWO)->field)

/* Copy a whole compile context: variant of dst <- variant of src. */

#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE), G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO), G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1601
/* Call the width-specific pchars function on str (cast to the mode's string
pointer type) advanced by offset code units, storing its result in result. */

#define PCHARS(result, str, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(str)+offset, \
      len, utf, f); \
  else \
    result = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(str)+offset, \
      len, utf, f)

/* As PCHARS, but the function's result is discarded. */

#define PCHARSV(str, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(str)+offset, \
      len, utf, f); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(str)+offset, \
      len, utf, f)
1613
/* Enumerate the callouts of the current mode's compiled_code, passing fn
(cast to the width-specific callback type) and data, and store the return
value in rc. The base name is written with its trailing underscore, like every
other wrapper here, so the pasted function name is
pcre2_callout_enumerate_<width> without relying on any other definition of the
unsuffixed name. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))fn, \
      data); \
  else \
    rc = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))fn, \
      data)
1621
/* Copy the compiled pattern src (passed as is) into the selected variant of
dst. */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(dst,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

/* Copy the selected variant of src and store the copy in dst as void *. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(src,BITTWO))

/* As PCRE2_CODE_COPY_TO_VOID, using the copy-with-tables function. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(src,BITTWO))
1639
/* Compile the selected variant of pattern and store the result in the
selected variant of code; the remaining arguments are passed through
unchanged. */

#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pattern,BITONE), \
      length, options, errcode, erroffset, ctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pattern,BITTWO), \
      length, options, errcode, erroffset, ctx)

/* Free a converted pattern, cast to the mode's code unit pointer type. */

#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) *)pattern); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) *)pattern)
1651
/* Run the DFA matcher of the current mode. code and mdata are base names that
get the width suffix; subject is cast to the mode's string pointer type; the
other arguments are passed through. The return value is stored in rc. */

#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject, length, start, options, \
      G(mdata,BITONE), mctx, wspace, wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject, length, start, options, \
      G(mdata,BITTWO), mctx, wspace, wscount)
1659
/* Fetch the message for errcode into the selected variant of buf. The length
passed is the matching <buf><width>_size variable divided by the code unit
size in bytes. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errcode, G(buf,BITONE), \
      G(G(buf,BITONE),_size/BYTEONE)); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errcode, G(buf,BITTWO), \
      G(G(buf,BITTWO),_size/BYTETWO))

/* Store the ovector count of the selected variant of mdata in count. */

#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    count = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    count = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

/* Store the start character offset of the selected variant of mdata in
start. */

#define PCRE2_GET_STARTCHAR(start,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    start = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    start = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))
1677
/* JIT-compile the selected variant of code with the given options; the return
value goes to rc. */

#define PCRE2_JIT_COMPILE(rc,code,options) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE), options); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO), options)

/* Call the JIT free-unused-memory function on the selected variant of
gctx. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(gctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(gctx,BITTWO))

/* Run the JIT fast-path matcher of the current mode. code and mdata are base
names; subject is cast to the mode's string pointer type. */

#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject, length, start, options, \
      G(mdata,BITONE), mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject, length, start, options, \
      G(mdata,BITTWO), mctx)
1697
/* JIT stack handling. Each of these expands to a single if/else statement
with no trailing semicolon (and no dangling line continuation), so the
invocation's own semicolon terminates it and the macro can be used as the body
of an enclosing if/else, like the other wrappers in this section. */

/* Create a JIT stack with the current mode's library and store it, cast to
PCRE2_JIT_STACK *, in stack. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    stack = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)( \
      startsize, maxsize, gctx); \
  else \
    stack = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)( \
      startsize, maxsize, gctx)

/* Assign a JIT stack to the selected variant of mctx; callback is cast to the
mode's JIT callback type and data is passed through. */

#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(mctx,BITONE), \
      (G(pcre2_jit_callback_,BITONE))callback, data); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(mctx,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))callback, data)

/* Free a JIT stack, cast to the mode's own JIT stack type. */

#define PCRE2_JIT_STACK_FREE(stack) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)stack); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)stack)
1715
/* Build character tables with the current mode's library (NULL general
context) and store them in tables. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

/* Run the normal matcher of the current mode. code and mdata are base names;
subject is cast to the mode's string pointer type. */

#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject, length, start, options, \
      G(mdata,BITONE), mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject, length, start, options, \
      G(mdata,BITTWO), mctx)

/* Create a match data block and store it in the selected variant of mdata. */

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(ovecsize, gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(ovecsize, gctx)

/* As above, but created from the selected variant of code. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)( \
      G(code,BITONE), gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)( \
      G(code,BITTWO), gctx)

/* Free the selected variant of mdata. */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))
1747
/* Convert the selected variant of pattern. bufptr is cast to a pointer to the
mode's code unit pointer type, and cvctx is a base name that gets the width
suffix; the other arguments are passed through. */

#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,buflenptr,cvctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_convert_,BITONE)(G(pattern,BITONE), length, \
      options, (G(PCRE2_UCHAR,BITONE) **)bufptr, buflenptr, \
      G(cvctx,BITONE)); \
  else \
    rc = G(pcre2_pattern_convert_,BITTWO)(G(pattern,BITTWO), length, \
      options, (G(PCRE2_UCHAR,BITTWO) **)bufptr, buflenptr, \
      G(cvctx,BITTWO))

/* Query the selected variant of code; what and where are passed through. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE), what, where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO), what, where)

/* Call the current mode's printint function on its compiled_code, writing to
outfile; arg is passed through as the last argument. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE), outfile, arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO), outfile, arg)
1765
/* Serialization wrappers. codes is cast to an array of the mode's compiled
code pointers, gctx is a base name that gets the width suffix, and the return
value goes to rc. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes, count, bytes, G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes, count, bytes, G(gctx,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (G(const pcre2_code_,BITONE) **)codes, count, bytesptr, sizeptr, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (G(const pcre2_code_,BITTWO) **)codes, count, bytesptr, sizeptr, \
      G(gctx,BITTWO))

/* Free serialized data; bytes is passed through unchanged. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

/* Store the number of codes held in the serialized data bytes in rc. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1789
/* Install callout function fn (cast to the mode's callback type) with user
data on the selected variant of mctx. The expansion carries no trailing
semicolon, matching the single-mode 8-bit and 32-bit definitions, so the
invocation's own semicolon ends the statement and the macro remains usable as
the body of an enclosing if/else. */

#define PCRE2_SET_CALLOUT(mctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(mctx,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))fn, data); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(mctx,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))fn, data)
1797
/* Context setters. ctx is a base name that gets the width suffix; the other
arguments are passed through. Those with an rc parameter store the function's
return value there. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE), tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO), tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE), \
      guard, data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO), \
      guard, data)

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(ctx,BITONE), limit); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(ctx,BITTWO), limit)

#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,escape) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_escape_,BITONE)(G(ctx,BITONE), escape); \
  else \
    rc = G(pcre2_set_glob_escape_,BITTWO)(G(ctx,BITTWO), escape)

#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,separator) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_separator_,BITONE)(G(ctx,BITONE), separator); \
  else \
    rc = G(pcre2_set_glob_separator_,BITTWO)(G(ctx,BITTWO), separator)
1827
/* More context setters: ctx is a base name that gets the width suffix and the
value argument is passed through. */

#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(ctx,BITONE), limit); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(ctx,BITTWO), limit)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE), limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO), limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,length) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE), length); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO), length)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE), limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO), limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE), limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO), limit)

/* Install substitute callout fn, cast to the mode's callback type. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)(G(ctx,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))fn, \
      data); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)(G(ctx,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))fn, \
      data)
1865
/* Run the substitute function of the current mode. code and mdata are base
names; subject and repl are cast to the mode's string pointer type and outbuf
to its code unit pointer type; everything else is passed through. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx,repl,repl_len,outbuf,outlenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject, length, start, options, \
      G(mdata,BITONE), mctx, \
      (G(PCRE2_SPTR,BITONE))repl, repl_len, \
      (G(PCRE2_UCHAR,BITONE) *)outbuf, outlenptr); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject, length, start, options, \
      G(mdata,BITTWO), mctx, \
      (G(PCRE2_SPTR,BITTWO))repl, repl_len, \
      (G(PCRE2_UCHAR,BITTWO) *)outbuf, outlenptr)
1875
/* Substring extraction wrappers. mdata and name are base names that get the
width suffix; buffer arguments are cast to the mode's code unit pointer type;
number and lenptr are passed through. The return value goes to rc. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE), (G(PCRE2_UCHAR,BITONE) *)buf, lenptr); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO), (G(PCRE2_UCHAR,BITTWO) *)buf, lenptr)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,number,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)(G(mdata,BITONE), \
      number, (G(PCRE2_UCHAR,BITONE) *)buf, lenptr); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)(G(mdata,BITTWO), \
      number, (G(PCRE2_UCHAR,BITTWO) *)buf, lenptr)

/* Free a substring, cast to the mode's code unit pointer type. */

#define PCRE2_SUBSTRING_FREE(buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)buf); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)buf)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE), (G(PCRE2_UCHAR,BITONE) **)bufptr, lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO), (G(PCRE2_UCHAR,BITTWO) **)bufptr, lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)(G(mdata,BITONE), \
      number, (G(PCRE2_UCHAR,BITONE) **)bufptr, lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)(G(mdata,BITTWO), \
      number, (G(PCRE2_UCHAR,BITTWO) **)bufptr, lenptr)
1912
/* Substring length, list and name-lookup wrappers. mdata, code and name are
base names that get the width suffix; the return value goes to rc. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE), lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO), lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(mdata,BITONE), \
      number, lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(mdata,BITTWO), \
      number, lenptr)

/* listptr is cast to a pointer to the mode's list-of-strings type. */

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)(G(mdata,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)listptr, lensptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)(G(mdata,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)listptr, lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_number_from_name_,BITONE)(G(code,BITONE), \
      G(name,BITONE)); \
  else \
    rc = G(pcre2_substring_number_from_name_,BITTWO)(G(code,BITTWO), \
      G(name,BITTWO))
1944
/* The selected variant of var as a void pointer. */

#define PTR(var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? (void *)G(var,BITONE) \
    : (void *)G(var,BITTWO))

/* Assign value to member field through the selected variant of var. */

#define SETFLD(var,field,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->field = value; \
  else \
    G(var,BITTWO)->field = value

/* Assign value to element index of the array member field. */

#define SETFLDVEC(var,field,index,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->field[index] = value; \
  else \
    G(var,BITTWO)->field[index] = value

/* Apply the assignment operator token op with right-hand side value to the
selected variant of var. */

#define SETOP(var,value,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op value; \
  else \
    G(var,BITTWO) op value

/* Store ptr in the selected variant of var, cast to a pointer to the unsigned
integer type of the mode's width. */

#define SETCASTPTR(var,ptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) = (G(G(uint,BITONE),_t) *)(ptr); \
  else \
    G(var,BITTWO) = (G(G(uint,BITTWO),_t) *)(ptr)

/* Result of the width-specific strlen function applied to str, which is cast
to the mode's string pointer type. */

#define STRLEN(str) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) \
    : G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))
1970
/* Call the selected variant of fn on the selected variant of arg. */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(fn,BITONE)(G(arg,BITONE)); \
  else G(fn,BITTWO)(G(arg,BITTWO))
1976
/* Call the selected variant of fn on the selected variants of arg1 and arg2.
The function name is followed directly by its argument list; a stray closing
parenthesis after the name would make every use a syntax error. */

#define SUB2(fn,arg1,arg2) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(fn,BITONE)(G(arg1,BITONE),G(arg2,BITONE)); \
  else \
    G(fn,BITTWO)(G(arg1,BITTWO),G(arg2,BITTWO))
1982
/* True when the variant of var for the current test_mode stands in relation
op to value. */

#define TEST(var,op,value) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE) op (value)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO) op (value)))

/* As TEST, but compares member field reached through the variant of var. */

#define TESTFLD(var,field,op,value) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(var,BITONE)->field op (value)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(var,BITTWO)->field op (value)))
1990
1991
1992 #endif /* Two out of three modes */
1993
1994 /* ----- End of cases where more than one mode is supported ----- */
1995
1996
1997 /* ----- Only 8-bit mode is supported ----- */
1998
1999 #elif defined SUPPORT_PCRE2_8
/* Accessors and copy helpers: every base name simply gets the suffix 8. */

#define CASTFLD(type,var,field) \
  (type)(G(var,8)->field)
#define CASTVAR(type,var) \
  (type)G(var,8)
#define CODE_UNIT(ptr,index) \
  (uint32_t)(((PCRE2_SPTR8)(ptr))[index])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8), G(src,8), sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(dst,src,count) \
  memcpy(G(dst,8), (char *)src, count)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8), G(src,8), sizeof(pcre2_match_context_8))
#define FLD(var,field) \
  G(var,8)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8), G(src,8), sizeof(pcre2_compile_context_8))

/* Call pchars8 on str advanced by offset code units; PCHARS stores the result
in result, PCHARSV discards it. */

#define PCHARS(result, str, offset, len, utf, f) \
  result = pchars8((PCRE2_SPTR8)(str)+offset, len, utf, f)
#define PCHARSV(str, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(str)+offset, len, utf, f)
/* Direct wrappers for the 8-bit library. Parameters used as G(x,8) are base
names; cast parameters are converted to the 8-bit pointer types; the rest are
passed through. A leading rc/dst/count/start parameter receives the result. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn, data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_8(G(src,8))
#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  G(code,8) = pcre2_compile_8(G(pattern,8), length, options, errcode, \
    erroffset, ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pattern)
#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_8(G(code,8), (PCRE2_SPTR8)subject, length, start, \
    options, G(mdata,8), mctx, wspace, wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_8(errcode, G(buf,8), G(G(buf,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(start,mdata) \
  start = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_8(G(code,8), options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_8(G(gctx,8))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_8(G(code,8), (PCRE2_SPTR8)subject, length, start, \
    options, G(mdata,8), mctx)
/* JIT stack handling for the 8-bit library. The expansions carry no trailing
semicolon: the invocation supplies it, so each macro yields exactly one
statement and can be used as the body of an if/else. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(startsize, maxsize, gctx)
#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  pcre2_jit_stack_assign_8(G(mctx,8), (pcre2_jit_callback_8)callback, data)
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)stack)
/* Tables, matching, match data, conversion, info and serialization wrappers
for the 8-bit library. */

#define PCRE2_MAKETABLES(tables) \
  tables = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_8(G(code,8), (PCRE2_SPTR8)subject, length, start, \
    options, G(mdata,8), mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,8) = pcre2_match_data_create_8(ovecsize, gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8), gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,buflenptr,cvctx) \
  rc = pcre2_pattern_convert_8(G(pattern,8), length, options, \
    (PCRE2_UCHAR8 **)bufptr, buflenptr, G(cvctx,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8), what, where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_8(compiled_code8, outfile, arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes, count, bytes, \
    G(gctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes, count, \
    bytesptr, sizeptr, G(gctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
/* Context setters for the 8-bit library: ctx is a base name that gets the
suffix 8; callback arguments are cast to the 8-bit callback types. */

#define PCRE2_SET_CALLOUT(ctx,fn,data) \
  pcre2_set_callout_8(G(ctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))fn, data)
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_8(G(ctx,8), tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_8(G(ctx,8), guard, data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_8(G(ctx,8), limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,escape) \
  rc = pcre2_set_glob_escape_8(G(ctx,8), escape)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,separator) \
  rc = pcre2_set_glob_separator_8(G(ctx,8), separator)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_8(G(ctx,8), limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_8(G(ctx,8), limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,length) \
  pcre2_set_max_pattern_length_8(G(ctx,8), length)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_8(G(ctx,8), limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_8(G(ctx,8), limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  pcre2_set_substitute_callout_8(G(ctx,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))fn, data)
/* Substitution and substring wrappers for the 8-bit library. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx,repl,repl_len,outbuf,outlenptr) \
  rc = pcre2_substitute_8(G(code,8), (PCRE2_SPTR8)subject, length, start, \
    options, G(mdata,8), mctx, (PCRE2_SPTR8)repl, repl_len, \
    (PCRE2_UCHAR8 *)outbuf, outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_8(G(mdata,8), G(name,8), \
    (PCRE2_UCHAR8 *)buf, lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,number,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_8(G(mdata,8), number, \
    (PCRE2_UCHAR8 *)buf, lenptr)
#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_8(G(mdata,8), G(name,8), \
    (PCRE2_UCHAR8 **)bufptr, lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_8(G(mdata,8), number, \
    (PCRE2_UCHAR8 **)bufptr, lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_8(G(mdata,8), G(name,8), lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  rc = pcre2_substring_length_bynumber_8(G(mdata,8), number, lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_8(G(mdata,8), (PCRE2_UCHAR8 ***)listptr, \
    lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)list)
/* Look up the group number for the 8-bit variant of name in the 8-bit variant
of code and store it in rc. No trailing semicolon in the expansion, matching
the two-mode definition: the invocation supplies it, so the macro is a single
statement even inside an if/else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  rc = pcre2_substring_number_from_name_8(G(code,8), G(name,8))
/* Generic accessors for the 8-bit-only build: each base name gets the suffix
8 and no mode test is needed. */

#define PTR(var) \
  (void *)G(var,8)
#define SETFLD(var,field,value) \
  G(var,8)->field = value
#define SETFLDVEC(var,field,index,value) \
  G(var,8)->field[index] = value
#define SETOP(var,value,op) \
  G(var,8) op value
#define SETCASTPTR(var,ptr) \
  G(var,8) = (uint8_t *)(ptr)
#define STRLEN(str) \
  (int)strlen((char *)str)
#define SUB1(fn,arg) \
  G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) \
  G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(var,op,value) \
  (G(var,8) op (value))
#define TESTFLD(var,field,op,value) \
  (G(var,8)->field op (value))
2102
2103
2104 /* ----- Only 16-bit mode is supported ----- */
2105
2106 #elif defined SUPPORT_PCRE2_16
/* Accessors and copy helpers: every base name simply gets the suffix 16. */

#define CASTFLD(type,var,field) \
  (type)(G(var,16)->field)
#define CASTVAR(type,var) \
  (type)G(var,16)
#define CODE_UNIT(ptr,index) \
  (uint32_t)(((PCRE2_SPTR16)(ptr))[index])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,16), G(src,16), sizeof(pcre2_convert_context_16))

/* count is in code units, so the byte count is doubled. */
#define CONVERT_COPY(dst,src,count) \
  memcpy(G(dst,16), (char *)src, (count)*2)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,16), G(src,16), sizeof(pcre2_match_context_16))
#define FLD(var,field) \
  G(var,16)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,16), G(src,16), sizeof(pcre2_compile_context_16))

/* Call pchars16 on str advanced by offset code units; PCHARS stores the
result in result, PCHARSV discards it. */

#define PCHARS(result, str, offset, len, utf, f) \
  result = pchars16((PCRE2_SPTR16)(str)+offset, len, utf, f)
#define PCHARSV(str, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(str)+offset, len, utf, f)
/* Direct wrappers for the 16-bit library. Parameters used as G(x,16) are base
names; cast parameters are converted to the 16-bit pointer types; the rest are
passed through. A leading rc/dst/count/start parameter receives the result. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))fn, data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,16) = pcre2_code_copy_16(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_16(G(src,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_16(G(src,16))
#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  G(code,16) = pcre2_compile_16(G(pattern,16), length, options, errcode, \
    erroffset, ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pattern)
#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_16(G(code,16), (PCRE2_SPTR16)subject, length, start, \
    options, G(mdata,16), mctx, wspace, wscount)

/* The buffer length passed is <buf>16_size halved. */
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_16(errcode, G(buf,16), G(G(buf,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_16(G(mdata,16))
#define PCRE2_GET_STARTCHAR(start,mdata) \
  start = pcre2_get_startchar_16(G(mdata,16))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_16(G(code,16), options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_16(G(gctx,16))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_16(G(code,16), (PCRE2_SPTR16)subject, length, start, \
    options, G(mdata,16), mctx)
/* JIT stack handling for the 16-bit library. The expansions carry no trailing
semicolon: the invocation supplies it, so each macro yields exactly one
statement and can be used as the body of an if/else. */

#define PCRE2_JIT_STACK_CREATE(stack,startsize,maxsize,gctx) \
  stack = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(startsize, maxsize, gctx)
#define PCRE2_JIT_STACK_ASSIGN(mctx,callback,data) \
  pcre2_jit_stack_assign_16(G(mctx,16), (pcre2_jit_callback_16)callback, data)
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)stack)
/* Tables, matching, match data, conversion, info and serialization wrappers
for the 16-bit library. */

#define PCRE2_MAKETABLES(tables) \
  tables = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_match_16(G(code,16), (PCRE2_SPTR16)subject, length, start, \
    options, G(mdata,16), mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,16) = pcre2_match_data_create_16(ovecsize, gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16), gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_16(G(mdata,16))
#define PCRE2_PATTERN_CONVERT(rc,pattern,length,options,bufptr,buflenptr,cvctx) \
  rc = pcre2_pattern_convert_16(G(pattern,16), length, options, \
    (PCRE2_UCHAR16 **)bufptr, buflenptr, G(cvctx,16))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_16(G(code,16), what, where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_16(compiled_code16, outfile, arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes, count, bytes, \
    G(gctx,16))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes, count, \
    bytesptr, sizeptr, G(gctx,16))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_16(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_16(bytes)
/* Install callout function fn (cast to the 16-bit callback type) with user
data on the 16-bit variant of mctx. No trailing semicolon in the expansion,
matching the 8-bit and 32-bit definitions: the invocation supplies it. */
#define PCRE2_SET_CALLOUT(mctx,fn,data) \
  pcre2_set_callout_16(G(mctx,16), \
    (int (*)(pcre2_callout_block_16 *, void *))fn, data)
/* Context setters for the 16-bit library: ctx is a base name that gets the
suffix 16; the value argument is passed through. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_16(G(ctx,16), tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_16(G(ctx,16), guard, data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_16(G(ctx,16), limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,escape) \
  rc = pcre2_set_glob_escape_16(G(ctx,16), escape)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,separator) \
  rc = pcre2_set_glob_separator_16(G(ctx,16), separator)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_16(G(ctx,16), limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_16(G(ctx,16), limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,length) \
  pcre2_set_max_pattern_length_16(G(ctx,16), length)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_16(G(ctx,16), limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_16(G(ctx,16), limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  pcre2_set_substitute_callout_16(G(ctx,16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))fn, data)
/* Substitution and substring wrappers for the 16-bit library. */

#define PCRE2_SUBSTITUTE(rc,code,subject,length,start,options,mdata,mctx,repl,repl_len,outbuf,outlenptr) \
  rc = pcre2_substitute_16(G(code,16), (PCRE2_SPTR16)subject, length, start, \
    options, G(mdata,16), mctx, (PCRE2_SPTR16)repl, repl_len, \
    (PCRE2_UCHAR16 *)outbuf, outlenptr)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_16(G(mdata,16), G(name,16), \
    (PCRE2_UCHAR16 *)buf, lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,number,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_16(G(mdata,16), number, \
    (PCRE2_UCHAR16 *)buf, lenptr)
#define PCRE2_SUBSTRING_FREE(buf) \
  pcre2_substring_free_16((PCRE2_UCHAR16 *)buf)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_16(G(mdata,16), G(name,16), \
    (PCRE2_UCHAR16 **)bufptr, lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,number,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_16(G(mdata,16), number, \
    (PCRE2_UCHAR16 **)bufptr, lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_16(G(mdata,16), G(name,16), lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,number,lenptr) \
  rc = pcre2_substring_length_bynumber_16(G(mdata,16), number, lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_16(G(mdata,16), (PCRE2_UCHAR16 ***)listptr, \
    lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)list)
/* Look up the group number for the 16-bit variant of name in the 16-bit
variant of code and store it in rc. No trailing semicolon in the expansion,
matching the two-mode definition: the invocation supplies it, so the macro is
a single statement even inside an if/else. */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  rc = pcre2_substring_number_from_name_16(G(code,16), G(name,16))
/* Generic accessors for the 16-bit-only build: each base name gets the suffix
16 and no mode test is needed. */

#define PTR(var) \
  (void *)G(var,16)
#define SETFLD(var,field,value) \
  G(var,16)->field = value
#define SETFLDVEC(var,field,index,value) \
  G(var,16)->field[index] = value
#define SETOP(var,value,op) \
  G(var,16) op value
#define SETCASTPTR(var,ptr) \
  G(var,16) = (uint16_t *)(ptr)
#define STRLEN(str) \
  (int)strlen16((PCRE2_SPTR16)str)
#define SUB1(fn,arg) \
  G(fn,16)(G(arg,16))
#define SUB2(fn,arg1,arg2) \
  G(fn,16)(G(arg1,16),G(arg2,16))
#define TEST(var,op,value) \
  (G(var,16) op (value))
#define TESTFLD(var,field,op,value) \
  (G(var,16)->field op (value))
2209
2210
2211 /* ----- Only 32-bit mode is supported ----- */
2212
2213 #elif defined SUPPORT_PCRE2_32
/* Accessors and copy helpers: every base name simply gets the suffix 32. */

#define CASTFLD(type,var,field) \
  (type)(G(var,32)->field)
#define CASTVAR(type,var) \
  (type)G(var,32)
#define CODE_UNIT(ptr,index) \
  (uint32_t)(((PCRE2_SPTR32)(ptr))[index])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,32), G(src,32), sizeof(pcre2_convert_context_32))

/* count is in code units, so the byte count is multiplied by four. */
#define CONVERT_COPY(dst,src,count) \
  memcpy(G(dst,32), (char *)src, (count)*4)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,32), G(src,32), sizeof(pcre2_match_context_32))
#define FLD(var,field) \
  G(var,32)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,32), G(src,32), sizeof(pcre2_compile_context_32))

/* Call pchars32 on str advanced by offset code units; PCHARS stores the
result in result, PCHARSV discards it. */

#define PCHARS(result, str, offset, len, utf, f) \
  result = pchars32((PCRE2_SPTR32)(str)+offset, len, utf, f)
#define PCHARSV(str, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(str)+offset, len, utf, f)
/* Direct wrappers for the 32-bit library. Parameters used as G(x,32) are base
names; cast parameters are converted to the 32-bit pointer types; the rest are
passed through. A leading rc/dst/count/start parameter receives the result. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))fn, data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,32) = pcre2_code_copy_32(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_32(G(src,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_32(G(src,32))
#define PCRE2_COMPILE(code,pattern,length,options,errcode,erroffset,ctx) \
  G(code,32) = pcre2_compile_32(G(pattern,32), length, options, errcode, \
    erroffset, ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pattern) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)pattern)
#define PCRE2_DFA_MATCH(rc,code,subject,length,start,options,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_32(G(code,32), (PCRE2_SPTR32)subject, length, start, \
    options, G(mdata,32), mctx, wspace, wscount)

/* The buffer length passed is <buf>32_size divided by four. */
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_32(errcode, G(buf,32), G(G(buf,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_32(G(mdata,32))
#define PCRE2_GET_STARTCHAR(start,mdata) \
  start = pcre2_get_startchar_32(G(mdata,32))
#define PCRE2_JIT_COMPILE(rc,code,options) \
  rc = pcre2_jit_compile_32(G(code,32), options)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(gctx) \
  pcre2_jit_free_unused_memory_32(G(gctx,32))
#define PCRE2_JIT_MATCH(rc,code,subject,length,start,options,mdata,mctx) \
  rc = pcre2_jit_match_32(G(code,32), (PCRE2_SPTR32)subject, length, start, \
    options, G(mdata,32), mctx)
2246 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2247 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
2248 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2249 pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
2250 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
2251 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
2252 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2253 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2254 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
2255 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2256 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
2257 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
2258 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
2259 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
2260 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
2261 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2262 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
2263 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2264 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
2265 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
2266 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2267 r = pcre2_serialize_get_number_of_codes_32(a)
2268 #define PCRE2_SET_CALLOUT(a,b,c) \
2269 pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
2270 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
2271 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2272 pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
2273 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
2274 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
2275 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
2276 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
2277 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
2278 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
2279 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
2280 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
2281 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2282 pcre2_set_substitute_callout_32(G(a,32), \
2283 (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
2284 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2285 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
2286 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
2287 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2288 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
2289 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2290 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
2291 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2292 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2293 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2294 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2295 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2296 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2297 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2298 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2299 a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2300 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2301 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2302 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2303 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
2304 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2305 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
2306 #define PTR(x) (void *)G(x,32)
2307 #define SETFLD(x,y,z) G(x,32)->y = z
2308 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2309 #define SETOP(x,y,z) G(x,32) z y
2310 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
2311 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
2312 #define SUB1(a,b) G(a,32)(G(b,32))
2313 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2314 #define TEST(x,r,y) (G(x,32) r (y))
2315 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2316
2317 #endif
2318
2319 /* ----- End of mode-specific function call macros ----- */
2320
2321
2322
2323
2324 /*************************************************
2325 * Alternate character tables *
2326 *************************************************/
2327
2328 /* By default, the "tables" pointer in the compile context when calling
2329 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2330 library. However, the tables modifier can be used to select alternate sets of
2331 tables, for different kinds of testing. Note that the locale modifier also
2332 adjusts the tables. */
2333
2334 /* This is the set of tables distributed as default with PCRE2. It recognizes
2335 only ASCII characters. */
2336
/* tables1 holds four consecutive sections: a 256-byte lower casing table, a
256-byte case flipping table, ten 32-byte character class bit maps (320 bytes),
and a 256-byte character type table, 1088 bytes in all. */

static const uint8_t tables1[] = {

/* This table is a lower casing table. Entries 65-90 ('A'-'Z') hold 97-122
('a'-'z'); every other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. 'A'-'Z' and 'a'-'z' are exchanged;
every other entry holds its own index. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations.
Character c is in a class when bit (c & 7) of byte (c >> 3) of its map is set;
all maps here are empty for characters 128-255. */

  /* space: characters 9-13 and 32 */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit: 0-9, A-F, a-f */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit: 0-9 */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper: A-Z */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower: a-z */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word: 0-9, A-Z, a-z and underscore */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph: characters 33-126 */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print: characters 32-126 */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct: characters 33-126 that are neither letters nor digits */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl: characters 0-31 and 127 */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2505
2506 /* This is a set of tables that came originally from a Windows user. It seems
2507 to be at least an approximation of ISO 8859. In particular, there are
2508 characters greater than 128 that are marked as spaces, letters, etc. */
2509
/* tables2 has the same size as tables1 (1088 bytes) and appears to use the
same four-section layout; the section comments below follow from that and from
the values themselves. The entries are written in decimal. */

static const uint8_t tables2[] = {

/* Lower casing table (256 bytes). As well as 'A'-'Z', entries 192-222 other
than 215 hold the value 32 higher (224-254). */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes). 'A'-'Z' and 'a'-'z' are exchanged, and so
are 192-222 and 224-254, except that 215 and 247 are left alone. Entries 223
and 255 also hold their own index. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes (four lines) each, bits
running from the least significant end of each byte. The labels assume the same
order as in tables1 (space, xdigit, digit, upper, lower, word, graph, print,
punct, cntrl); the space, digit, upper and lower maps agree with the character
type table at the end of this array. Unlike tables1, several maps have bits set
for characters above 127. */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Character type table (256 bytes), one entry per character value. The values
use the same bit assignments as the corresponding table in tables1: 1 = white
space, 2 = letter, 4 = decimal digit, 8 = hexadecimal digit, 16 = alphanumeric
or '_', 128 = regular expression metacharacter or binary zero. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2648
2649
2650
2651 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2652 /*************************************************
2653 * Emulated memmove() for systems without it *
2654 *************************************************/
2655
2656 /* This function can make use of bcopy() if it is available. Otherwise do it by
2657 steam, as there are some non-Unix environments that lack both memmove() and
2658 bcopy(). */
2659
/* Copy n bytes from s to d, coping with overlapping regions.

Arguments:
  d          destination
  s          source
  n          number of bytes to copy

Returns:     d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *out = (unsigned char *)d;
const unsigned char *in = (const unsigned char *)s;

if (out > in)
  {
  /* Destination lies above the source: work from the last byte down, so that
  an overlapping part of the source is read before it is overwritten. */
  size_t left = n;
  while (left > 0)
    {
    left--;
    out[left] = in[left];
    }
  }
else
  {
  /* Destination is at or below the source: a plain ascending copy is safe. */
  size_t k = 0;
  while (k < n)
    {
    out[k] = in[k];
    k++;
    }
  }

return d;
#endif   /* not HAVE_BCOPY */
}

/* From here on, every use of memmove() in this file goes to the emulation. */

#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
2686 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
2687
2688
2689
2690 #ifndef HAVE_STRERROR
2691 /*************************************************
2692 * Provide strerror() for non-ANSI libraries *
2693 *************************************************/
2694
2695 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2696 libraries. They may no longer be around, but just in case, we can try to
2697 provide the same facility by this simple alternative function. */
2698
/* The message table and its entry count come from the system library. */

extern char *sys_errlist[];
extern int sys_nerr;

/* Return the text for error number n, or a fixed fallback string when n does
not index an entry of sys_errlist. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
2708 #endif /* HAVE_STRERROR */
2709
2710
2711
2712 /*************************************************
2713 * Local memory functions *
2714 *************************************************/
2715
2716 /* Alternative memory functions, to test functionality. */
2717
/* Allocation function: a wrapper round malloc() that can log what it does.
When show_memory is set, each call is reported on outfile, and a successful
allocation is recorded in malloclist/malloclistlength if there is room.

Arguments:
  size       number of bytes wanted
  data       not used

Returns:     whatever malloc() returned (may be NULL)
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
  return block;
  }

fprintf(outfile, "malloc %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

/* Record the block and its size if the list is not yet full. */

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2746
my_free(void * block,void * data)2747 static void my_free(void *block, void *data)
2748 {
2749 (void)data;
2750 if (show_memory)
2751 {
2752 uint32_t i, j;
2753 BOOL found = FALSE;
2754
2755 fprintf(outfile, "free");
2756 for (i = 0; i < malloclistptr; i++)
2757 {
2758 if (block == malloclist[i])
2759 {
2760 fprintf(outfile, " %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
2761 malloclistptr--;
2762 for (j = i; j < malloclistptr; j++)
2763 {
2764 malloclist[j] = malloclist[j+1];
2765 malloclistlength[j] = malloclistlength[j+1];
2766 }
2767 found = TRUE;
2768 break;
2769 }
2770 }
2771 if (!found) fprintf(outfile, " unremembered block");
2772 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2773 fprintf(outfile, " %p", block); /* Not portable */
2774 #endif
2775 fprintf(outfile, "\n");
2776 }
2777 free(block);
2778 }
2779
2780
2781
2782 /*************************************************
2783 * Callback function for stack guard *
2784 *************************************************/
2785
2786 /* This is set up to be called from pcre2_compile() when the stackguard=n
2787 modifier sets a value greater than zero. The test we do is whether the
2788 parenthesis nesting depth is greater than the value set by the modifier.
2789
2790 Argument: the current parenthesis nesting depth
2791 Returns: non-zero to kill the compilation
2792 */
2793
2794 static int
stack_guard(uint32_t depth,void * user_data)2795 stack_guard(uint32_t depth, void *user_data)
2796 {
2797 (void)user_data;
2798 return depth > pat_patctl.stackguard_test;
2799 }
2800
2801
2802 /*************************************************
2803 * JIT memory callback *
2804 *************************************************/
2805
2806 static PCRE2_JIT_STACK*
jit_callback(void * arg)2807 jit_callback(void *arg)
2808 {
2809 jit_was_used = TRUE;
2810 return (PCRE2_JIT_STACK *)arg;
2811 }
2812
2813
2814 /*************************************************
2815 * Convert UTF-8 character to code point *
2816 *************************************************/
2817
2818 /* This function reads one or more bytes that represent a UTF-8 character,
2819 and returns the codepoint of that character. Note that the function supports
2820 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2821 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2822 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2823 checking, and also for generating 32-bit non-UTF data values above the UTF
2824 limit.
2825
2826 Argument:
2827 utf8bytes a pointer to the byte vector
  vptr         a pointer to a uint32_t to receive the value
2829
2830 Returns: > 0 => the number of bytes consumed
2831 -6 to 0 => malformed UTF-8 character at offset = (-return)
2832 */
2833
2834 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2835 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2836 {
2837 uint32_t c = *utf8bytes++;
2838 uint32_t d = c;
2839 int i, j, s;
2840
2841 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2842 {
2843 if ((d & 0x80) == 0) break;
2844 d <<= 1;
2845 }
2846
2847 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2848 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2849
2850 /* i now has a value in the range 1-5 */
2851
2852 s = 6*i;
2853 d = (c & utf8_table3[i]) << s;
2854
2855 for (j = 0; j < i; j++)
2856 {
2857 c = *utf8bytes++;
2858 if ((c & 0xc0) != 0x80) return -(j+1);
2859 s -= 6;
2860 d |= (c & 0x3f) << s;
2861 }
2862
2863 /* Check that encoding was the correct unique one */
2864
2865 for (j = 0; j < utf8_table1_size; j++)
2866 if (d <= (uint32_t)utf8_table1[j]) break;
2867 if (j != i) return -(i+1);
2868
2869 /* Valid value */
2870
2871 *vptr = d;
2872 return i+1;
2873 }
2874
2875
2876
2877 /*************************************************
2878 * Print one character *
2879 *************************************************/
2880
2881 /* Print a single character either literally, or as a hex escape, and count how
2882 many printed characters are used.
2883
2884 Arguments:
2885 c the character
2886 utf TRUE in UTF mode
2887 f the FILE to print to, or NULL just to count characters
2888
2889 Returns: number of characters written
2890 */
2891
2892 static int
pchar(uint32_t c,BOOL utf,FILE * f)2893 pchar(uint32_t c, BOOL utf, FILE *f)
2894 {
2895 int n = 0;
2896 char tempbuffer[16];
2897
2898 if (PRINTOK(c))
2899 {
2900 if (f != NULL) fprintf(f, "%c", c);
2901 return 1;
2902 }
2903
2904 if (c < 0x100)
2905 {
2906 if (utf)
2907 {
2908 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2909 return 6;
2910 }
2911 else
2912 {
2913 if (f != NULL) fprintf(f, "\\x%02x", c);
2914 return 4;
2915 }
2916 }
2917
2918 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2919 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2920
2921 return n >= 0 ? n : 0;
2922 }
2923
2924
2925
2926 #ifdef SUPPORT_PCRE2_16
2927 /*************************************************
2928 * Find length of 0-terminated 16-bit string *
2929 *************************************************/
2930
/* Count the code units in a zero-terminated 16-bit string.

Argument:    p   points to the first code unit
Returns:     the number of code units before the terminating zero
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;

/* Convert the pointer difference directly to the return type. Going through
int, as was done previously, would corrupt a length greater than INT_MAX. */

return (size_t)(pp - p);
}
2937 #endif /* SUPPORT_PCRE2_16 */
2938
2939
2940
2941 #ifdef SUPPORT_PCRE2_32
2942 /*************************************************
2943 * Find length of 0-terminated 32-bit string *
2944 *************************************************/
2945
/* Count the code units in a zero-terminated 32-bit string.

Argument:    p   points to the first code unit
Returns:     the number of code units before the terminating zero
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;

/* Convert the pointer difference directly to the return type. Going through
int, as was done previously, would corrupt a length greater than INT_MAX. */

return (size_t)(pp - p);
}
2952 #endif /* SUPPORT_PCRE2_32 */
2953
2954
2955 #ifdef SUPPORT_PCRE2_8
2956 /*************************************************
2957 * Print 8-bit character string *
2958 *************************************************/
2959
2960 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2961 For printing *MARK strings, a negative length is given. If handed a NULL file,
2962 just counts chars without printing (because pchar() does that). */
2963
/* Print (or, with a NULL file, just measure) a string of 8-bit code units by
passing each character to pchar(). A negative length means that the real length
is held in the code unit just before the string.

Arguments:
  p          points to the string
  length     number of code units, or negative as described above
  utf        TRUE to decode UTF-8 sequences into single characters
  f          the FILE to print to, or NULL just to count

Returns:     the sum of the values returned by pchar()
*/

static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
uint32_t c = 0;
int total = 0;
int remaining = (length < 0)? p[-1] : length;

while (remaining > 0)
  {
  int rc = utf? utf82ord(p, &c) : 0;

  /* Use the decoded character only if it is valid and lies wholly within the
  string; otherwise fall back to treating the next byte on its own. */

  if (rc > 0 && rc <= remaining)
    {
    p += rc;
    remaining -= rc;
    }
  else
    {
    c = *p++;
    remaining--;
    }

  total += pchar(c, utf, f);
  }

return total;
}
2989 #endif
2990
2991
2992 #ifdef SUPPORT_PCRE2_16
2993 /*************************************************
2994 * Print 16-bit character string *
2995 *************************************************/
2996
2997 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2998 For printing *MARK strings, a negative length is given. If handed a NULL file,
2999 just counts chars without printing. */
3000
/* Print (or, with a NULL file, just measure) a string of 16-bit code units by
passing each character to pchar(). A negative length means that the real length
is held in the code unit just before the string.

Arguments:
  p          points to the string
  length     number of code units, or negative as described above
  utf        TRUE to combine surrogate pairs into single characters
  f          the FILE to print to, or NULL just to count

Returns:     the sum of the values returned by pchar()
*/

static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = (length < 0)? p[-1] : length;

while (remaining > 0)
  {
  uint32_t c = *p++ & 0xffff;
  remaining--;

  /* In UTF mode, a high surrogate that is followed within the string by a low
  surrogate is merged with it; an unpaired surrogate is printed as it stands. */

  if (utf && remaining > 0 && c >= 0xD800 && c < 0xDC00)
    {
    uint32_t low = *p & 0xffff;
    if (low >= 0xDC00 && low <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      remaining--;
      }
    }

  total += pchar(c, utf, f);
  }

return total;
}
3022 #endif /* SUPPORT_PCRE2_16 */
3023
3024
3025
3026 #ifdef SUPPORT_PCRE2_32
3027 /*************************************************
3028 * Print 32-bit character string *
3029 *************************************************/
3030
3031 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3032 For printing *MARK strings, a negative length is given. If handed a NULL file,
3033 just counts chars without printing. */
3034
/* Print (or, with a NULL file, just measure) a string of 32-bit code units by
passing each one to pchar(). A negative length means that the real length is
held in the code unit just before the string.

Arguments:
  p          points to the string
  length     number of code units, or negative as described above
  utf        passed on to pchar()
  f          the FILE to print to, or NULL just to count

Returns:     the sum of the values returned by pchar()
*/

static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = length;
(void)(utf);  /* Avoid compiler warning */

if (remaining < 0) remaining = p[-1];

for (; remaining > 0; remaining--)
  {
  uint32_t c = *p++;
  total += pchar(c, utf, f);
  }

return total;
}
3048 #endif /* SUPPORT_PCRE2_32 */
3049
3050
3051
3052
3053 /*************************************************
3054 * Convert character value to UTF-8 *
3055 *************************************************/
3056
3057 /* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
3059 8-bit library is not supported, to generate UTF-8 output for non-ASCII
3060 characters.
3061
3062 Arguments:
3063 cvalue the character value
3064 utf8bytes pointer to buffer for result - at least 6 bytes long
3065
Returns:     number of bytes placed in the buffer, or -1 if cvalue is greater
             than 0x7fffffff
3067 */
3068
3069 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3070 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3071 {
3072 int i, j;
3073 if (cvalue > 0x7fffffffu)
3074 return -1;
3075 for (i = 0; i < utf8_table1_size; i++)
3076 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3077 utf8bytes += i;
3078 for (j = i; j > 0; j--)
3079 {
3080 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3081 cvalue >>= 6;
3082 }
3083 *utf8bytes = utf8_table2[i] | cvalue;
3084 return i + 1;
3085 }
3086
3087
3088
3089 #ifdef SUPPORT_PCRE2_16
3090 /*************************************************
3091 * Convert string to 16-bit *
3092 *************************************************/
3093
3094 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3095 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3096 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3097 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3098 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3099 greater than 0xffff.
3100
3101 If all the input bytes are ASCII, the space needed for a 16-bit string is
3102 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3103 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3104 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3105 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3106 save repeated re-sizing.
3107
3108 Note that this function does not object to surrogate values. This is
3109 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3110 for the purpose of testing that they are correctly faulted.
3111
3112 Arguments:
3113 p points to a byte string
3114 utf true in UTF mode
3115 lenptr points to number of bytes in the string (excluding trailing zero)
3116
3117 Returns: 0 on success, with the length updated to the number of 16-bit
3118 data items used (excluding the trailing zero)
3119 OR -1 if a UTF-8 string is malformed
3120 OR -2 if a value > 0x10ffff is encountered in UTF mode
3121 OR -3 if a value > 0xffff is encountered when not in UTF mode
3122 */
3123
3124 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3125 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3126 {
3127 uint16_t *pp;
3128 PCRE2_SIZE len = *lenptr;
3129
3130 if (pbuffer16_size < 2*len + 2)
3131 {
3132 if (pbuffer16 != NULL) free(pbuffer16);
3133 pbuffer16_size = 2*len + 2;
3134 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3135 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3136 if (pbuffer16 == NULL)
3137 {
3138 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3139 SIZ_CAST pbuffer16_size);
3140 exit(1);
3141 }
3142 }
3143
3144 pp = pbuffer16;
3145 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3146 {
3147 for (; len > 0; len--) *pp++ = *p++;
3148 }
3149 else while (len > 0)
3150 {
3151 uint32_t c;
3152 int chlen = utf82ord(p, &c);
3153 if (chlen <= 0) return -1;
3154 if (!utf && c > 0xffff) return -3;
3155 if (c > 0x10ffff) return -2;
3156 p += chlen;
3157 len -= chlen;
3158 if (c < 0x10000) *pp++ = c; else
3159 {
3160 c -= 0x10000;
3161 *pp++ = 0xD800 | (c >> 10);
3162 *pp++ = 0xDC00 | (c & 0x3ff);
3163 }
3164 }
3165
3166 *pp = 0;
3167 *lenptr = pp - pbuffer16;
3168 return 0;
3169 }
3170 #endif
3171
3172
3173
3174 #ifdef SUPPORT_PCRE2_32
3175 /*************************************************
3176 * Convert string to 32-bit *
3177 *************************************************/
3178
3179 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3180 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3181 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3182 limit of 0x10ffff cause an error.
3183
3184 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3185 is set, and no limit is imposed. There is special interpretation of the 0xff
3186 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3187 next character to be set. This provides a way of generating 32-bit characters
3188 greater than 0x7fffffff.
3189
3190 If all the input bytes are ASCII, the space needed for a 32-bit string is
3191 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3192 string is no more than four times, because the number of characters must be
3193 less than the number of bytes. The result is always left in pbuffer32. Impose a
3194 minimum size to save repeated re-sizing.
3195
3196 Note that this function does not object to surrogate values. This is
3197 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3198 for the purpose of testing that they are correctly faulted.
3199
3200 Arguments:
3201 p points to a byte string
3202 utf true in UTF mode
3203 lenptr points to number of bytes in the string (excluding trailing zero)
3204
3205 Returns: 0 on success, with the length updated to the number of 32-bit
3206 data items used (excluding the trailing zero)
3207 OR -1 if a UTF-8 string is malformed
3208 OR -2 if a value > 0x10ffff is encountered in UTF mode
3209 */
3210
3211 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3212 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3213 {
3214 uint32_t *pp;
3215 PCRE2_SIZE len = *lenptr;
3216
3217 if (pbuffer32_size < 4*len + 4)
3218 {
3219 if (pbuffer32 != NULL) free(pbuffer32);
3220 pbuffer32_size = 4*len + 4;
3221 if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3222 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3223 if (pbuffer32 == NULL)
3224 {
3225 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3226 SIZ_CAST pbuffer32_size);
3227 exit(1);
3228 }
3229 }
3230
3231 pp = pbuffer32;
3232
3233 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3234 {
3235 for (; len > 0; len--) *pp++ = *p++;
3236 }
3237
3238 else while (len > 0)
3239 {
3240 int chlen;
3241 uint32_t c;
3242 uint32_t topbit = 0;
3243 if (!utf && *p == 0xff && len > 1)
3244 {
3245 topbit = 0x80000000u;
3246 p++;
3247 len--;
3248 }
3249 chlen = utf82ord(p, &c);
3250 if (chlen <= 0) return -1;
3251 if (utf && c > 0x10ffff) return -2;
3252 p += chlen;
3253 len -= chlen;
3254 *pp++ = c | topbit;
3255 }
3256
3257 *pp = 0;
3258 *lenptr = pp - pbuffer32;
3259 return 0;
3260 }
3261 #endif /* SUPPORT_PCRE2_32 */
3262
3263
3264
3265 /*************************************************
3266 * Move back by so many characters *
3267 *************************************************/
3268
3269 /* Given a code unit offset in a subject string, move backwards by a number of
3270 characters, and return the resulting offset.
3271
3272 Arguments:
3273 subject pointer to the string
3274 offset start offset
3275 count count to move back by
3276 utf TRUE if in UTF mode
3277
3278 Returns: a possibly changed offset
3279 */
3280
3281 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3282 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3283 {
3284 if (!utf || test_mode == PCRE32_MODE)
3285 return (count >= offset)? 0 : (offset - count);
3286
3287 else if (test_mode == PCRE8_MODE)
3288 {
3289 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3290 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3291 {
3292 pp--;
3293 while ((*pp & 0xc0) == 0x80) pp--;
3294 }
3295 return pp - (PCRE2_SPTR8)subject;
3296 }
3297
3298 else /* 16-bit mode */
3299 {
3300 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3301 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3302 {
3303 pp--;
3304 if ((*pp & 0xfc00) == 0xdc00) pp--;
3305 }
3306 return pp - (PCRE2_SPTR16)subject;
3307 }
3308 }
3309
3310
3311
3312 /*************************************************
3313 * Expand input buffers *
3314 *************************************************/
3315
3316 /* This function doubles the size of the input buffer and the buffer for
3317 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3318 the new ones.
3319
3320 Arguments: none
3321 Returns: nothing (aborts if malloc() fails)
3322 */
3323
3324 static void
expand_input_buffers(void)3325 expand_input_buffers(void)
3326 {
3327 int new_pbuffer8_size = 2*pbuffer8_size;
3328 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3329 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3330
3331 if (new_buffer == NULL || new_pbuffer8 == NULL)
3332 {
3333 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3334 exit(1);
3335 }
3336
3337 memcpy(new_buffer, buffer, pbuffer8_size);
3338 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3339
3340 pbuffer8_size = new_pbuffer8_size;
3341
3342 free(buffer);
3343 free(pbuffer8);
3344
3345 buffer = new_buffer;
3346 pbuffer8 = new_pbuffer8;
3347 }
3348
3349
3350
3351 /*************************************************
3352 * Read or extend an input line *
3353 *************************************************/
3354
3355 /* Input lines are read into buffer, but both patterns and data lines can be
3356 continued over multiple input lines. In addition, if the buffer fills up, we
3357 want to automatically expand it so as to be able to handle extremely large
3358 lines that are needed for certain stress tests, although this is less likely
3359 now that there are repetition features for both patterns and data. When the
3360 input buffer is expanded, the other two buffers must also be expanded likewise,
3361 and the contents of pbuffer, which are a copy of the input for callouts, must
3362 be preserved (for when expansion happens for a data line). This is not the most
3363 optimal way of handling this, but hey, this is just a test program!
3364
3365 Arguments:
3366 f the file to read
3367 start where in buffer to start (this *must* be within buffer)
3368 prompt for stdin or readline()
3369
3370 Returns: pointer to the start of new data
3371 could be a copy of start, or could be moved
3372 NULL if no data read and EOF reached
3373 */
3374
3375 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3376 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3377 {
3378 uint8_t *here = start;
3379
3380 for (;;)
3381 {
3382 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3383
3384 if (rlen > 1000)
3385 {
3386 size_t dlen;
3387
3388 /* If libreadline or libedit support is required, use readline() to read a
3389 line if the input is a terminal. Note that readline() removes the trailing
3390 newline, so we must put it back again, to be compatible with fgets(). */
3391
3392 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3393 if (INTERACTIVE(f))
3394 {
3395 size_t len;
3396 char *s = readline(prompt);
3397 if (s == NULL) return (here == start)? NULL : start;
3398 len = strlen(s);
3399 if (len > 0) add_history(s);
3400 if (len > rlen - 1) len = rlen - 1;
3401 memcpy(here, s, len);
3402 here[len] = '\n';
3403 here[len+1] = 0;
3404 free(s);
3405 }
3406 else
3407 #endif
3408
3409 /* Read the next line by normal means, prompting if the file is a tty. */
3410
3411 {
3412 if (INTERACTIVE(f)) printf("%s", prompt);
3413 if (fgets((char *)here, rlen, f) == NULL)
3414 return (here == start)? NULL : start;
3415 }
3416
3417 dlen = strlen((char *)here);
3418 here += dlen;
3419
3420 /* Check for end of line reached. Take care not to read data from before
3421 start (dlen will be zero for a file starting with a binary zero). */
3422
3423 if (here > start && here[-1] == '\n') return start;
3424
3425 /* If we have not read a newline when reading a file, we have either filled
3426 the buffer or reached the end of the file. We can detect the former by
3427 checking that the string fills the buffer, and the latter by feof(). If
3428 neither of these is true, it means we read a binary zero which has caused
3429 strlen() to give a short length. This is a hard error because pcre2test
3430 expects to work with C strings. */
3431
3432 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3433 {
3434 fprintf(outfile, "** Binary zero encountered in input\n");
3435 fprintf(outfile, "** pcre2test run abandoned\n");
3436 exit(1);
3437 }
3438 }
3439
3440 else
3441 {
3442 size_t start_offset = start - buffer;
3443 size_t here_offset = here - buffer;
3444 expand_input_buffers();
3445 start = buffer + start_offset;
3446 here = buffer + here_offset;
3447 }
3448 }
3449
3450 /* Control never gets here */
3451 }
3452
3453
3454
3455 /*************************************************
3456 * Case-independent strncmp() function *
3457 *************************************************/
3458
3459 /*
3460 Arguments:
3461 s first string
3462 t second string
3463 n number of characters to compare
3464
3465 Returns: < 0, = 0, or > 0, according to the comparison
3466 */
3467
/* Case-independent comparison of at most n characters, with strncmp()
semantics: the comparison stops at the first difference, after n characters,
or when both strings end at the same position. Previously the loop ignored
terminating zeros and went on to compare whatever bytes followed them, and a
negative n was not treated as "nothing to compare". */

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;   /* Both strings ended together: they are equal */
  }
return 0;
}
3478
3479
3480
3481 /*************************************************
3482 * Scan the main modifier list *
3483 *************************************************/
3484
3485 /* This function searches the modifier list for a long modifier name.
3486
3487 Argument:
3488 p start of the name
3489 len length of the name
3490
3491 Returns: an index in the modifier list, or -1 on failure
3492 */
3493
3494 static int
scan_modifiers(const uint8_t * p,unsigned int len)3495 scan_modifiers(const uint8_t *p, unsigned int len)
3496 {
3497 int bot = 0;
3498 int top = MODLISTCOUNT;
3499
3500 while (top > bot)
3501 {
3502 int mid = (bot + top)/2;
3503 unsigned int mlen = strlen(modlist[mid].name);
3504 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3505 if (c == 0)
3506 {
3507 if (len == mlen) return mid;
3508 c = (int)len - (int)mlen;
3509 }
3510 if (c > 0) bot = mid + 1; else top = mid;
3511 }
3512
3513 return -1;
3514
3515 }
3516
3517
3518
3519 /*************************************************
3520 * Check a modifier and find its field *
3521 *************************************************/
3522
3523 /* This function is called when a modifier has been identified. We check that
3524 it is allowed here and find the field that is to be changed.
3525
3526 Arguments:
3527 m the modifier list entry
3528 ctx CTX_PAT => pattern context
3529 CTX_POPPAT => pattern context for popped pattern
3530 CTX_DEFPAT => default pattern context
3531 CTX_DAT => data context
3532 CTX_DEFDAT => default data context
3533 pctl point to pattern control block
3534 dctl point to data control block
3535 c a single character or 0
3536
3537 Returns: a field pointer or NULL
3538 */
3539
3540 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3541 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3542 {
3543 void *field = NULL;
3544 PCRE2_SIZE offset = m->offset;
3545
3546 if (restrict_for_perl_test) switch(m->which)
3547 {
3548 case MOD_PNDP:
3549 case MOD_PATP:
3550 case MOD_PDP:
3551 break;
3552
3553 default:
3554 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3555 m->name);
3556 return NULL;
3557 }
3558
3559 switch (m->which)
3560 {
3561 case MOD_CTC: /* Compile context modifier */
3562 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3563 else if (ctx == CTX_PAT) field = PTR(pat_context);
3564 break;
3565
3566 case MOD_CTM: /* Match context modifier */
3567 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3568 else if (ctx == CTX_DAT) field = PTR(dat_context);
3569 break;
3570
3571 case MOD_DAT: /* Data line modifier */
3572 if (dctl != NULL) field = dctl;
3573 break;
3574
3575 case MOD_PAT: /* Pattern modifier */
3576 case MOD_PATP: /* Allowed for Perl test */
3577 if (pctl != NULL) field = pctl;
3578 break;
3579
3580 case MOD_PD: /* Pattern or data line modifier */
3581 case MOD_PDP: /* Ditto, allowed for Perl test */
3582 case MOD_PND: /* Ditto, but not default pattern */
3583 case MOD_PNDP: /* Ditto, allowed for Perl test */
3584 if (dctl != NULL) field = dctl;
3585 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3586 ctx != CTX_DEFPAT))
3587 field = pctl;
3588 break;
3589 }
3590
3591 if (field == NULL)
3592 {
3593 if (c == 0)
3594 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3595 else
3596 fprintf(outfile, "** /%c is not valid here\n", c);
3597 return NULL;
3598 }
3599
3600 return (char *)field + offset;
3601 }
3602
3603
3604
3605 /*************************************************
3606 * Decode a modifier list *
3607 *************************************************/
3608
3609 /* A pointer to a control block is NULL when called in cases when that block is
3610 not relevant. They are never all relevant in one call. At least one of patctl
3611 and datctl is NULL. The second argument specifies which context to use for
3612 modifiers that apply to contexts.
3613
3614 Arguments:
3615 p point to modifier string
3616 ctx CTX_PAT => pattern context
3617 CTX_POPPAT => pattern context for popped pattern
3618 CTX_DEFPAT => default pattern context
3619 CTX_DAT => data context
3620 CTX_DEFDAT => default data context
3621 pctl point to pattern control block
3622 dctl point to data control block
3623
3624 Returns: TRUE if successful decode, FALSE otherwise
3625 */
3626
3627 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3628 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3629 {
3630 uint8_t *ep, *pp;
3631 long li;
3632 unsigned long uli;
3633 BOOL first = TRUE;
3634
3635 for (;;)
3636 {
3637 void *field;
3638 modstruct *m;
3639 BOOL off = FALSE;
3640 unsigned int i, len;
3641 int index;
3642 char *endptr;
3643
3644 /* Skip white space and commas. */
3645
3646 while (isspace(*p) || *p == ',') p++;
3647 if (*p == 0) break;
3648
3649 /* Find the end of the item; lose trailing whitespace at end of line. */
3650
3651 for (ep = p; *ep != 0 && *ep != ','; ep++);
3652 if (*ep == 0)
3653 {
3654 while (ep > p && isspace(ep[-1])) ep--;
3655 *ep = 0;
3656 }
3657
3658 /* Remember if the first character is '-'. */
3659
3660 if (*p == '-')
3661 {
3662 off = TRUE;
3663 p++;
3664 }
3665
3666 /* Find the length of a full-length modifier name, and scan for it. */
3667
3668 pp = p;
3669 while (pp < ep && *pp != '=') pp++;
3670 index = scan_modifiers(p, pp - p);
3671
3672 /* If the first modifier is unrecognized, try to interpret it as a sequence
3673 of single-character abbreviated modifiers. None of these modifiers have any
3674 associated data. They just set options or control bits. */
3675
3676 if (index < 0)
3677 {
3678 uint32_t cc;
3679 uint8_t *mp = p;
3680
3681 if (!first)
3682 {
3683 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3684 if (ep - p == 1)
3685 fprintf(outfile, "** Single-character modifiers must come first\n");
3686 return FALSE;
3687 }
3688
3689 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3690 {
3691 for (i = 0; i < C1MODLISTCOUNT; i++)
3692 if (cc == c1modlist[i].onechar) break;
3693
3694 if (i >= C1MODLISTCOUNT)
3695 {
3696 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3697 *p, (int)(ep-mp), mp);
3698 return FALSE;
3699 }
3700
3701 if (c1modlist[i].index >= 0)
3702 {
3703 index = c1modlist[i].index;
3704 }
3705
3706 else
3707 {
3708 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3709 strlen(c1modlist[i].fullname));
3710 if (index < 0)
3711 {
3712 fprintf(outfile, "** Internal error: single-character equivalent "
3713 "modifier '%s' not found\n", c1modlist[i].fullname);
3714 return FALSE;
3715 }
3716 c1modlist[i].index = index; /* Cache for next time */
3717 }
3718
3719 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3720 if (field == NULL) return FALSE;
3721
3722 /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3723 PCRE2_EXTENDED_MORE. */
3724
3725 if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3726 {
3727 *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3728 *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3729 }
3730 else
3731 *((uint32_t *)field) |= modlist[index].value;
3732 }
3733
3734 continue; /* With tne next (fullname) modifier */
3735 }
3736
3737 /* We have a match on a full-name modifier. Check for the existence of data
3738 when needed. */
3739
3740 m = modlist + index; /* Save typing */
3741 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3742 (m->type != MOD_IND || *pp == '='))
3743 {
3744 if (*pp++ != '=')
3745 {
3746 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3747 return FALSE;
3748 }
3749 if (off)
3750 {
3751 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3752 return FALSE;
3753 }
3754 }
3755
3756 /* These on/off types have no data. */
3757
3758 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3759 {
3760 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3761 return FALSE;
3762 }
3763
3764 /* Set the data length for those types that have data. Then find the field
3765 that is to be set. If check_modifier() returns NULL, it has already output an
3766 error message. */
3767
3768 len = ep - pp;
3769 field = check_modifier(m, ctx, pctl, dctl, 0);
3770 if (field == NULL) return FALSE;
3771
3772 /* Process according to data type. */
3773
3774 switch (m->type)
3775 {
3776 case MOD_CTL:
3777 case MOD_OPT:
3778 if (off) *((uint32_t *)field) &= ~m->value;
3779 else *((uint32_t *)field) |= m->value;
3780 break;
3781
3782 case MOD_BSR:
3783 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3784 {
3785 #ifdef BSR_ANYCRLF
3786 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3787 #else
3788 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3789 #endif
3790 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3791 else dctl->control2 &= ~CTL2_BSR_SET;
3792 }
3793 else
3794 {
3795 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3796 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3797 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3798 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3799 else goto INVALID_VALUE;
3800 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3801 else dctl->control2 |= CTL2_BSR_SET;
3802 }
3803 pp = ep;
3804 break;
3805
3806 case MOD_CHR: /* A single character */
3807 *((uint32_t *)field) = *pp++;
3808 break;
3809
3810 case MOD_CON: /* A convert type/options list */
3811 for (;; pp++)
3812 {
3813 uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3814 len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3815 for (i = 0; i < convertlistcount; i++)
3816 {
3817 if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3818 {
3819 if (*((uint32_t *)field) == CONVERT_UNSET)
3820 *((uint32_t *)field) = convertlist[i].option;
3821 else
3822 *((uint32_t *)field) |= convertlist[i].option;
3823 break;
3824 }
3825 }
3826 if (i >= convertlistcount) goto INVALID_VALUE;
3827 pp += len;
3828 if (*pp != ':') break;
3829 }
3830 break;
3831
3832 case MOD_IN2: /* One or two unsigned integers */
3833 if (!isdigit(*pp)) goto INVALID_VALUE;
3834 uli = strtoul((const char *)pp, &endptr, 10);
3835 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3836 ((uint32_t *)field)[0] = (uint32_t)uli;
3837 if (*endptr == ':')
3838 {
3839 uli = strtoul((const char *)endptr+1, &endptr, 10);
3840 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3841 ((uint32_t *)field)[1] = (uint32_t)uli;
3842 }
3843 else ((uint32_t *)field)[1] = 0;
3844 pp = (uint8_t *)endptr;
3845 break;
3846
3847 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3848 less than ULONG_MAX. So first test for overflowing the long int, and then
3849 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3850
3851 case MOD_SIZ: /* PCRE2_SIZE value */
3852 if (!isdigit(*pp)) goto INVALID_VALUE;
3853 uli = strtoul((const char *)pp, &endptr, 10);
3854 if (uli == ULONG_MAX) goto INVALID_VALUE;
3855 #if ULONG_MAX > PCRE2_SIZE_MAX
3856 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3857 #endif
3858 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3859 pp = (uint8_t *)endptr;
3860 break;
3861
3862 case MOD_IND: /* Unsigned integer with default */
3863 if (len == 0)
3864 {
3865 *((uint32_t *)field) = (uint32_t)(m->value);
3866 break;
3867 }
3868 /* Fall through */
3869
3870 case MOD_INT: /* Unsigned integer */
3871 if (!isdigit(*pp)) goto INVALID_VALUE;
3872 uli = strtoul((const char *)pp, &endptr, 10);
3873 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3874 *((uint32_t *)field) = (uint32_t)uli;
3875 pp = (uint8_t *)endptr;
3876 break;
3877
3878 case MOD_INS: /* Signed integer */
3879 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3880 li = strtol((const char *)pp, &endptr, 10);
3881 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3882 *((int32_t *)field) = (int32_t)li;
3883 pp = (uint8_t *)endptr;
3884 break;
3885
3886 case MOD_NL:
3887 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3888 if (len == strlen(newlines[i]) &&
3889 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3890 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3891 if (i == 0)
3892 {
3893 *((uint16_t *)field) = NEWLINE_DEFAULT;
3894 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3895 else dctl->control2 &= ~CTL2_NL_SET;
3896 }
3897 else
3898 {
3899 *((uint16_t *)field) = i;
3900 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3901 else dctl->control2 |= CTL2_NL_SET;
3902 }
3903 pp = ep;
3904 break;
3905
3906 case MOD_NN: /* Name or (signed) number; may be several */
3907 if (isdigit(*pp) || *pp == '-')
3908 {
3909 int ct = MAXCPYGET - 1;
3910 int32_t value;
3911 li = strtol((const char *)pp, &endptr, 10);
3912 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3913 value = (int32_t)li;
3914 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3915 if (value >= 0) /* Add new number */
3916 {
3917 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3918 field = (char *)field + sizeof(int32_t);
3919 if (ct <= 0)
3920 {
3921 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3922 return FALSE;
3923 }
3924 }
3925 *((int32_t *)field) = value;
3926 if (ct > 0) ((int32_t *)field)[1] = -1;
3927 pp = (uint8_t *)endptr;
3928 }
3929
3930 /* Multiple strings are put end to end. */
3931
3932 else
3933 {
3934 char *nn = (char *)field;
3935 if (len > 0) /* Add new name */
3936 {
3937 if (len > MAX_NAME_SIZE)
3938 {
3939 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3940 return FALSE;
3941 }
3942 while (*nn != 0) nn += strlen(nn) + 1;
3943 if (nn + len + 2 - (char *)field > LENCPYGET)
3944 {
3945 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3946 m->name);
3947 return FALSE;
3948 }
3949 memcpy(nn, pp, len);
3950 }
3951 nn[len] = 0 ;
3952 nn[len+1] = 0;
3953 pp = ep;
3954 }
3955 break;
3956
3957 case MOD_STR:
3958 if (len + 1 > m->value)
3959 {
3960 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3961 m->name, m->value - 1);
3962 return FALSE;
3963 }
3964 memcpy(field, pp, len);
3965 ((uint8_t *)field)[len] = 0;
3966 pp = ep;
3967 break;
3968 }
3969
3970 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3971 {
3972 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3973 return FALSE;
3974 }
3975
3976 p = pp;
3977 first = FALSE;
3978
3979 if (ctx == CTX_POPPAT &&
3980 (pctl->options != 0 ||
3981 pctl->tables_id != 0 ||
3982 pctl->locale[0] != 0 ||
3983 (pctl->control & NOTPOP_CONTROLS) != 0))
3984 {
3985 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3986 return FALSE;
3987 }
3988 }
3989
3990 return TRUE;
3991
3992 INVALID_VALUE:
3993 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3994 return FALSE;
3995 }
3996
3997
3998 /*************************************************
3999 * Get info from a pattern *
4000 *************************************************/
4001
4002 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4003 pattern.
4004
4005 Arguments:
4006 what code for the required information
4007 where where to put the answer
4008 unsetok PCRE2_ERROR_UNSET is an "expected" result
4009
4010 Returns: the return from pcre2_pattern_info()
4011 */
4012
4013 static int
pattern_info(int what,void * where,BOOL unsetok)4014 pattern_info(int what, void *where, BOOL unsetok)
4015 {
4016 int rc;
4017 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
4018 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4019 if (rc >= 0) return 0;
4020 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4021 {
4022 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4023 what);
4024 if (rc == PCRE2_ERROR_BADMODE)
4025 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4026 "%d-bit mode\n", test_mode,
4027 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4028 }
4029 return rc;
4030 }
4031
4032
4033
4034 #ifdef SUPPORT_PCRE2_8
4035 /*************************************************
4036 * Show something in a list *
4037 *************************************************/
4038
4039 /* This function just helps to keep the code that uses it tidier. It's used for
4040 various lists of things where there needs to be introductory text before the
4041 first item. As these calls are all in the POSIX-support code, they happen only
4042 when 8-bit mode is supported. */
4043
4044 static void
prmsg(const char ** msg,const char * s)4045 prmsg(const char **msg, const char *s)
4046 {
4047 fprintf(outfile, "%s %s", *msg, s);
4048 *msg = "";
4049 }
4050 #endif /* SUPPORT_PCRE2_8 */
4051
4052
4053
4054 /*************************************************
4055 * Show control bits *
4056 *************************************************/
4057
4058 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4059 Because the bits are unique, this can be used for both pattern and data control
4060 words.
4061
4062 Arguments:
4063 controls control bits
4064 controls2 more control bits
4065 before text to print before
4066
4067 Returns: nothing
4068 */
4069
4070 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4071 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4072 {
4073 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4074 before,
4075 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4076 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4077 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4078 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4079 ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4080 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4081 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4082 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4083 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4084 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4085 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4086 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4087 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4088 ((controls & CTL_DFA) != 0)? " dfa" : "",
4089 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4090 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4091 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4092 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4093 ((controls & CTL_GETALL) != 0)? " getall" : "",
4094 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4095 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4096 ((controls & CTL_INFO) != 0)? " info" : "",
4097 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4098 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4099 ((controls & CTL_MARK) != 0)? " mark" : "",
4100 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4101 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4102 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4103 ((controls & CTL_POSIX) != 0)? " posix" : "",
4104 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4105 ((controls & CTL_PUSH) != 0)? " push" : "",
4106 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4107 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4108 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4109 ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4110 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4111 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4112 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4113 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4114 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4115 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4116 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4117 }
4118
4119
4120
4121 /*************************************************
4122 * Show compile options *
4123 *************************************************/
4124
4125 /* Called from show_pattern_info() and for unsupported POSIX options.
4126
4127 Arguments:
4128 options an options word
4129 before text to print before
4130 after text to print after
4131
4132 Returns: nothing
4133 */
4134
4135 static void
show_compile_options(uint32_t options,const char * before,const char * after)4136 show_compile_options(uint32_t options, const char *before, const char *after)
4137 {
4138 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4139 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4140 before,
4141 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4142 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4143 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4144 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4145 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4146 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4147 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4148 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4149 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4150 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4151 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4152 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4153 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4154 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4155 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4156 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4157 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4158 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4159 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4160 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4161 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4162 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4163 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4164 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4165 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4166 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4167 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4168 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4169 ((options & PCRE2_UTF) != 0)? " utf" : "",
4170 after);
4171 }
4172
4173
4174 /*************************************************
4175 * Show compile extra options *
4176 *************************************************/
4177
4178 /* Called from show_pattern_info() and for unsupported POSIX options.
4179
4180 Arguments:
4181 options an options word
4182 before text to print before
4183 after text to print after
4184
4185 Returns: nothing
4186 */
4187
4188 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4189 show_compile_extra_options(uint32_t options, const char *before,
4190 const char *after)
4191 {
4192 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4193 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4194 before,
4195 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4196 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4197 ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4198 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4199 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4200 ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4201 after);
4202 }
4203
4204
4205
4206 #ifdef SUPPORT_PCRE2_8
4207 /*************************************************
4208 * Show match options *
4209 *************************************************/
4210
4211 /* Called for unsupported POSIX options. */
4212
4213 static void
show_match_options(uint32_t options)4214 show_match_options(uint32_t options)
4215 {
4216 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4217 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4218 ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4219 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4220 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4221 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4222 ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4223 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4224 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4225 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4226 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4227 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4228 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4229 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4230 }
4231 #endif /* SUPPORT_PCRE2_8 */
4232
4233
4234
4235 /*************************************************
4236 * Show memory usage info for a pattern *
4237 *************************************************/
4238
4239 static void
show_memory_info(void)4240 show_memory_info(void)
4241 {
4242 uint32_t name_count, name_entry_size;
4243 size_t size, cblock_size;
4244
4245 /* One of the test_mode values will always be true, but to stop a compiler
4246 warning we must initialize cblock_size. */
4247
4248 cblock_size = 0;
4249 #ifdef SUPPORT_PCRE2_8
4250 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4251 #endif
4252 #ifdef SUPPORT_PCRE2_16
4253 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4254 #endif
4255 #ifdef SUPPORT_PCRE2_32
4256 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4257 #endif
4258
4259 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4260 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4261 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4262 fprintf(outfile, "Memory allocation (code space): %d\n",
4263 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4264 if (pat_patctl.jit != 0)
4265 {
4266 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4267 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4268 }
4269 }
4270
4271
4272
4273 /*************************************************
4274 * Show frame size info for a pattern *
4275 *************************************************/
4276
4277 static void
show_framesize(void)4278 show_framesize(void)
4279 {
4280 size_t frame_size;
4281 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4282 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4283 }
4284
4285
4286
4287 /*************************************************
4288 * Get and output an error message *
4289 *************************************************/
4290
4291 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4292 print_error_message(int errorcode, const char *before, const char *after)
4293 {
4294 int len;
4295 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4296 if (len < 0)
4297 {
4298 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4299 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4300 }
4301 else
4302 {
4303 fprintf(outfile, "%s", before);
4304 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4305 fprintf(outfile, "%s", after);
4306 }
4307 return len >= 0;
4308 }
4309
4310
4311 /*************************************************
4312 * Callback function for callout enumeration *
4313 *************************************************/
4314
4315 /* The only differences in the callout enumeration block for different code
4316 unit widths are that the pointers to the subject, the most recent MARK, and a
4317 callout argument string point to strings of the appropriate width. Casts can be
4318 used to deal with this.
4319
4320 Argument:
4321 cb pointer to enumerate block
4322 callout_data user data
4323
4324 Returns: 0
4325 */
4326
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
int shown;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;

(void)callout_data;  /* Not currently displayed */

fprintf(outfile, "Callout ");

if (cb->callout_string == NULL)
  {
  /* A numerical callout: just show its number. */
  fprintf(outfile, "%d ", cb->callout_number);
  }
else
  {
  /* A string callout: the opening delimiter is the code unit immediately
  before the string. The closing delimiter is the same character unless the
  opening one appears in callout_start_delims, in which case the entry at the
  same index in callout_end_delims is used. */

  uint32_t k;
  uint32_t opener = CODE_UNIT(cb->callout_string, -1);
  uint32_t closer = opener;

  fprintf(outfile, "%c", opener);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);

  for (k = 0; callout_start_delims[k] != 0; k++)
    {
    if (opener == callout_start_delims[k])
      {
      closer = callout_end_delims[k];
      break;
      }
    }
  fprintf(outfile, "%c ", closer);
  }

/* Show the next pattern item, taken from the copy of the pattern in pbuffer8.
A zero item length is displayed as a single character. */

shown = (cb->next_item_length == 0)? 1 : (int)cb->next_item_length;
fprintf(outfile, "%.*s\n", shown, pbuffer8 + cb->pattern_position);

return 0;
}
4358
4359
4360
4361 /*************************************************
4362 * Show information about a pattern *
4363 *************************************************/
4364
4365 /* This function is called after a pattern has been compiled if any of the
4366 information-requesting controls have been set.
4367
4368 Arguments: none
4369
4370 Returns: PR_OK continue processing next line
4371 PR_SKIP skip to a blank line
4372 PR_ABEND abort the pcre2test run
4373 */
4374
4375 static int
show_pattern_info(void)4376 show_pattern_info(void)
4377 {
4378 uint32_t compile_options, overall_options, extra_options;
4379 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4380
4381 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4382 {
4383 fprintf(outfile, "------------------------------------------------------------------\n");
4384 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4385 }
4386
4387 if ((pat_patctl.control & CTL_INFO) != 0)
4388 {
4389 int rc;
4390 void *nametable;
4391 uint8_t *start_bits;
4392 BOOL heap_limit_set, match_limit_set, depth_limit_set;
4393 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4394 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4395 depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4396 newline_convention;
4397
4398 /* Exercise the error route. */
4399
4400 PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4401 (void)rc;
4402
4403 /* These info requests may return PCRE2_ERROR_UNSET. */
4404
4405 switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4406 {
4407 case 0:
4408 heap_limit_set = TRUE;
4409 break;
4410
4411 case PCRE2_ERROR_UNSET:
4412 heap_limit_set = FALSE;
4413 break;
4414
4415 default:
4416 return PR_ABEND;
4417 }
4418
4419 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4420 {
4421 case 0:
4422 match_limit_set = TRUE;
4423 break;
4424
4425 case PCRE2_ERROR_UNSET:
4426 match_limit_set = FALSE;
4427 break;
4428
4429 default:
4430 return PR_ABEND;
4431 }
4432
4433 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4434 {
4435 case 0:
4436 depth_limit_set = TRUE;
4437 break;
4438
4439 case PCRE2_ERROR_UNSET:
4440 depth_limit_set = FALSE;
4441 break;
4442
4443 default:
4444 return PR_ABEND;
4445 }
4446
4447 /* These info requests should always succeed. */
4448
4449 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4450 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4451 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4452 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4453 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4454 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4455 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4456 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4457 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4458 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4459 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4460 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4461 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4462 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4463 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4464 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4465 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4466 != 0)
4467 return PR_ABEND;
4468
4469 fprintf(outfile, "Capture group count = %d\n", capture_count);
4470
4471 if (backrefmax > 0)
4472 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4473
4474 if (maxlookbehind > 0)
4475 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4476
4477 if (heap_limit_set)
4478 fprintf(outfile, "Heap limit = %u\n", heap_limit);
4479
4480 if (match_limit_set)
4481 fprintf(outfile, "Match limit = %u\n", match_limit);
4482
4483 if (depth_limit_set)
4484 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4485
4486 if (namecount > 0)
4487 {
4488 fprintf(outfile, "Named capture groups:\n");
4489 for (; namecount > 0; namecount--)
4490 {
4491 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4492 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4493 fprintf(outfile, " ");
4494
4495 /* In UTF mode the name may be a UTF string containing non-ASCII
4496 letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4497 use the normal string printing functions, which use escapes for all
4498 non-ASCII characters. */
4499
4500 if (utf)
4501 {
4502 #ifdef SUPPORT_PCRE2_32
4503 if (test_mode == PCRE32_MODE)
4504 {
4505 PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4506 while (*nameptr != 0)
4507 {
4508 uint8_t u8buff[6];
4509 int len = ord2utf8(*nameptr++, u8buff);
4510 fprintf(outfile, "%.*s", len, u8buff);
4511 }
4512 }
4513 #endif
4514 #ifdef SUPPORT_PCRE2_16
4515 if (test_mode == PCRE16_MODE)
4516 {
4517 PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4518 while (*nameptr != 0)
4519 {
4520 int len;
4521 uint8_t u8buff[6];
4522 uint32_t c = *nameptr++ & 0xffff;
4523 if (c >= 0xD800 && c < 0xDC00)
4524 c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4525 len = ord2utf8(c, u8buff);
4526 fprintf(outfile, "%.*s", len, u8buff);
4527 }
4528 }
4529 #endif
4530 #ifdef SUPPORT_PCRE2_8
4531 if (test_mode == PCRE8_MODE)
4532 fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4533 #endif
4534 }
4535 else /* Not UTF mode */
4536 {
4537 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4538 }
4539
4540 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4541
4542 #ifdef SUPPORT_PCRE2_32
4543 if (test_mode == PCRE32_MODE)
4544 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4545 #endif
4546 #ifdef SUPPORT_PCRE2_16
4547 if (test_mode == PCRE16_MODE)
4548 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4549 #endif
4550 #ifdef SUPPORT_PCRE2_8
4551 if (test_mode == PCRE8_MODE)
4552 fprintf(outfile, "%3d\n", (int)(
4553 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4554 #endif
4555
4556 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4557 }
4558 }
4559
4560 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4561 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4562 if (match_empty) fprintf(outfile, "May match empty string\n");
4563
4564 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4565 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4566 pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4567
4568 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4569 cluttering up the verification output of non-UTF test files. */
4570
4571 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4572 {
4573 compile_options &= ~PCRE2_NEVER_UTF;
4574 overall_options &= ~PCRE2_NEVER_UTF;
4575 }
4576
4577 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4578 {
4579 compile_options &= ~PCRE2_NEVER_UCP;
4580 overall_options &= ~PCRE2_NEVER_UCP;
4581 }
4582
4583 if ((compile_options|overall_options) != 0)
4584 {
4585 if (compile_options == overall_options)
4586 show_compile_options(compile_options, "Options:", "\n");
4587 else
4588 {
4589 show_compile_options(compile_options, "Compile options:", "\n");
4590 show_compile_options(overall_options, "Overall options:", "\n");
4591 }
4592 }
4593
4594 if (extra_options != 0)
4595 show_compile_extra_options(extra_options, "Extra options:", "\n");
4596
4597 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4598
4599 if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4600 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4601 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4602 "any Unicode newline" : "CR, LF, or CRLF");
4603
4604 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4605 {
4606 switch (newline_convention)
4607 {
4608 case PCRE2_NEWLINE_CR:
4609 fprintf(outfile, "Forced newline is CR\n");
4610 break;
4611
4612 case PCRE2_NEWLINE_LF:
4613 fprintf(outfile, "Forced newline is LF\n");
4614 break;
4615
4616 case PCRE2_NEWLINE_CRLF:
4617 fprintf(outfile, "Forced newline is CRLF\n");
4618 break;
4619
4620 case PCRE2_NEWLINE_ANYCRLF:
4621 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4622 break;
4623
4624 case PCRE2_NEWLINE_ANY:
4625 fprintf(outfile, "Forced newline is any Unicode newline\n");
4626 break;
4627
4628 case PCRE2_NEWLINE_NUL:
4629 fprintf(outfile, "Forced newline is NUL\n");
4630 break;
4631
4632 default:
4633 break;
4634 }
4635 }
4636
4637 if (first_ctype == 2)
4638 {
4639 fprintf(outfile, "First code unit at start or follows newline\n");
4640 }
4641 else if (first_ctype == 1)
4642 {
4643 const char *caseless =
4644 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4645 "" : " (caseless)";
4646 if (PRINTOK(first_cunit))
4647 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4648 else
4649 {
4650 fprintf(outfile, "First code unit = ");
4651 pchar(first_cunit, FALSE, outfile);
4652 fprintf(outfile, "%s\n", caseless);
4653 }
4654 }
4655 else if (start_bits != NULL)
4656 {
4657 int i;
4658 int c = 24;
4659 fprintf(outfile, "Starting code units: ");
4660 for (i = 0; i < 256; i++)
4661 {
4662 if ((start_bits[i/8] & (1u << (i&7))) != 0)
4663 {
4664 if (c > 75)
4665 {
4666 fprintf(outfile, "\n ");
4667 c = 2;
4668 }
4669 if (PRINTOK(i) && i != ' ')
4670 {
4671 fprintf(outfile, "%c ", i);
4672 c += 2;
4673 }
4674 else
4675 {
4676 fprintf(outfile, "\\x%02x ", i);
4677 c += 5;
4678 }
4679 }
4680 }
4681 fprintf(outfile, "\n");
4682 }
4683
4684 if (last_ctype != 0)
4685 {
4686 const char *caseless =
4687 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4688 "" : " (caseless)";
4689 if (PRINTOK(last_cunit))
4690 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4691 else
4692 {
4693 fprintf(outfile, "Last code unit = ");
4694 pchar(last_cunit, FALSE, outfile);
4695 fprintf(outfile, "%s\n", caseless);
4696 }
4697 }
4698
4699 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4700
4701 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4702 {
4703 if (FLD(compiled_code, executable_jit) != NULL)
4704 fprintf(outfile, "JIT compilation was successful\n");
4705 else
4706 {
4707 #ifdef SUPPORT_JIT
4708 fprintf(outfile, "JIT compilation was not successful");
4709 if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4710 return PR_ABEND;
4711 fprintf(outfile, "\n");
4712 #else
4713 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4714 #endif
4715 }
4716 }
4717 }
4718
4719 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4720 {
4721 int errorcode;
4722 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4723 if (errorcode != 0)
4724 {
4725 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4726 if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4727 return PR_ABEND;
4728 return PR_SKIP;
4729 }
4730 }
4731
4732 return PR_OK;
4733 }
4734
4735
4736
4737 /*************************************************
4738 * Handle serialization error *
4739 *************************************************/
4740
4741 /* Print an error message after a serialization failure.
4742
4743 Arguments:
4744 rc the error code
4745 msg an initial message for what failed
4746
4747 Returns: FALSE if print_error_message() fails
4748 */
4749
4750 static BOOL
serial_error(int rc,const char * msg)4751 serial_error(int rc, const char *msg)
4752 {
4753 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4754 return print_error_message(rc, "", "\n");
4755 }
4756
4757
4758
4759 /*************************************************
4760 * Open file for save/load commands *
4761 *************************************************/
4762
4763 /* This function decodes the file name and opens the file.
4764
4765 Arguments:
4766 buffptr point after the #command
4767 mode open mode
4768 fptr points to the FILE variable
4769
4770 Returns: PR_OK or PR_ABEND
4771 */
4772
4773 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr)4774 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4775 {
4776 char *endf;
4777 char *filename = (char *)buffptr;
4778 while (isspace(*filename)) filename++;
4779 endf = filename + strlen8(filename);
4780 while (endf > filename && isspace(endf[-1])) endf--;
4781
4782 if (endf == filename)
4783 {
4784 fprintf(outfile, "** File name expected after #save\n");
4785 return PR_ABEND;
4786 }
4787
4788 *endf = 0;
4789 *fptr = fopen((const char *)filename, mode);
4790 if (*fptr == NULL)
4791 {
4792 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4793 return PR_ABEND;
4794 }
4795
4796 return PR_OK;
4797 }
4798
4799
4800
4801 /*************************************************
4802 * Process command line *
4803 *************************************************/
4804
4805 /* This function is called for lines beginning with # and a character that is
4806 not ! or whitespace, when encountered between tests, which means that there is
4807 no compiled pattern (compiled_code is NULL). The line is in buffer.
4808
4809 Arguments: none
4810
4811 Returns: PR_OK continue processing next line
4812 PR_SKIP skip to a blank line
4813 PR_ABEND abort the pcre2test run
4814 */
4815
4816 static int
process_command(void)4817 process_command(void)
4818 {
4819 FILE *f;
4820 PCRE2_SIZE serial_size;
4821 size_t i;
4822 int rc, cmd, cmdlen, yield;
4823 uint16_t first_listed_newline;
4824 const char *cmdname;
4825 uint8_t *argptr, *serial;
4826
4827 yield = PR_OK;
4828 cmd = CMD_UNKNOWN;
4829 cmdlen = 0;
4830
4831 for (i = 0; i < cmdlistcount; i++)
4832 {
4833 cmdname = cmdlist[i].name;
4834 cmdlen = strlen(cmdname);
4835 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4836 isspace(buffer[cmdlen+1]))
4837 {
4838 cmd = cmdlist[i].value;
4839 break;
4840 }
4841 }
4842
4843 argptr = buffer + cmdlen + 1;
4844
4845 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4846 {
4847 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4848 return PR_ABEND;
4849 }
4850
4851 switch(cmd)
4852 {
4853 case CMD_UNKNOWN:
4854 fprintf(outfile, "** Unknown command: %s", buffer);
4855 break;
4856
4857 case CMD_FORBID_UTF:
4858 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4859 break;
4860
4861 case CMD_PERLTEST:
4862 restrict_for_perl_test = TRUE;
4863 break;
4864
4865 /* Set default pattern modifiers */
4866
4867 case CMD_PATTERN:
4868 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4869 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4870 def_patctl.jit = 7;
4871 break;
4872
4873 /* Set default subject modifiers */
4874
4875 case CMD_SUBJECT:
4876 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4877 break;
4878
4879 /* Check the default newline, and if not one of those listed, set up the
4880 first one to be forced. An empty list unsets. */
4881
4882 case CMD_NEWLINE_DEFAULT:
4883 local_newline_default = 0; /* Unset */
4884 first_listed_newline = 0;
4885 for (;;)
4886 {
4887 while (isspace(*argptr)) argptr++;
4888 if (*argptr == 0) break;
4889 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4890 {
4891 size_t nlen = strlen(newlines[i]);
4892 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4893 isspace(argptr[nlen]))
4894 {
4895 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4896 if (first_listed_newline == 0) first_listed_newline = i;
4897 }
4898 }
4899 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4900 }
4901 local_newline_default = first_listed_newline;
4902 break;
4903
4904 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4905 the compiled pattern (e.g. to give information) are permitted. The default
4906 pattern modifiers are ignored. */
4907
4908 case CMD_POP:
4909 case CMD_POPCOPY:
4910 if (patstacknext <= 0)
4911 {
4912 fprintf(outfile, "** Can't pop off an empty stack\n");
4913 return PR_SKIP;
4914 }
4915 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4916 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4917 return PR_SKIP;
4918
4919 if (cmd == CMD_POP)
4920 {
4921 SET(compiled_code, patstack[--patstacknext]);
4922 }
4923 else
4924 {
4925 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4926 }
4927
4928 if (pat_patctl.jit != 0)
4929 {
4930 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4931 }
4932 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4933 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4934 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4935 {
4936 rc = show_pattern_info();
4937 if (rc != PR_OK) return rc;
4938 }
4939 break;
4940
4941 /* Save the stack of compiled patterns to a file, then empty the stack. */
4942
4943 case CMD_SAVE:
4944 if (patstacknext <= 0)
4945 {
4946 fprintf(outfile, "** No stacked patterns to save\n");
4947 return PR_OK;
4948 }
4949
4950 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4951 if (rc != PR_OK) return rc;
4952
4953 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4954 general_context);
4955 if (rc < 0)
4956 {
4957 fclose(f);
4958 if (!serial_error(rc, "Serialization")) return PR_ABEND;
4959 break;
4960 }
4961
4962 /* Write the length at the start of the file to make it straightforward to
4963 get the right memory when re-loading. This saves having to read the file size
4964 in different operating systems. To allow for different endianness (even
4965 though reloading with the opposite endianness does not work), write the
4966 length byte-by-byte. */
4967
4968 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4969 if (fwrite(serial, 1, serial_size, f) != serial_size)
4970 {
4971 fprintf(outfile, "** Wrong return from fwrite()\n");
4972 fclose(f);
4973 return PR_ABEND;
4974 }
4975
4976 fclose(f);
4977 PCRE2_SERIALIZE_FREE(serial);
4978 while(patstacknext > 0)
4979 {
4980 SET(compiled_code, patstack[--patstacknext]);
4981 SUB1(pcre2_code_free, compiled_code);
4982 }
4983 SET(compiled_code, NULL);
4984 break;
4985
4986 /* Load a set of compiled patterns from a file onto the stack */
4987
4988 case CMD_LOAD:
4989 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4990 if (rc != PR_OK) return rc;
4991
4992 serial_size = 0;
4993 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4994
4995 serial = malloc(serial_size);
4996 if (serial == NULL)
4997 {
4998 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
4999 SIZ_CAST serial_size);
5000 fclose(f);
5001 return PR_ABEND;
5002 }
5003
5004 i = fread(serial, 1, serial_size, f);
5005 fclose(f);
5006
5007 if (i != serial_size)
5008 {
5009 fprintf(outfile, "** Wrong return from fread()\n");
5010 yield = PR_ABEND;
5011 }
5012 else
5013 {
5014 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5015 if (rc < 0)
5016 {
5017 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5018 }
5019 else
5020 {
5021 if (rc + patstacknext > PATSTACKSIZE)
5022 {
5023 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5024 rc, (rc == 1)? "" : "s");
5025 rc = PATSTACKSIZE - patstacknext;
5026 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5027 (rc == 1)? "" : "s");
5028 }
5029 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5030 general_context);
5031 if (rc < 0)
5032 {
5033 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5034 }
5035 else patstacknext += rc;
5036 }
5037 }
5038
5039 free(serial);
5040 break;
5041 }
5042
5043 return yield;
5044 }
5045
5046
5047
5048 /*************************************************
5049 * Process pattern line *
5050 *************************************************/
5051
5052 /* This function is called when the input buffer contains the start of a
5053 pattern. The first character is known to be a valid delimiter. The pattern is
5054 read, modifiers are interpreted, and a suitable local context is set up for
5055 this test. The pattern is then compiled.
5056
5057 Arguments: none
5058
5059 Returns: PR_OK continue processing next line
5060 PR_SKIP skip to a blank line
5061 PR_ABEND abort the pcre2test run
5062 */
5063
5064 static int
process_pattern(void)5065 process_pattern(void)
5066 {
5067 BOOL utf;
5068 uint32_t k;
5069 uint8_t *p = buffer;
5070 unsigned int delimiter = *p++;
5071 int errorcode;
5072 void *use_pat_context;
5073 uint32_t use_forbid_utf = forbid_utf;
5074 PCRE2_SIZE patlen;
5075 PCRE2_SIZE valgrind_access_length;
5076 PCRE2_SIZE erroroffset;
5077
5078 /* Initialize the context and pattern/data controls for this test from the
5079 defaults. */
5080
5081 PATCTXCPY(pat_context, default_pat_context);
5082 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5083
5084 /* Find the end of the pattern, reading more lines if necessary. */
5085
5086 for(;;)
5087 {
5088 while (*p != 0)
5089 {
5090 if (*p == '\\' && p[1] != 0) p++;
5091 else if (*p == delimiter) break;
5092 p++;
5093 }
5094 if (*p != 0) break;
5095 if ((p = extend_inputline(infile, p, " > ")) == NULL)
5096 {
5097 fprintf(outfile, "** Unexpected EOF\n");
5098 return PR_ABEND;
5099 }
5100 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5101 }
5102
5103 /* If the first character after the delimiter is backslash, make the pattern
5104 end with backslash. This is purely to provide a way of testing for the error
5105 message when a pattern ends with backslash. */
5106
5107 if (p[1] == '\\') *p++ = '\\';
5108
5109 /* Terminate the pattern at the delimiter, and compute the length. */
5110
5111 *p++ = 0;
5112 patlen = p - buffer - 2;
5113
5114 /* Look for modifiers and options after the final delimiter. */
5115
5116 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5117 utf = (pat_patctl.options & PCRE2_UTF) != 0;
5118
5119 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5120 exclusive with the utf modifier. */
5121
5122 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5123 {
5124 if (test_mode == PCRE8_MODE)
5125 {
5126 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5127 return PR_SKIP;
5128 }
5129 if (utf)
5130 {
5131 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5132 return PR_SKIP;
5133 }
5134 }
5135
5136 /* The convert and posix modifiers are mutually exclusive. */
5137
5138 if (pat_patctl.convert_type != CONVERT_UNSET &&
5139 (pat_patctl.control & CTL_POSIX) != 0)
5140 {
5141 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5142 return PR_SKIP;
5143 }
5144
5145 /* Check for mutually exclusive control modifiers. At present, these are all in
5146 the first control word. */
5147
5148 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5149 {
5150 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5151 if (c != 0 && c != (c & (~c+1)))
5152 {
5153 show_controls(c, 0, "** Not allowed together:");
5154 fprintf(outfile, "\n");
5155 return PR_SKIP;
5156 }
5157 }
5158
5159 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5160 specified. */
5161
5162 if (pat_patctl.jit == 0 &&
5163 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5164 pat_patctl.jit = 7;
5165
5166 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5167 in callouts. Convert from hex if requested (literal strings in quotes may be
5168 present within the hexadecimal pairs). The result must necessarily be fewer
5169 characters so will always fit in pbuffer8. */
5170
5171 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5172 {
5173 uint8_t *pp, *pt;
5174 uint32_t c, d;
5175
5176 pt = pbuffer8;
5177 for (pp = buffer + 1; *pp != 0; pp++)
5178 {
5179 if (isspace(*pp)) continue;
5180 c = *pp++;
5181
5182 /* Handle a literal substring */
5183
5184 if (c == '\'' || c == '"')
5185 {
5186 uint8_t *pq = pp;
5187 for (;; pp++)
5188 {
5189 d = *pp;
5190 if (d == 0)
5191 {
5192 fprintf(outfile, "** Missing closing quote in hex pattern: "
5193 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5194 return PR_SKIP;
5195 }
5196 if (d == c) break;
5197 *pt++ = d;
5198 }
5199 }
5200
5201 /* Expect a hex pair */
5202
5203 else
5204 {
5205 if (!isxdigit(c))
5206 {
5207 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5208 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5209 return PR_SKIP;
5210 }
5211 if (*pp == 0)
5212 {
5213 fprintf(outfile, "** Odd number of digits in hex pattern\n");
5214 return PR_SKIP;
5215 }
5216 d = *pp;
5217 if (!isxdigit(d))
5218 {
5219 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5220 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5221 return PR_SKIP;
5222 }
5223 c = toupper(c);
5224 d = toupper(d);
5225 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5226 (isdigit(d)? (d - '0') : (d - 'A' + 10));
5227 }
5228 }
5229 *pt = 0;
5230 patlen = pt - pbuffer8;
5231 }
5232
5233 /* If not a hex string, process for repetition expansion if requested. */
5234
5235 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5236 {
5237 uint8_t *pp, *pt;
5238
5239 pt = pbuffer8;
5240 for (pp = buffer + 1; *pp != 0; pp++)
5241 {
5242 uint8_t *pc = pp;
5243 uint32_t count = 1;
5244 size_t length = 1;
5245
5246 /* Check for replication syntax; if not found, the defaults just set will
5247 prevail and one character will be copied. */
5248
5249 if (pp[0] == '\\' && pp[1] == '[')
5250 {
5251 uint8_t *pe;
5252 for (pe = pp + 2; *pe != 0; pe++)
5253 {
5254 if (pe[0] == ']' && pe[1] == '{')
5255 {
5256 uint32_t clen = pe - pc - 2;
5257 uint32_t i = 0;
5258 unsigned long uli;
5259 char *endptr;
5260
5261 pe += 2;
5262 uli = strtoul((const char *)pe, &endptr, 10);
5263 if (U32OVERFLOW(uli))
5264 {
5265 fprintf(outfile, "** Pattern repeat count too large\n");
5266 return PR_SKIP;
5267 }
5268
5269 i = (uint32_t)uli;
5270 pe = (uint8_t *)endptr;
5271 if (*pe == '}')
5272 {
5273 if (i == 0)
5274 {
5275 fprintf(outfile, "** Zero repeat not allowed\n");
5276 return PR_SKIP;
5277 }
5278 pc += 2;
5279 count = i;
5280 length = clen;
5281 pp = pe;
5282 break;
5283 }
5284 }
5285 }
5286 }
5287
5288 /* Add to output. If the buffer is too small expand it. The function for
5289 expanding buffers always keeps buffer and pbuffer8 in step as far as their
5290 size goes. */
5291
5292 while (pt + count * length > pbuffer8 + pbuffer8_size)
5293 {
5294 size_t pc_offset = pc - buffer;
5295 size_t pp_offset = pp - buffer;
5296 size_t pt_offset = pt - pbuffer8;
5297 expand_input_buffers();
5298 pc = buffer + pc_offset;
5299 pp = buffer + pp_offset;
5300 pt = pbuffer8 + pt_offset;
5301 }
5302
5303 for (; count > 0; count--)
5304 {
5305 memcpy(pt, pc, length);
5306 pt += length;
5307 }
5308 }
5309
5310 *pt = 0;
5311 patlen = pt - pbuffer8;
5312
5313 if ((pat_patctl.control & CTL_INFO) != 0)
5314 fprintf(outfile, "Expanded: %s\n", pbuffer8);
5315 }
5316
5317 /* Neither hex nor expanded, just copy the input verbatim. */
5318
5319 else
5320 {
5321 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5322 }
5323
5324 /* Sort out character tables */
5325
5326 if (pat_patctl.locale[0] != 0)
5327 {
5328 if (pat_patctl.tables_id != 0)
5329 {
5330 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5331 return PR_SKIP;
5332 }
5333 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5334 {
5335 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5336 return PR_SKIP;
5337 }
5338 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5339 {
5340 strcpy((char *)locale_name, (char *)pat_patctl.locale);
5341 if (locale_tables != NULL) free((void *)locale_tables);
5342 PCRE2_MAKETABLES(locale_tables);
5343 }
5344 use_tables = locale_tables;
5345 }
5346
5347 else switch (pat_patctl.tables_id)
5348 {
5349 case 0: use_tables = NULL; break;
5350 case 1: use_tables = tables1; break;
5351 case 2: use_tables = tables2; break;
5352 default:
5353 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
5354 return PR_SKIP;
5355 }
5356
5357 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5358
5359 /* Set up for the stackguard test. */
5360
5361 if (pat_patctl.stackguard_test != 0)
5362 {
5363 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5364 }
5365
5366 /* Handle compiling via the POSIX interface, which doesn't support the
5367 timing, showing, or debugging options, nor the ability to pass over
5368 local character tables. Neither does it have 16-bit or 32-bit support. */
5369
5370 if ((pat_patctl.control & CTL_POSIX) != 0)
5371 {
5372 #ifdef SUPPORT_PCRE2_8
5373 int rc;
5374 int cflags = 0;
5375 const char *msg = "** Ignored with POSIX interface:";
5376 #endif
5377
5378 if (test_mode != PCRE8_MODE)
5379 {
5380 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5381 return PR_SKIP;
5382 }
5383
5384 #ifdef SUPPORT_PCRE2_8
5385 /* Check for features that the POSIX interface does not support. */
5386
5387 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5388 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5389 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5390 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5391 if (timeit > 0) prmsg(&msg, "timing");
5392 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5393
5394 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5395 {
5396 show_compile_options(
5397 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5398 msg = "";
5399 }
5400
5401 if ((FLD(pat_context, extra_options) &
5402 ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5403 {
5404 show_compile_extra_options(
5405 FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5406 msg, "");
5407 msg = "";
5408 }
5409
5410 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5411 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5412 {
5413 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5414 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5415 msg = "";
5416 }
5417
5418 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5419 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5420 prmsg(&msg, "max_pattern_length");
5421 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5422 prmsg(&msg, "parens_nest_limit");
5423
5424 if (msg[0] == 0) fprintf(outfile, "\n");
5425
5426 /* Translate PCRE2 options to POSIX options and then compile. */
5427
5428 if (utf) cflags |= REG_UTF;
5429 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5430 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5431 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5432 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5433 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5434 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5435 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5436
5437 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5438 {
5439 preg.re_endp = (char *)pbuffer8 + patlen;
5440 cflags |= REG_PEND;
5441 }
5442
5443 rc = regcomp(&preg, (char *)pbuffer8, cflags);
5444
5445 /* Compiling failed */
5446
5447 if (rc != 0)
5448 {
5449 size_t bsize, usize;
5450 int psize;
5451
5452 preg.re_pcre2_code = NULL; /* In case something was left in there */
5453 preg.re_match_data = NULL;
5454
5455 bsize = (pat_patctl.regerror_buffsize != 0)?
5456 pat_patctl.regerror_buffsize : pbuffer8_size;
5457 if (bsize + 8 < pbuffer8_size)
5458 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5459 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5460
5461 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5462 versions of snprintf() put a zero byte at the end, but others do not.
5463 Therefore, we print a maximum of one less than the size of the buffer. */
5464
5465 psize = (int)bsize - 1;
5466 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5467 if (usize > bsize)
5468 {
5469 fprintf(outfile, "** regerror() message truncated\n");
5470 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5471 fprintf(outfile, "** regerror() buffer overflow\n");
5472 }
5473 return PR_SKIP;
5474 }
5475
5476 /* Compiling succeeded. Check that the values in the preg block are sensible.
5477 It can happen that pcre2test is accidentally linked with a different POSIX
5478 library which succeeds, but of course puts different things into preg. In
5479 this situation, calling regfree() may cause a segfault (or invalid free() in
5480 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5481 calling of regfree() on exit. */
5482
5483 if (preg.re_pcre2_code == NULL ||
5484 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5485 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5486 preg.re_match_data == NULL ||
5487 preg.re_cflags != cflags)
5488 {
5489 fprintf(outfile,
5490 "** The regcomp() function returned zero (success), but the values set\n"
5491 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5492 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5493 "** some other POSIX regex library.\n**\n");
5494 preg.re_pcre2_code = NULL;
5495 return PR_ABEND;
5496 }
5497
5498 return PR_OK;
5499 #endif /* SUPPORT_PCRE2_8 */
5500 }
5501
5502 /* Handle compiling via the native interface. Controls that act later are
5503 ignored with "push". Replacements are locked out. */
5504
5505 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5506 {
5507 if (pat_patctl.replacement[0] != 0)
5508 {
5509 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5510 return PR_OK;
5511 }
5512 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5513 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5514 {
5515 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5516 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5517 "** Ignored when compiled pattern is stacked with 'push':");
5518 fprintf(outfile, "\n");
5519 }
5520 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5521 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5522 {
5523 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5524 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5525 "** Applies only to compile when pattern is stacked with 'push':");
5526 fprintf(outfile, "\n");
5527 }
5528 }
5529
5530 /* Convert the input in non-8-bit modes. */
5531
5532 errorcode = 0;
5533
5534 #ifdef SUPPORT_PCRE2_16
5535 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5536 #endif
5537
5538 #ifdef SUPPORT_PCRE2_32
5539 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5540 #endif
5541
5542 switch(errorcode)
5543 {
5544 case -1:
5545 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5546 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5547 return PR_SKIP;
5548
5549 case -2:
5550 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5551 "cannot be converted to UTF\n");
5552 return PR_SKIP;
5553
5554 case -3:
5555 fprintf(outfile, "** Failed: character value greater than 0xffff "
5556 "cannot be converted to 16-bit in non-UTF mode\n");
5557 return PR_SKIP;
5558
5559 default:
5560 break;
5561 }
5562
5563 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5564 patlen. If it is to be converted, copy the result back afterwards so that it
5565 ends up back in the usual place. */
5566
5567 if (pat_patctl.convert_type != CONVERT_UNSET)
5568 {
5569 int rc;
5570 int convert_return = PR_OK;
5571 uint32_t convert_options = pat_patctl.convert_type;
5572 void *converted_pattern;
5573 PCRE2_SIZE converted_length;
5574
5575 if (pat_patctl.convert_length != 0)
5576 {
5577 converted_length = pat_patctl.convert_length;
5578 converted_pattern = malloc(converted_length * code_unit_size);
5579 if (converted_pattern == NULL)
5580 {
5581 fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5582 return PR_SKIP;
5583 }
5584 }
5585 else converted_pattern = NULL; /* Let the library allocate */
5586
5587 if (utf) convert_options |= PCRE2_CONVERT_UTF;
5588 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5589 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5590
5591 CONCTXCPY(con_context, default_con_context);
5592
5593 if (pat_patctl.convert_glob_escape != 0)
5594 {
5595 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5596 pat_patctl.convert_glob_escape;
5597 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5598 if (rc != 0)
5599 {
5600 fprintf(outfile, "** Invalid glob escape '%c'\n",
5601 pat_patctl.convert_glob_escape);
5602 convert_return = PR_SKIP;
5603 goto CONVERT_FINISH;
5604 }
5605 }
5606
5607 if (pat_patctl.convert_glob_separator != 0)
5608 {
5609 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5610 if (rc != 0)
5611 {
5612 fprintf(outfile, "** Invalid glob separator '%c'\n",
5613 pat_patctl.convert_glob_separator);
5614 convert_return = PR_SKIP;
5615 goto CONVERT_FINISH;
5616 }
5617 }
5618
5619 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5620 &converted_pattern, &converted_length, con_context);
5621
5622 if (rc != 0)
5623 {
5624 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5625 SIZ_CAST converted_length);
5626 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5627 }
5628
5629 /* Output the converted pattern, then copy it. */
5630
5631 else
5632 {
5633 PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5634 fprintf(outfile, "\n");
5635 patlen = converted_length;
5636 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5637 }
5638
5639 /* Free the converted pattern. */
5640
5641 CONVERT_FINISH:
5642 if (pat_patctl.convert_length != 0)
5643 free(converted_pattern);
5644 else
5645 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5646
5647 /* Return if conversion was unsuccessful. */
5648
5649 if (convert_return != PR_OK) return convert_return;
5650 }
5651
5652 /* By default we pass a zero-terminated pattern, but a length is passed if
5653 "use_length" was specified or this is a hex pattern (which might contain binary
5654 zeros). When valgrind is supported, arrange for the unused part of the buffer
5655 to be marked as no access. */
5656
5657 valgrind_access_length = patlen;
5658 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5659 {
5660 patlen = PCRE2_ZERO_TERMINATED;
5661 valgrind_access_length += 1; /* For the terminating zero */
5662 }
5663
5664 #ifdef SUPPORT_VALGRIND
5665 #ifdef SUPPORT_PCRE2_8
5666 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5667 {
5668 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5669 pbuffer8_size - valgrind_access_length);
5670 }
5671 #endif
5672 #ifdef SUPPORT_PCRE2_16
5673 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5674 {
5675 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5676 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5677 }
5678 #endif
5679 #ifdef SUPPORT_PCRE2_32
5680 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5681 {
5682 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5683 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5684 }
5685 #endif
5686 #else /* Valgrind not supported */
5687 (void)valgrind_access_length; /* Avoid compiler warning */
5688 #endif
5689
5690 /* If #newline_default has been used and the library was not compiled with an
5691 appropriate default newline setting, local_newline_default will be non-zero. We
5692 use this if there is no explicit newline modifier. */
5693
5694 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5695 {
5696 SETFLD(pat_context, newline_convention, local_newline_default);
5697 }
5698
5699 /* The null_context modifier is used to test calling pcre2_compile() with a
5700 NULL context. */
5701
5702 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5703 NULL : PTR(pat_context);
5704
5705 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5706 and PCRE2_NEVER_UCP are invalid with it. */
5707
5708 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5709
5710 /* Compile many times when timing. */
5711
5712 if (timeit > 0)
5713 {
5714 int i;
5715 clock_t time_taken = 0;
5716 for (i = 0; i < timeit; i++)
5717 {
5718 clock_t start_time = clock();
5719 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5720 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5721 use_pat_context);
5722 time_taken += clock() - start_time;
5723 if (TEST(compiled_code, !=, NULL))
5724 { SUB1(pcre2_code_free, compiled_code); }
5725 }
5726 total_compile_time += time_taken;
5727 fprintf(outfile, "Compile time %.4f milliseconds\n",
5728 (((double)time_taken * 1000.0) / (double)timeit) /
5729 (double)CLOCKS_PER_SEC);
5730 }
5731
5732 /* A final compile that is used "for real". */
5733
5734 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5735 &errorcode, &erroroffset, use_pat_context);
5736
5737 /* Call the JIT compiler if requested. When timing, we must free and recompile
5738 the pattern each time because that is the only way to free the JIT compiled
5739 code. We know that compilation will always succeed. */
5740
5741 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5742 {
5743 if (timeit > 0)
5744 {
5745 int i;
5746 clock_t time_taken = 0;
5747 for (i = 0; i < timeit; i++)
5748 {
5749 clock_t start_time;
5750 SUB1(pcre2_code_free, compiled_code);
5751 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5752 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5753 use_pat_context);
5754 start_time = clock();
5755 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5756 time_taken += clock() - start_time;
5757 }
5758 total_jit_compile_time += time_taken;
5759 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5760 (((double)time_taken * 1000.0) / (double)timeit) /
5761 (double)CLOCKS_PER_SEC);
5762 }
5763 else
5764 {
5765 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5766 }
5767 }
5768
5769 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5770 and 32-bit buffers can be marked completely undefined, but we must leave the
5771 pattern in the 8-bit buffer defined because it may be read from a callout
5772 during matching. */
5773
5774 #ifdef SUPPORT_VALGRIND
5775 #ifdef SUPPORT_PCRE2_8
5776 if (test_mode == PCRE8_MODE)
5777 {
5778 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5779 pbuffer8_size - valgrind_access_length);
5780 }
5781 #endif
5782 #ifdef SUPPORT_PCRE2_16
5783 if (test_mode == PCRE16_MODE)
5784 {
5785 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5786 }
5787 #endif
5788 #ifdef SUPPORT_PCRE2_32
5789 if (test_mode == PCRE32_MODE)
5790 {
5791 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5792 }
5793 #endif
5794 #endif
5795
5796 /* Compilation failed; go back for another re, skipping to blank line
5797 if non-interactive. */
5798
5799 if (TEST(compiled_code, ==, NULL))
5800 {
5801 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5802 (int)erroroffset);
5803 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5804 return PR_SKIP;
5805 }
5806
5807 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5808 locked out at compile time, but we must also check for occurrences of \P, \p,
5809 and \X, which are only supported when Unicode is supported. */
5810
5811 if (forbid_utf != 0)
5812 {
5813 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5814 {
5815 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5816 "#forbid_utf command\n");
5817 return PR_SKIP;
5818 }
5819 }
5820
5821 /* Remember the maximum lookbehind, for partial matching. */
5822
5823 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5824 return PR_ABEND;
5825
5826 /* Remember the number of captures. */
5827
5828 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5829 return PR_ABEND;
5830
5831 /* If an explicit newline modifier was given, set the information flag in the
5832 pattern so that it is preserved over push/pop. */
5833
5834 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5835 {
5836 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5837 }
5838
5839 /* Output code size and other information if requested. */
5840
5841 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5842 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5843 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5844 {
5845 int rc = show_pattern_info();
5846 if (rc != PR_OK) return rc;
5847 }
5848
5849 /* The "push" control requests that the compiled pattern be remembered on a
5850 stack. This is mainly for testing the serialization functionality. */
5851
5852 if ((pat_patctl.control & CTL_PUSH) != 0)
5853 {
5854 if (patstacknext >= PATSTACKSIZE)
5855 {
5856 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5857 return PR_ABEND;
5858 }
5859 patstack[patstacknext++] = PTR(compiled_code);
5860 SET(compiled_code, NULL);
5861 }
5862
5863 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5864 copy of the pattern, the latter with a copy of its character tables. This tests
5865 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5866
5867 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5868 {
5869 if (patstacknext >= PATSTACKSIZE)
5870 {
5871 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5872 return PR_ABEND;
5873 }
5874 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5875 {
5876 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5877 }
5878 else
5879 {
5880 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5881 compiled_code); }
5882 }
5883
5884 return PR_OK;
5885 }
5886
5887
5888
5889 /*************************************************
5890 * Check heap, match or depth limit *
5891 *************************************************/
5892
5893 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5894 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5895
5896 Arguments:
5897 pp the subject string
5898 ulen length of subject or PCRE2_ZERO_TERMINATED
5899 errnumber defines which limit to test
5900 msg string to include in final message
5901
5902 Returns: the return from the final match function call
5903 */
5904
5905 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5906 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5907 {
5908 int capcount;
5909 uint32_t min = 0;
5910 uint32_t mid = 64;
5911 uint32_t max = UINT32_MAX;
5912
5913 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5914 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
5915 PCRE2_SET_HEAP_LIMIT(dat_context, max);
5916
5917 for (;;)
5918 {
5919 uint32_t stack_start = 0;
5920
5921 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
5922 {
5923 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
5924 }
5925 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5926 {
5927 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5928 }
5929 else
5930 {
5931 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
5932 }
5933
5934 if ((dat_datctl.control & CTL_DFA) != 0)
5935 {
5936 stack_start = DFA_START_RWS_SIZE/1024;
5937 if (dfa_workspace == NULL)
5938 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5939 if (dfa_matched++ == 0)
5940 dfa_workspace[0] = -1; /* To catch bad restart */
5941 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5942 dat_datctl.options, match_data,
5943 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
5944 }
5945
5946 else if ((pat_patctl.control & CTL_JITFAST) != 0)
5947 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5948 dat_datctl.options, match_data, PTR(dat_context));
5949
5950 else
5951 {
5952 stack_start = START_FRAMES_SIZE/1024;
5953 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5954 dat_datctl.options, match_data, PTR(dat_context));
5955 }
5956
5957 if (capcount == errnumber)
5958 {
5959 if ((mid & 0x80000000u) != 0)
5960 {
5961 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
5962 "restriction\n", msg);
5963 break;
5964 }
5965
5966 min = mid;
5967 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5968 }
5969 else if (capcount >= 0 ||
5970 capcount == PCRE2_ERROR_NOMATCH ||
5971 capcount == PCRE2_ERROR_PARTIAL)
5972 {
5973 /* If we've not hit the error with a heap limit less than the size of the
5974 initial stack frame vector (for pcre2_match()) or the initial stack
5975 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
5976 the minimum limit is zero; there's no need to go on. The other limits are
5977 always greater than zero. */
5978
5979 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
5980 {
5981 fprintf(outfile, "Minimum %s limit = 0\n", msg);
5982 break;
5983 }
5984 if (mid == min + 1)
5985 {
5986 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5987 break;
5988 }
5989 max = mid;
5990 mid = (min + max)/2;
5991 }
5992 else break; /* Some other error */
5993 }
5994
5995 return capcount;
5996 }
5997
5998
5999
6000 /*************************************************
6001 * Substitute callout function *
6002 *************************************************/
6003
6004 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6005 Print out the data that is passed back. The substitute callout block is
6006 identical for all code unit widths, so we just pick one.
6007
6008 Arguments:
6009 scb pointer to substitute callout block
6010 data_ptr callout data
6011
6012 Returns: nothing
6013 */
6014
6015 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6016 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6017 void *data_ptr)
6018 {
6019 int yield = 0;
6020 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6021 (void)data_ptr; /* Not used */
6022
6023 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6024 scb->subscount, scb->oveccount,
6025 SIZ_CAST scb->ovector[0], SIZ_CAST scb->ovector[1]);
6026
6027 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6028 utf, outfile);
6029
6030 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6031 SIZ_CAST scb->output_offsets[0], SIZ_CAST scb->output_offsets[1]);
6032
6033 PCHARSV(scb->output, scb->output_offsets[0],
6034 scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6035
6036 if (scb->subscount == dat_datctl.substitute_stop)
6037 {
6038 yield = -1;
6039 fprintf(outfile, " STOPPED");
6040 }
6041 else if (scb->subscount == dat_datctl.substitute_skip)
6042 {
6043 yield = +1;
6044 fprintf(outfile, " SKIPPED");
6045 }
6046
6047 fprintf(outfile, "\"\n");
6048 return yield;
6049 }
6050
6051
6052 /*************************************************
6053 * Callout function *
6054 *************************************************/
6055
6056 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6057 we are in the match (unless suppressed). Yield zero unless more callouts than
6058 the fail count, or the callout data is not zero. The only differences in the
6059 callout block for different code unit widths are that the pointers to the
6060 subject, the most recent MARK, and a callout argument string point to strings
6061 of the appropriate width. Casts can be used to deal with this.
6062
6063 Arguments:
6064 cb a pointer to a callout block
6065 callout_data_ptr the provided callout data
6066
6067 Returns: 0 or 1 or an error, as determined by settings
6068 */
6069
6070 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6071 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6072 {
6073 FILE *f, *fdefault;
6074 uint32_t i, pre_start, post_start, subject_length;
6075 PCRE2_SIZE current_position;
6076 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6077 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6078 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6079
6080 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6081 happens only once in simple cases, but we want to repeat after any additional
6082 output caused by CALLOUT_EXTRA. */
6083
6084 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6085 NULL : outfile;
6086
6087 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6088 {
6089 f = outfile;
6090 switch (cb->callout_flags)
6091 {
6092 case PCRE2_CALLOUT_BACKTRACK:
6093 fprintf(f, "Backtrack\n");
6094 break;
6095
6096 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6097 fprintf(f, "Backtrack\nNo other matching paths\n");
6098 /* Fall through */
6099
6100 case PCRE2_CALLOUT_STARTMATCH:
6101 fprintf(f, "New match attempt\n");
6102 break;
6103
6104 default:
6105 f = fdefault;
6106 break;
6107 }
6108 }
6109 else f = fdefault;
6110
6111 /* For a callout with a string argument, show the string first because there
6112 isn't a tidy way to fit it in the rest of the data. */
6113
6114 if (cb->callout_string != NULL)
6115 {
6116 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6117 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6118 SIZ_CAST cb->callout_string_offset, delimiter);
6119 PCHARSV(cb->callout_string, 0,
6120 cb->callout_string_length, utf, outfile);
6121 for (i = 0; callout_start_delims[i] != 0; i++)
6122 if (delimiter == callout_start_delims[i])
6123 {
6124 delimiter = callout_end_delims[i];
6125 break;
6126 }
6127 fprintf(outfile, "%c", delimiter);
6128 if (!callout_capture) fprintf(outfile, "\n");
6129 }
6130
6131 /* Show captured strings if required */
6132
6133 if (callout_capture)
6134 {
6135 if (cb->callout_string == NULL)
6136 fprintf(outfile, "Callout %d:", cb->callout_number);
6137 fprintf(outfile, " last capture = %d\n", cb->capture_last);
6138 for (i = 2; i < cb->capture_top * 2; i += 2)
6139 {
6140 fprintf(outfile, "%2d: ", i/2);
6141 if (cb->offset_vector[i] == PCRE2_UNSET)
6142 fprintf(outfile, "<unset>");
6143 else
6144 {
6145 PCHARSV(cb->subject, cb->offset_vector[i],
6146 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6147 }
6148 fprintf(outfile, "\n");
6149 }
6150 }
6151
6152 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6153 non-printing characters), the first time, or if giving full details. On
6154 subsequent calls in the same match, we use PCHARS() just to find the printed
6155 lengths of the substrings. */
6156
6157 if (callout_where)
6158 {
6159 if (f != NULL) fprintf(f, "--->");
6160
6161 /* The subject before the match start. */
6162
6163 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6164
6165 /* If a lookbehind is involved, the current position may be earlier than the
6166 match start. If so, use the match start instead. */
6167
6168 current_position = (cb->current_position >= cb->start_match)?
6169 cb->current_position : cb->start_match;
6170
6171 /* The subject between the match start and the current position. */
6172
6173 PCHARS(post_start, cb->subject, cb->start_match,
6174 current_position - cb->start_match, utf, f);
6175
6176 /* Print from the current position to the end. */
6177
6178 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6179 utf, f);
6180
6181 /* Calculate the total subject printed length (no print). */
6182
6183 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6184
6185 if (f != NULL) fprintf(f, "\n");
6186
6187 /* For automatic callouts, show the pattern offset. Otherwise, for a
6188 numerical callout whose number has not already been shown with captured
6189 strings, show the number here. A callout with a string argument has been
6190 displayed above. */
6191
6192 if (cb->callout_number == 255)
6193 {
6194 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6195 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6196 }
6197 else
6198 {
6199 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6200 else fprintf(outfile, "%3d ", cb->callout_number);
6201 }
6202
6203 /* Now show position indicators */
6204
6205 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6206 fprintf(outfile, "^");
6207
6208 if (post_start > 0)
6209 {
6210 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6211 fprintf(outfile, "^");
6212 }
6213
6214 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6215 fprintf(outfile, " ");
6216
6217 if (cb->next_item_length != 0)
6218 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6219 pbuffer8 + cb->pattern_position);
6220 else
6221 fprintf(outfile, "End of pattern");
6222
6223 fprintf(outfile, "\n");
6224 }
6225
6226 first_callout = FALSE;
6227
6228 /* Show any mark info */
6229
6230 if (cb->mark != last_callout_mark)
6231 {
6232 if (cb->mark == NULL)
6233 fprintf(outfile, "Latest Mark: <unset>\n");
6234 else
6235 {
6236 fprintf(outfile, "Latest Mark: ");
6237 PCHARSV(cb->mark, 0, -1, utf, outfile);
6238 putc('\n', outfile);
6239 }
6240 last_callout_mark = cb->mark;
6241 }
6242
6243 /* Show callout data */
6244
6245 if (callout_data_ptr != NULL)
6246 {
6247 int callout_data = *((int32_t *)callout_data_ptr);
6248 if (callout_data != 0)
6249 {
6250 fprintf(outfile, "Callout data = %d\n", callout_data);
6251 return callout_data;
6252 }
6253 }
6254
6255 /* Keep count and give the appropriate return code */
6256
6257 callout_count++;
6258
6259 if (cb->callout_number == dat_datctl.cerror[0] &&
6260 callout_count >= dat_datctl.cerror[1])
6261 return PCRE2_ERROR_CALLOUT;
6262
6263 if (cb->callout_number == dat_datctl.cfail[0] &&
6264 callout_count >= dat_datctl.cfail[1])
6265 return 1;
6266
6267 return 0;
6268 }
6269
6270
6271
6272 /*************************************************
6273 * Handle *MARK and copy/get tests *
6274 *************************************************/
6275
6276 /* This function is called after complete and partial matches. It runs the
6277 tests for substring extraction.
6278
6279 Arguments:
6280 utf TRUE for utf
6281 capcount return from pcre2_match()
6282
6283 Returns: FALSE if print_error_message() fails
6284 */
6285
6286 static BOOL
copy_and_get(BOOL utf,int capcount)6287 copy_and_get(BOOL utf, int capcount)
6288 {
6289 int i;
6290 uint8_t *nptr;
6291
6292 /* Test copy strings by number */
6293
6294 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6295 {
6296 int rc;
6297 PCRE2_SIZE length, length2;
6298 uint32_t copybuffer[256];
6299 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6300 length = sizeof(copybuffer)/code_unit_size;
6301 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6302 if (rc < 0)
6303 {
6304 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6305 if (!print_error_message(rc, "", "\n")) return FALSE;
6306 }
6307 else
6308 {
6309 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6310 if (rc < 0)
6311 {
6312 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6313 if (!print_error_message(rc, "", "\n")) return FALSE;
6314 }
6315 else if (length2 != length)
6316 {
6317 fprintf(outfile, "Mismatched substring lengths: %"
6318 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6319 }
6320 fprintf(outfile, "%2dC ", n);
6321 PCHARSV(copybuffer, 0, length, utf, outfile);
6322 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6323 }
6324 }
6325
6326 /* Test copy strings by name */
6327
6328 nptr = dat_datctl.copy_names;
6329 for (;;)
6330 {
6331 int rc;
6332 int groupnumber;
6333 PCRE2_SIZE length, length2;
6334 uint32_t copybuffer[256];
6335 int namelen = strlen((const char *)nptr);
6336 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6337 PCRE2_SIZE cnl = namelen;
6338 #endif
6339 if (namelen == 0) break;
6340
6341 #ifdef SUPPORT_PCRE2_8
6342 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6343 #endif
6344 #ifdef SUPPORT_PCRE2_16
6345 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6346 #endif
6347 #ifdef SUPPORT_PCRE2_32
6348 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6349 #endif
6350
6351 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6352 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6353 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6354
6355 length = sizeof(copybuffer)/code_unit_size;
6356 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6357 if (rc < 0)
6358 {
6359 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6360 if (!print_error_message(rc, "", "\n")) return FALSE;
6361 }
6362 else
6363 {
6364 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6365 if (rc < 0)
6366 {
6367 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6368 if (!print_error_message(rc, "", "\n")) return FALSE;
6369 }
6370 else if (length2 != length)
6371 {
6372 fprintf(outfile, "Mismatched substring lengths: %"
6373 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6374 }
6375 fprintf(outfile, " C ");
6376 PCHARSV(copybuffer, 0, length, utf, outfile);
6377 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6378 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6379 else fprintf(outfile, " (non-unique)\n");
6380 }
6381 nptr += namelen + 1;
6382 }
6383
6384 /* Test get strings by number */
6385
6386 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6387 {
6388 int rc;
6389 PCRE2_SIZE length;
6390 void *gotbuffer;
6391 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6392 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6393 if (rc < 0)
6394 {
6395 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6396 if (!print_error_message(rc, "", "\n")) return FALSE;
6397 }
6398 else
6399 {
6400 fprintf(outfile, "%2dG ", n);
6401 PCHARSV(gotbuffer, 0, length, utf, outfile);
6402 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6403 PCRE2_SUBSTRING_FREE(gotbuffer);
6404 }
6405 }
6406
6407 /* Test get strings by name */
6408
6409 nptr = dat_datctl.get_names;
6410 for (;;)
6411 {
6412 PCRE2_SIZE length;
6413 void *gotbuffer;
6414 int rc;
6415 int groupnumber;
6416 int namelen = strlen((const char *)nptr);
6417 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6418 PCRE2_SIZE cnl = namelen;
6419 #endif
6420 if (namelen == 0) break;
6421
6422 #ifdef SUPPORT_PCRE2_8
6423 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6424 #endif
6425 #ifdef SUPPORT_PCRE2_16
6426 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6427 #endif
6428 #ifdef SUPPORT_PCRE2_32
6429 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6430 #endif
6431
6432 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6433 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6434 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6435
6436 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6437 if (rc < 0)
6438 {
6439 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6440 if (!print_error_message(rc, "", "\n")) return FALSE;
6441 }
6442 else
6443 {
6444 fprintf(outfile, " G ");
6445 PCHARSV(gotbuffer, 0, length, utf, outfile);
6446 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6447 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6448 else fprintf(outfile, " (non-unique)\n");
6449 PCRE2_SUBSTRING_FREE(gotbuffer);
6450 }
6451 nptr += namelen + 1;
6452 }
6453
6454 /* Test getting the complete list of captured strings. */
6455
6456 if ((dat_datctl.control & CTL_GETALL) != 0)
6457 {
6458 int rc;
6459 void **stringlist;
6460 PCRE2_SIZE *lengths;
6461 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6462 if (rc < 0)
6463 {
6464 fprintf(outfile, "get substring list failed (%d): ", rc);
6465 if (!print_error_message(rc, "", "\n")) return FALSE;
6466 }
6467 else
6468 {
6469 for (i = 0; i < capcount; i++)
6470 {
6471 fprintf(outfile, "%2dL ", i);
6472 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6473 putc('\n', outfile);
6474 }
6475 if (stringlist[i] != NULL)
6476 fprintf(outfile, "string list not terminated by NULL\n");
6477 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6478 }
6479 }
6480
6481 return TRUE;
6482 }
6483
6484
6485
6486 /*************************************************
6487 * Show an entire ovector *
6488 *************************************************/
6489
6490 /* This function is called after partial matching or match failure, when the
6491 "allvector" modifier is set. It is a means of checking the contents of the
6492 entire ovector, to ensure no modification of fields that should be unchanged.
6493
6494 Arguments:
6495 ovector points to the ovector
6496 oveccount number of pairs
6497
6498 Returns: nothing
6499 */
6500
6501 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6502 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6503 {
6504 uint32_t i;
6505 for (i = 0; i < 2*oveccount; i += 2)
6506 {
6507 PCRE2_SIZE start = ovector[i];
6508 PCRE2_SIZE end = ovector[i+1];
6509
6510 fprintf(outfile, "%2d: ", i/2);
6511 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6512 fprintf(outfile, "<unset>\n");
6513 else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6514 fprintf(outfile, "<unchanged>\n");
6515 else
6516 fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6517 (unsigned long int)end);
6518 }
6519 }
6520
6521
6522 /*************************************************
6523 * Process a data line *
6524 *************************************************/
6525
6526 /* The line is in buffer; it will not be empty.
6527
6528 Arguments: none
6529
6530 Returns: PR_OK continue processing next line
6531 PR_SKIP skip to a blank line
6532 PR_ABEND abort the pcre2test run
6533 */
6534
6535 static int
process_data(void)6536 process_data(void)
6537 {
6538 PCRE2_SIZE len, ulen, arg_ulen;
6539 uint32_t gmatched;
6540 uint32_t c, k;
6541 uint32_t g_notempty = 0;
6542 uint8_t *p, *pp, *start_rep;
6543 size_t needlen;
6544 void *use_dat_context;
6545 BOOL utf;
6546 BOOL subject_literal;
6547
6548 PCRE2_SIZE *ovector;
6549 PCRE2_SIZE ovecsave[3];
6550 uint32_t oveccount;
6551
6552 #ifdef SUPPORT_PCRE2_8
6553 uint8_t *q8 = NULL;
6554 #endif
6555 #ifdef SUPPORT_PCRE2_16
6556 uint16_t *q16 = NULL;
6557 #endif
6558 #ifdef SUPPORT_PCRE2_32
6559 uint32_t *q32 = NULL;
6560 #endif
6561
6562 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6563
6564 /* Copy the default context and data control blocks to the active ones. Then
6565 copy from the pattern the controls that can be set in either the pattern or the
6566 data. This allows them to be overridden in the data line. We do not do this for
6567 options because those that are common apply separately to compiling and
6568 matching. */
6569
6570 DATCTXCPY(dat_context, default_dat_context);
6571 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6572 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6573 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6574 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6575 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6576
6577 if (dat_datctl.substitute_skip == 0)
6578 dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6579 if (dat_datctl.substitute_stop == 0)
6580 dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6581
6582 /* Initialize for scanning the data line. */
6583
6584 #ifdef SUPPORT_PCRE2_8
6585 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6586 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6587 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6588 #else
6589 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6590 #endif
6591
6592 start_rep = NULL;
6593 len = strlen((const char *)buffer);
6594 while (len > 0 && isspace(buffer[len-1])) len--;
6595 buffer[len] = 0;
6596 p = buffer;
6597 while (isspace(*p)) p++;
6598
6599 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6600 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6601
6602 if (utf)
6603 {
6604 uint8_t *q;
6605 uint32_t cc;
6606 int n = 1;
6607 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6608 if (n <= 0)
6609 {
6610 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6611 "in UTF mode\n");
6612 return PR_OK;
6613 }
6614 }
6615
6616 #ifdef SUPPORT_VALGRIND
6617 /* Mark the dbuffer as addressable but undefined again. */
6618 if (dbuffer != NULL)
6619 {
6620 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6621 }
6622 #endif
6623
6624 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6625 the number of code units that will be needed (though the buffer may have to be
6626 extended if replication is involved). */
6627
6628 needlen = (size_t)((len+1) * code_unit_size);
6629 if (dbuffer == NULL || needlen >= dbuffer_size)
6630 {
6631 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6632 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6633 if (dbuffer == NULL)
6634 {
6635 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6636 exit(1);
6637 }
6638 }
6639 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6640
6641 /* Scan the data line, interpreting data escapes, and put the result into a
6642 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6643 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6644 */
6645
6646 while ((c = *p++) != 0)
6647 {
6648 int32_t i = 0;
6649 size_t replen;
6650
6651 /* ] may mark the end of a replicated sequence */
6652
6653 if (c == ']' && start_rep != NULL)
6654 {
6655 long li;
6656 char *endptr;
6657 size_t qoffset = CAST8VAR(q) - dbuffer;
6658 size_t rep_offset = start_rep - dbuffer;
6659
6660 if (*p++ != '{')
6661 {
6662 fprintf(outfile, "** Expected '{' after \\[....]\n");
6663 return PR_OK;
6664 }
6665
6666 li = strtol((const char *)p, &endptr, 10);
6667 if (S32OVERFLOW(li))
6668 {
6669 fprintf(outfile, "** Repeat count too large\n");
6670 return PR_OK;
6671 }
6672
6673 p = (uint8_t *)endptr;
6674 if (*p++ != '}')
6675 {
6676 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6677 return PR_OK;
6678 }
6679
6680 i = (int32_t)li;
6681 if (i-- == 0)
6682 {
6683 fprintf(outfile, "** Zero repeat not allowed\n");
6684 return PR_OK;
6685 }
6686
6687 replen = CAST8VAR(q) - start_rep;
6688 needlen += replen * i;
6689
6690 if (needlen >= dbuffer_size)
6691 {
6692 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6693 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6694 if (dbuffer == NULL)
6695 {
6696 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6697 exit(1);
6698 }
6699 SETCASTPTR(q, dbuffer + qoffset);
6700 start_rep = dbuffer + rep_offset;
6701 }
6702
6703 while (i-- > 0)
6704 {
6705 memcpy(CAST8VAR(q), start_rep, replen);
6706 SETPLUS(q, replen/code_unit_size);
6707 }
6708
6709 start_rep = NULL;
6710 continue;
6711 }
6712
6713 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6714 set, do the fudge for setting the top bit. */
6715
6716 if (c != '\\' || subject_literal)
6717 {
6718 uint32_t topbit = 0;
6719 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6720 {
6721 topbit = 0x80000000;
6722 c = *p++;
6723 }
6724 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6725 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6726 c |= topbit;
6727 }
6728
6729 /* Handle backslash escapes */
6730
6731 else switch ((c = *p++))
6732 {
6733 case '\\': break;
6734 case 'a': c = CHAR_BEL; break;
6735 case 'b': c = '\b'; break;
6736 case 'e': c = CHAR_ESC; break;
6737 case 'f': c = '\f'; break;
6738 case 'n': c = '\n'; break;
6739 case 'r': c = '\r'; break;
6740 case 't': c = '\t'; break;
6741 case 'v': c = '\v'; break;
6742
6743 case '0': case '1': case '2': case '3':
6744 case '4': case '5': case '6': case '7':
6745 c -= '0';
6746 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6747 c = c * 8 + *p++ - '0';
6748 break;
6749
6750 case 'o':
6751 if (*p == '{')
6752 {
6753 uint8_t *pt = p;
6754 c = 0;
6755 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6756 {
6757 if (++i == 12)
6758 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6759 "using only the first twelve.\n");
6760 else c = c * 8 + *pt - '0';
6761 }
6762 if (*pt == '}') p = pt + 1;
6763 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6764 }
6765 break;
6766
6767 case 'x':
6768 if (*p == '{')
6769 {
6770 uint8_t *pt = p;
6771 c = 0;
6772
6773 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6774 when isxdigit() is a macro that refers to its argument more than
6775 once. This is banned by the C Standard, but apparently happens in at
6776 least one MacOS environment. */
6777
6778 for (pt++; isxdigit(*pt); pt++)
6779 {
6780 if (++i == 9)
6781 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6782 "using only the first eight.\n");
6783 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6784 }
6785 if (*pt == '}')
6786 {
6787 p = pt + 1;
6788 break;
6789 }
6790 /* Not correct form for \x{...}; fall through */
6791 }
6792
6793 /* \x without {} always defines just one byte in 8-bit mode. This
6794 allows UTF-8 characters to be constructed byte by byte, and also allows
6795 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6796 Otherwise, pass it down as data. */
6797
6798 c = 0;
6799 while (i++ < 2 && isxdigit(*p))
6800 {
6801 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6802 p++;
6803 }
6804 #if defined SUPPORT_PCRE2_8
6805 if (utf && (test_mode == PCRE8_MODE))
6806 {
6807 *q8++ = c;
6808 continue;
6809 }
6810 #endif
6811 break;
6812
6813 case 0: /* \ followed by EOF allows for an empty line */
6814 p--;
6815 continue;
6816
6817 case '=': /* \= terminates the data, starts modifiers */
6818 goto ENDSTRING;
6819
6820 case '[': /* \[ introduces a replicated character sequence */
6821 if (start_rep != NULL)
6822 {
6823 fprintf(outfile, "** Nested replication is not supported\n");
6824 return PR_OK;
6825 }
6826 start_rep = CAST8VAR(q);
6827 continue;
6828
6829 default:
6830 if (isalnum(c))
6831 {
6832 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6833 return PR_OK;
6834 }
6835 }
6836
6837 /* We now have a character value in c that may be greater than 255.
6838 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6839 than 127 in UTF mode must have come from \x{...} or octal constructs
6840 because values from \x.. get this far only in non-UTF mode. */
6841
6842 #ifdef SUPPORT_PCRE2_8
6843 if (test_mode == PCRE8_MODE)
6844 {
6845 if (utf)
6846 {
6847 if (c > 0x7fffffff)
6848 {
6849 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6850 "and so cannot be converted to UTF-8\n", c);
6851 return PR_OK;
6852 }
6853 q8 += ord2utf8(c, q8);
6854 }
6855 else
6856 {
6857 if (c > 0xffu)
6858 {
6859 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6860 "and UTF-8 mode is not enabled.\n", c);
6861 fprintf(outfile, "** Truncation will probably give the wrong "
6862 "result.\n");
6863 }
6864 *q8++ = (uint8_t)c;
6865 }
6866 }
6867 #endif
6868 #ifdef SUPPORT_PCRE2_16
6869 if (test_mode == PCRE16_MODE)
6870 {
6871 if (utf)
6872 {
6873 if (c > 0x10ffffu)
6874 {
6875 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6876 "0x10ffff and so cannot be converted to UTF-16\n", c);
6877 return PR_OK;
6878 }
6879 else if (c >= 0x10000u)
6880 {
6881 c-= 0x10000u;
6882 *q16++ = 0xD800 | (c >> 10);
6883 *q16++ = 0xDC00 | (c & 0x3ff);
6884 }
6885 else
6886 *q16++ = c;
6887 }
6888 else
6889 {
6890 if (c > 0xffffu)
6891 {
6892 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6893 "and UTF-16 mode is not enabled.\n", c);
6894 fprintf(outfile, "** Truncation will probably give the wrong "
6895 "result.\n");
6896 }
6897
6898 *q16++ = (uint16_t)c;
6899 }
6900 }
6901 #endif
6902 #ifdef SUPPORT_PCRE2_32
6903 if (test_mode == PCRE32_MODE)
6904 {
6905 *q32++ = c;
6906 }
6907 #endif
6908 }
6909
6910 ENDSTRING:
6911 SET(*q, 0);
6912 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
6913 ulen = len/code_unit_size; /* Length in code units */
6914 arg_ulen = ulen; /* Value to use in match arg */
6915
6916 /* If the string was terminated by \= we must now interpret modifiers. */
6917
6918 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
6919 return PR_OK;
6920
6921 /* Setting substitute_{skip,stop} implies a substitute callout. */
6922
6923 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
6924 dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
6925
6926 /* Check for mutually exclusive modifiers. At present, these are all in the
6927 first control word. */
6928
6929 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
6930 {
6931 c = dat_datctl.control & exclusive_dat_controls[k];
6932 if (c != 0 && c != (c & (~c+1)))
6933 {
6934 show_controls(c, 0, "** Not allowed together:");
6935 fprintf(outfile, "\n");
6936 return PR_OK;
6937 }
6938 }
6939
6940 if (pat_patctl.replacement[0] != 0)
6941 {
6942 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
6943 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
6944 {
6945 fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
6946 return PR_OK;
6947 }
6948
6949 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
6950 fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
6951 }
6952
6953 /* Warn for modifiers that are ignored for DFA. */
6954
6955 if ((dat_datctl.control & CTL_DFA) != 0)
6956 {
6957 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
6958 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
6959 }
6960
6961 /* We now have the subject in dbuffer, with len containing the byte length, and
6962 ulen containing the code unit length, with a copy in arg_ulen for use in match
6963 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
6964 zero_terminate modifier is present).
6965
6966 Move the data to the end of the buffer so that a read over the end can be
6967 caught by valgrind or other means. If we have explicit valgrind support, mark
6968 the unused start of the buffer unaddressable. If we are using the POSIX
6969 interface, or testing zero-termination, we must include the terminating zero in
6970 the usable data. */
6971
6972 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
6973 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
6974 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
6975 #ifdef SUPPORT_VALGRIND
6976 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
6977 #endif
6978
6979 /* Now pp points to the subject string. POSIX matching is only possible in
6980 8-bit mode, and it does not support timing or other fancy features. Some were
6981 checked at compile time, but we need to check the match-time settings here. */
6982
6983 #ifdef SUPPORT_PCRE2_8
6984 if ((pat_patctl.control & CTL_POSIX) != 0)
6985 {
6986 int rc;
6987 int eflags = 0;
6988 regmatch_t *pmatch = NULL;
6989 const char *msg = "** Ignored with POSIX interface:";
6990
6991 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
6992 prmsg(&msg, "callout_error");
6993 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
6994 prmsg(&msg, "callout_fail");
6995 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
6996 prmsg(&msg, "copy");
6997 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
6998 prmsg(&msg, "get");
6999 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7000 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7001
7002 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7003 {
7004 fprintf(outfile, "%s", msg);
7005 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7006 msg = "";
7007 }
7008 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7009 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7010 {
7011 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7012 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7013 msg = "";
7014 }
7015
7016 if (msg[0] == 0) fprintf(outfile, "\n");
7017
7018 if (dat_datctl.oveccount > 0)
7019 {
7020 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7021 if (pmatch == NULL)
7022 {
7023 fprintf(outfile, "** Failed to get memory for recording matching "
7024 "information (size set = %du)\n", dat_datctl.oveccount);
7025 return PR_OK;
7026 }
7027 }
7028
7029 if (dat_datctl.startend[0] != CFORE_UNSET)
7030 {
7031 pmatch[0].rm_so = dat_datctl.startend[0];
7032 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7033 dat_datctl.startend[1] : len;
7034 eflags |= REG_STARTEND;
7035 }
7036
7037 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7038 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7039 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7040
7041 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7042 if (rc != 0)
7043 {
7044 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7045 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7046 }
7047 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7048 fprintf(outfile, "Matched with REG_NOSUB\n");
7049 else if (dat_datctl.oveccount == 0)
7050 fprintf(outfile, "Matched without capture\n");
7051 else
7052 {
7053 size_t i, j;
7054 size_t last_printed = (size_t)dat_datctl.oveccount;
7055 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7056 {
7057 if (pmatch[i].rm_so >= 0)
7058 {
7059 PCRE2_SIZE start = pmatch[i].rm_so;
7060 PCRE2_SIZE end = pmatch[i].rm_eo;
7061 for (j = last_printed + 1; j < i; j++)
7062 fprintf(outfile, "%2d: <unset>\n", (int)j);
7063 last_printed = i;
7064 if (start > end)
7065 {
7066 start = pmatch[i].rm_eo;
7067 end = pmatch[i].rm_so;
7068 fprintf(outfile, "Start of matched string is beyond its end - "
7069 "displaying from end to start.\n");
7070 }
7071 fprintf(outfile, "%2d: ", (int)i);
7072 PCHARSV(pp, start, end - start, utf, outfile);
7073 fprintf(outfile, "\n");
7074
7075 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7076 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7077 {
7078 fprintf(outfile, "%2d+ ", (int)i);
7079 /* Note: don't use the start/end variables here because we want to
7080 show the text from what is reported as the end. */
7081 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7082 fprintf(outfile, "\n"); }
7083 }
7084 }
7085 }
7086 free(pmatch);
7087 return PR_OK;
7088 }
7089 #endif /* SUPPORT_PCRE2_8 */
7090
7091 /* Handle matching via the native interface. Check for consistency of
7092 modifiers. */
7093
7094 if (dat_datctl.startend[0] != CFORE_UNSET)
7095 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7096
7097 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7098 matching, even if the JIT compiler was used. */
7099
7100 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7101 FLD(compiled_code, executable_jit) != NULL)
7102 {
7103 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7104 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7105 }
7106
7107 /* Handle passing the subject as zero-terminated. */
7108
7109 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7110 arg_ulen = PCRE2_ZERO_TERMINATED;
7111
7112 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7113 NULL context. */
7114
7115 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7116 NULL : PTR(dat_context);
7117
7118 /* Enable display of malloc/free if wanted. We can do this only if either the
7119 pattern or the subject is processed with a context. */
7120
7121 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7122
7123 if (show_memory &&
7124 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7125 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7126 "context: ignored\n");
7127
7128 /* Create and assign a JIT stack if requested. */
7129
7130 if (dat_datctl.jitstack != 0)
7131 {
7132 if (dat_datctl.jitstack != jit_stack_size)
7133 {
7134 PCRE2_JIT_STACK_FREE(jit_stack);
7135 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7136 jit_stack_size = dat_datctl.jitstack;
7137 }
7138 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7139 }
7140
7141 /* Or de-assign */
7142
7143 else if (jit_stack != NULL)
7144 {
7145 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7146 PCRE2_JIT_STACK_FREE(jit_stack);
7147 jit_stack = NULL;
7148 jit_stack_size = 0;
7149 }
7150
7151 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7152 if we want to verify that JIT was actually used. */
7153
7154 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7155 {
7156 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7157 }
7158
7159 /* Adjust match_data according to size of offsets required. A size of zero
7160 causes a new match data block to be obtained that exactly fits the pattern. */
7161
7162 if (dat_datctl.oveccount == 0)
7163 {
7164 PCRE2_MATCH_DATA_FREE(match_data);
7165 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7166 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7167 }
7168 else if (dat_datctl.oveccount <= max_oveccount)
7169 {
7170 SETFLD(match_data, oveccount, dat_datctl.oveccount);
7171 }
7172 else
7173 {
7174 max_oveccount = dat_datctl.oveccount;
7175 PCRE2_MATCH_DATA_FREE(match_data);
7176 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7177 }
7178
7179 if (CASTVAR(void *, match_data) == NULL)
7180 {
7181 fprintf(outfile, "** Failed to get memory for recording matching "
7182 "information (size requested: %d)\n", dat_datctl.oveccount);
7183 max_oveccount = 0;
7184 return PR_OK;
7185 }
7186
7187 ovector = FLD(match_data, ovector);
7188 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7189
7190 /* Replacement processing is ignored for DFA matching. */
7191
7192 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7193 {
7194 fprintf(outfile, "** Ignored for DFA matching: replace\n");
7195 dat_datctl.replacement[0] = 0;
7196 }
7197
7198 /* If a replacement string is provided, call pcre2_substitute() instead of one
7199 of the matching functions. First we have to convert the replacement string to
7200 the appropriate width. */
7201
7202 if (dat_datctl.replacement[0] != 0)
7203 {
7204 int rc;
7205 uint8_t *pr;
7206 uint8_t rbuffer[REPLACE_BUFFSIZE];
7207 uint8_t nbuffer[REPLACE_BUFFSIZE];
7208 uint32_t xoptions;
7209 PCRE2_SIZE j, rlen, nsize, erroroffset;
7210 BOOL badutf = FALSE;
7211
7212 #ifdef SUPPORT_PCRE2_8
7213 uint8_t *r8 = NULL;
7214 #endif
7215 #ifdef SUPPORT_PCRE2_16
7216 uint16_t *r16 = NULL;
7217 #endif
7218 #ifdef SUPPORT_PCRE2_32
7219 uint32_t *r32 = NULL;
7220 #endif
7221
7222 /* Fill the ovector with junk to detect elements that do not get set
7223 when they should be (relevant only when "allvector" is specified). */
7224
7225 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7226
7227 if (timeitm)
7228 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7229
7230 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7231 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7232
7233 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7234 PCRE2_SUBSTITUTE_GLOBAL) |
7235 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7236 PCRE2_SUBSTITUTE_EXTENDED) |
7237 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7238 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7239 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7240 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7241 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7242 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7243
7244 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7245 pr = dat_datctl.replacement;
7246
7247 /* If the replacement starts with '[<number>]' we interpret that as length
7248 value for the replacement buffer. */
7249
7250 nsize = REPLACE_BUFFSIZE/code_unit_size;
7251 if (*pr == '[')
7252 {
7253 PCRE2_SIZE n = 0;
7254 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7255 if (*pr++ != ']')
7256 {
7257 fprintf(outfile, "Bad buffer size in replacement string\n");
7258 return PR_OK;
7259 }
7260 if (n > nsize)
7261 {
7262 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7263 "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
7264 return PR_OK;
7265 }
7266 nsize = n;
7267 }
7268
7269 /* Now copy the replacement string to a buffer of the appropriate width. No
7270 escape processing is done for replacements. In UTF mode, check for an invalid
7271 UTF-8 input string, and if it is invalid, just copy its code units without
7272 UTF interpretation. This provides a means of checking that an invalid string
7273 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7274 replacement. */
7275
7276 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7277
7278 /* Not UTF or invalid UTF-8: just copy the code units. */
7279
7280 if (!utf || badutf)
7281 {
7282 while ((c = *pr++) != 0)
7283 {
7284 #ifdef SUPPORT_PCRE2_8
7285 if (test_mode == PCRE8_MODE) *r8++ = c;
7286 #endif
7287 #ifdef SUPPORT_PCRE2_16
7288 if (test_mode == PCRE16_MODE) *r16++ = c;
7289 #endif
7290 #ifdef SUPPORT_PCRE2_32
7291 if (test_mode == PCRE32_MODE) *r32++ = c;
7292 #endif
7293 }
7294 }
7295
7296 /* Valid UTF-8 replacement string */
7297
7298 else while ((c = *pr++) != 0)
7299 {
7300 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7301
7302 #ifdef SUPPORT_PCRE2_8
7303 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7304 #endif
7305
7306 #ifdef SUPPORT_PCRE2_16
7307 if (test_mode == PCRE16_MODE)
7308 {
7309 if (c >= 0x10000u)
7310 {
7311 c-= 0x10000u;
7312 *r16++ = 0xD800 | (c >> 10);
7313 *r16++ = 0xDC00 | (c & 0x3ff);
7314 }
7315 else *r16++ = c;
7316 }
7317 #endif
7318
7319 #ifdef SUPPORT_PCRE2_32
7320 if (test_mode == PCRE32_MODE) *r32++ = c;
7321 #endif
7322 }
7323
7324 SET(*r, 0);
7325 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7326 rlen = PCRE2_ZERO_TERMINATED;
7327 else
7328 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7329
7330 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7331 {
7332 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7333 }
7334 else
7335 {
7336 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
7337 }
7338
7339 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7340 dat_datctl.options|xoptions, match_data, use_dat_context,
7341 rbuffer, rlen, nbuffer, &nsize);
7342
7343 if (rc < 0)
7344 {
7345 fprintf(outfile, "Failed: error %d", rc);
7346 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7347 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7348 fprintf(outfile, ": ");
7349 if (!print_error_message(rc, "", "")) return PR_ABEND;
7350 if (rc == PCRE2_ERROR_NOMEMORY &&
7351 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7352 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7353 }
7354 else
7355 {
7356 fprintf(outfile, "%2d: ", rc);
7357 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7358 }
7359
7360 fprintf(outfile, "\n");
7361 show_memory = FALSE;
7362
7363 /* Show final ovector contents if requested. */
7364
7365 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7366 show_ovector(ovector, oveccount);
7367
7368 return PR_OK;
7369 } /* End of substitution handling */
7370
7371 /* When a replacement string is not provided, run a loop for global matching
7372 with one of the basic matching functions. For altglobal (or first time round
7373 the loop), set an "unset" value for the previous match info. */
7374
7375 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7376
7377 for (gmatched = 0;; gmatched++)
7378 {
7379 PCRE2_SIZE j;
7380 int capcount;
7381
7382 /* Fill the ovector with junk to detect elements that do not get set
7383 when they should be. */
7384
7385 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7386
7387 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7388 stack callback function. */
7389
7390 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7391
7392 /* Do timing if required. */
7393
7394 if (timeitm > 0)
7395 {
7396 int i;
7397 clock_t start_time, time_taken;
7398
7399 if ((dat_datctl.control & CTL_DFA) != 0)
7400 {
7401 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7402 {
7403 fprintf(outfile, "Timing DFA restarts is not supported\n");
7404 return PR_OK;
7405 }
7406 if (dfa_workspace == NULL)
7407 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7408 start_time = clock();
7409 for (i = 0; i < timeitm; i++)
7410 {
7411 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7412 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7413 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7414 }
7415 }
7416
7417 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7418 {
7419 start_time = clock();
7420 for (i = 0; i < timeitm; i++)
7421 {
7422 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7423 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7424 use_dat_context);
7425 }
7426 }
7427
7428 else
7429 {
7430 start_time = clock();
7431 for (i = 0; i < timeitm; i++)
7432 {
7433 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7434 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7435 use_dat_context);
7436 }
7437 }
7438 total_match_time += (time_taken = clock() - start_time);
7439 fprintf(outfile, "Match time %.4f milliseconds\n",
7440 (((double)time_taken * 1000.0) / (double)timeitm) /
7441 (double)CLOCKS_PER_SEC);
7442 }
7443
7444 /* Find the heap, match and depth limits if requested. The depth and heap
7445 limits are not relevant for JIT. The return from check_match_limit() is the
7446 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7447
7448 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7449 {
7450 capcount = 0; /* This stops compiler warnings */
7451
7452 if (FLD(compiled_code, executable_jit) == NULL ||
7453 (dat_datctl.options & PCRE2_NO_JIT) != 0)
7454 {
7455 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7456 }
7457
7458 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7459 "match");
7460
7461 if (FLD(compiled_code, executable_jit) == NULL ||
7462 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7463 (dat_datctl.control & CTL_DFA) != 0)
7464 {
7465 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7466 "depth");
7467 }
7468
7469 if (capcount == 0)
7470 {
7471 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7472 capcount = dat_datctl.oveccount;
7473 }
7474 }
7475
7476 /* Otherwise just run a single match, setting up a callout if required (the
7477 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7478
7479 else
7480 {
7481 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7482 {
7483 PCRE2_SET_CALLOUT(dat_context, callout_function,
7484 (void *)(&dat_datctl.callout_data));
7485 first_callout = TRUE;
7486 last_callout_mark = NULL;
7487 callout_count = 0;
7488 }
7489 else
7490 {
7491 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7492 }
7493
7494 /* Run a single DFA or NFA match. */
7495
7496 if ((dat_datctl.control & CTL_DFA) != 0)
7497 {
7498 if (dfa_workspace == NULL)
7499 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7500 if (dfa_matched++ == 0)
7501 dfa_workspace[0] = -1; /* To catch bad restart */
7502 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7503 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7504 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7505 if (capcount == 0)
7506 {
7507 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7508 capcount = dat_datctl.oveccount;
7509 }
7510 }
7511 else
7512 {
7513 if ((pat_patctl.control & CTL_JITFAST) != 0)
7514 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7515 dat_datctl.options | g_notempty, match_data, use_dat_context);
7516 else
7517 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7518 dat_datctl.options | g_notempty, match_data, use_dat_context);
7519 if (capcount == 0)
7520 {
7521 fprintf(outfile, "Matched, but too many substrings\n");
7522 capcount = dat_datctl.oveccount;
7523 }
7524 }
7525 }
7526
7527 /* The result of the match is now in capcount. First handle a successful
7528 match. */
7529
7530 if (capcount >= 0)
7531 {
7532 int i;
7533
7534 if (capcount > (int)oveccount) /* Check for lunatic return value */
7535 {
7536 fprintf(outfile,
7537 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7538 capcount, oveccount);
7539 capcount = oveccount;
7540 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7541 {
7542 fprintf(outfile, "** Global loop abandoned\n");
7543 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7544 }
7545 }
7546
7547 /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7548 should be, but not for fast JIT, where it isn't supported. */
7549
7550 if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7551 (pat_patctl.control & CTL_JITFAST) == 0)
7552 {
7553 if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7554 fprintf(outfile,
7555 "** PCRE2 error: flag not set after copy_matched_subject\n");
7556
7557 if (CASTFLD(void *, match_data, subject) == pp)
7558 fprintf(outfile,
7559 "** PCRE2 error: copy_matched_subject has not copied\n");
7560
7561 if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7562 fprintf(outfile,
7563 "** PCRE2 error: copy_matched_subject mismatch\n");
7564 }
7565
7566 /* If this is not the first time round a global loop, check that the
7567 returned string has changed. If it has not, check for an empty string match
7568 at different starting offset from the previous match. This is a failed test
7569 retry for null-matching patterns that don't match at their starting offset,
7570 for example /(?<=\G.)/. A repeated match at the same point is not such a
7571 pattern, and must be discarded, and we then proceed to seek a non-null
7572 match at the current point. For any other repeated match, there is a bug
7573 somewhere and we must break the loop because it will go on for ever. We
7574 know that there are always at least two elements in the ovector. */
7575
7576 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7577 {
7578 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7579 {
7580 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7581 ovecsave[2] = dat_datctl.offset;
7582 continue; /* Back to the top of the loop */
7583 }
7584 fprintf(outfile,
7585 "** PCRE2 error: global repeat returned the same string as previous\n");
7586 fprintf(outfile, "** Global loop abandoned\n");
7587 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7588 }
7589
7590 /* "allcaptures" requests showing of all captures in the pattern, to check
7591 unset ones at the end. It may be set on the pattern or the data. Implement
7592 by setting capcount to the maximum. This is not relevant for DFA matching,
7593 so ignore it (warning given above). */
7594
7595 if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7596 {
7597 capcount = maxcapcount + 1; /* Allow for full match */
7598 if (capcount > (int)oveccount) capcount = oveccount;
7599 }
7600
7601 /* "allvector" request showing the entire ovector. */
7602
7603 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7604
7605 /* Output the captured substrings. Note that, for the matched string,
7606 the use of \K in an assertion can make the start later than the end. */
7607
7608 for (i = 0; i < 2*capcount; i += 2)
7609 {
7610 PCRE2_SIZE lleft, lmiddle, lright;
7611 PCRE2_SIZE start = ovector[i];
7612 PCRE2_SIZE end = ovector[i+1];
7613
7614 if (start > end)
7615 {
7616 start = ovector[i+1];
7617 end = ovector[i];
7618 fprintf(outfile, "Start of matched string is beyond its end - "
7619 "displaying from end to start.\n");
7620 }
7621
7622 fprintf(outfile, "%2d: ", i/2);
7623
7624 /* Check for an unset group */
7625
7626 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7627 {
7628 fprintf(outfile, "<unset>\n");
7629 continue;
7630 }
7631
7632 /* Check for silly offsets, in particular, values that have not been
7633 set when they should have been. However, if we are past the end of the
7634 captures for this pattern ("allvector" causes this), or if we are DFA
7635 matching, it isn't an error if the entry is unchanged. */
7636
7637 if (start > ulen || end > ulen)
7638 {
7639 if (((dat_datctl.control & CTL_DFA) != 0 ||
7640 i >= (int)(2*maxcapcount + 2)) &&
7641 start == JUNK_OFFSET && end == JUNK_OFFSET)
7642 fprintf(outfile, "<unchanged>\n");
7643 else
7644 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7645 (unsigned long int)start, (unsigned long int)end);
7646 continue;
7647 }
7648
7649 /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with
7650 JIT, it is disabled above, with a comment.) When the match is done by the
7651 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7652 set, and if the leftmost consulted character is before the start of the
7653 match or the rightmost consulted character is past the end of the match,
7654 we want to show all consulted characters for the main matched string, and
7655 indicate which were lookarounds. */
7656
7657 if (i == 0)
7658 {
7659 BOOL showallused;
7660 PCRE2_SIZE leftchar, rightchar;
7661
7662 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7663 {
7664 leftchar = FLD(match_data, leftchar);
7665 rightchar = FLD(match_data, rightchar);
7666 showallused = i == 0 && (leftchar < start || rightchar > end);
7667 }
7668 else showallused = FALSE;
7669
7670 if (showallused)
7671 {
7672 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7673 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7674 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7675 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7676 fprintf(outfile, " (JIT)");
7677 fprintf(outfile, "\n ");
7678 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7679 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7680 for (j = 0; j < lright; j++) fprintf(outfile, ">");
7681 }
7682
7683 /* When a pattern contains \K, the start of match position may be
7684 different to the start of the matched string. When this is the case,
7685 show it when requested. */
7686
7687 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7688 {
7689 PCRE2_SIZE startchar;
7690 PCRE2_GET_STARTCHAR(startchar, match_data);
7691 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7692 PCHARSV(pp, start, end - start, utf, outfile);
7693 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7694 fprintf(outfile, " (JIT)");
7695 if (startchar != start)
7696 {
7697 fprintf(outfile, "\n ");
7698 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7699 }
7700 }
7701
7702 /* Otherwise, just show the matched string. */
7703
7704 else
7705 {
7706 PCHARSV(pp, start, end - start, utf, outfile);
7707 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7708 fprintf(outfile, " (JIT)");
7709 }
7710 }
7711
7712 /* Not the main matched string. Just show it unadorned. */
7713
7714 else
7715 {
7716 PCHARSV(pp, start, end - start, utf, outfile);
7717 }
7718
7719 fprintf(outfile, "\n");
7720
7721 /* Note: don't use the start/end variables here because we want to
7722 show the text from what is reported as the end. */
7723
7724 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7725 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7726 {
7727 fprintf(outfile, "%2d+ ", i/2);
7728 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7729 fprintf(outfile, "\n");
7730 }
7731 }
7732
7733 /* Output (*MARK) data if requested */
7734
7735 if ((dat_datctl.control & CTL_MARK) != 0 &&
7736 TESTFLD(match_data, mark, !=, NULL))
7737 {
7738 fprintf(outfile, "MK: ");
7739 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7740 fprintf(outfile, "\n");
7741 }
7742
7743 /* Process copy/get strings */
7744
7745 if (!copy_and_get(utf, capcount)) return PR_ABEND;
7746
7747 } /* End of handling a successful match */
7748
7749 /* There was a partial match. The value of ovector[0] is the bumpalong point,
7750 that is, startchar, not any \K point that might have been passed. */
7751
7752 else if (capcount == PCRE2_ERROR_PARTIAL)
7753 {
7754 PCRE2_SIZE poffset;
7755 int backlength;
7756 int rubriclength = 0;
7757
7758 fprintf(outfile, "Partial match");
7759 if ((dat_datctl.control & CTL_MARK) != 0 &&
7760 TESTFLD(match_data, mark, !=, NULL))
7761 {
7762 fprintf(outfile, ", mark=");
7763 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
7764 outfile);
7765 rubriclength += 7;
7766 }
7767 fprintf(outfile, ": ");
7768 rubriclength += 15;
7769
7770 poffset = backchars(pp, ovector[0], maxlookbehind, utf);
7771 PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
7772 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7773
7774 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7775 fprintf(outfile, " (JIT)");
7776 fprintf(outfile, "\n");
7777
7778 if (backlength != 0)
7779 {
7780 int i;
7781 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7782 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7783 fprintf(outfile, "\n");
7784 }
7785
7786 if (ulen != ovector[1])
7787 fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7788 "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7789
7790 /* Process copy/get strings */
7791
7792 if (!copy_and_get(utf, 1)) return PR_ABEND;
7793
7794 /* "allvector" outputs the entire vector */
7795
7796 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7797 show_ovector(ovector, oveccount);
7798
7799 break; /* Out of the /g loop */
7800 } /* End of handling partial match */
7801
7802 /* Failed to match. If this is a /g or /G loop, we might previously have
7803 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7804 If that is the case, this is not necessarily the end. We want to advance the
7805 start offset, and continue. We won't be at the end of the string - that was
7806 checked before setting g_notempty. We achieve the effect by pretending that a
7807 single character was matched.
7808
7809 Complication arises in the case when the newline convention is "any", "crlf",
7810 or "anycrlf". If the previous match was at the end of a line terminated by
7811 CRLF, an advance of one character just passes the CR, whereas we should
7812 prefer the longer newline sequence, as does the code in pcre2_match().
7813
7814 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7815 character, not one byte. */
7816
7817 else if (g_notempty != 0) /* There was a previous null match */
7818 {
7819 uint16_t nl = FLD(compiled_code, newline_convention);
7820 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
7821 PCRE2_SIZE end_offset = start_offset + 1;
7822
7823 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7824 nl == PCRE2_NEWLINE_ANYCRLF) &&
7825 start_offset < ulen - 1 &&
7826 CODE_UNIT(pp, start_offset) == '\r' &&
7827 CODE_UNIT(pp, end_offset) == '\n')
7828 end_offset++;
7829
7830 else if (utf && test_mode != PCRE32_MODE)
7831 {
7832 if (test_mode == PCRE8_MODE)
7833 {
7834 for (; end_offset < ulen; end_offset++)
7835 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7836 }
7837 else /* 16-bit mode */
7838 {
7839 for (; end_offset < ulen; end_offset++)
7840 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7841 }
7842 }
7843
7844 SETFLDVEC(match_data, ovector, 0, start_offset);
7845 SETFLDVEC(match_data, ovector, 1, end_offset);
7846 } /* End of handling null match in a global loop */
7847
7848 /* A "normal" match failure. There will be a negative error number in
7849 capcount. */
7850
7851 else
7852 {
7853 switch(capcount)
7854 {
7855 case PCRE2_ERROR_NOMATCH:
7856 if (gmatched == 0)
7857 {
7858 fprintf(outfile, "No match");
7859 if ((dat_datctl.control & CTL_MARK) != 0 &&
7860 TESTFLD(match_data, mark, !=, NULL))
7861 {
7862 fprintf(outfile, ", mark = ");
7863 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7864 }
7865 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7866 fprintf(outfile, " (JIT)");
7867 fprintf(outfile, "\n");
7868
7869 /* "allvector" outputs the entire vector */
7870
7871 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7872 show_ovector(ovector, oveccount);
7873 }
7874 break;
7875
7876 case PCRE2_ERROR_BADUTFOFFSET:
7877 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7878 break;
7879
7880 default:
7881 fprintf(outfile, "Failed: error %d: ", capcount);
7882 if (!print_error_message(capcount, "", "")) return PR_ABEND;
7883 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7884 capcount >= PCRE2_ERROR_UTF32_ERR2)
7885 {
7886 PCRE2_SIZE startchar;
7887 PCRE2_GET_STARTCHAR(startchar, match_data);
7888 fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
7889 }
7890 fprintf(outfile, "\n");
7891 break;
7892 }
7893
7894 break; /* Out of the /g loop */
7895 } /* End of failed match handling */
7896
7897 /* Control reaches here in two circumstances: (a) after a match, and (b)
7898 after a non-match that immediately followed a match on an empty string when
7899 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
7900 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
7901 of one character. So effectively we get here only after a match. If we
7902 are not doing a global search, we are done. */
7903
7904 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
7905 {
7906 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
7907 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
7908
7909 /* We must now set up for the next iteration of a global search. If we have
7910 matched an empty string, first check to see if we are at the end of the
7911 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
7912 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
7913 at the same point. If this fails it will be picked up above, where a fake
7914 match is set up so that at this point we advance to the next character.
7915
7916 However, in order to cope with patterns that never match at their starting
7917 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
7918 than the starting offset. This means there will be a retry with the
7919 starting offset at the match offset. If this returns the same match again,
7920 it is picked up above and ignored, and the special action is then taken. */
7921
7922 if (match_offset == end_offset)
7923 {
7924 if (end_offset == ulen) break; /* End of subject */
7925 if (match_offset <= dat_datctl.offset)
7926 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7927 }
7928
7929 /* However, even after matching a non-empty string, there is still one
7930 tricky case. If a pattern contains \K within a lookbehind assertion at the
7931 start, the end of the matched string can be at the offset where the match
7932 started. In the case of a normal /g iteration without special action, this
7933 leads to a loop that keeps on returning the same substring. The loop would
7934 be caught above, but we really want to move on to the next match. */
7935
7936 else
7937 {
7938 g_notempty = 0; /* Set for a "normal" repeat */
7939 if ((dat_datctl.control & CTL_GLOBAL) != 0)
7940 {
7941 PCRE2_SIZE startchar;
7942 PCRE2_GET_STARTCHAR(startchar, match_data);
7943 if (end_offset <= startchar)
7944 {
7945 if (startchar >= ulen) break; /* End of subject */
7946 end_offset = startchar + 1;
7947 if (utf && test_mode != PCRE32_MODE)
7948 {
7949 if (test_mode == PCRE8_MODE)
7950 {
7951 for (; end_offset < ulen; end_offset++)
7952 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7953 }
7954 else /* 16-bit mode */
7955 {
7956 for (; end_offset < ulen; end_offset++)
7957 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7958 }
7959 }
7960 }
7961 }
7962 }
7963
7964 /* For a normal global (/g) iteration, save the current ovector[0,1] and
7965 the starting offset so that we can check that they do change each time.
7966 Otherwise a matching bug that returns the same string causes an infinite
7967 loop. It has happened! Then update the start offset, leaving other
7968 parameters alone. */
7969
7970 if ((dat_datctl.control & CTL_GLOBAL) != 0)
7971 {
7972 ovecsave[0] = ovector[0];
7973 ovecsave[1] = ovector[1];
7974 ovecsave[2] = dat_datctl.offset;
7975 dat_datctl.offset = end_offset;
7976 }
7977
7978 /* For altglobal, just update the pointer and length. */
7979
7980 else
7981 {
7982 pp += end_offset * code_unit_size;
7983 len -= end_offset * code_unit_size;
7984 ulen -= end_offset;
7985 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
7986 }
7987 }
7988 } /* End of global loop */
7989
7990 show_memory = FALSE;
7991 return PR_OK;
7992 }
7993
7994
7995
7996
7997 /*************************************************
7998 * Print PCRE2 version *
7999 *************************************************/
8000
8001 static void
print_version(FILE * f)8002 print_version(FILE *f)
8003 {
8004 VERSION_TYPE *vp;
8005 fprintf(f, "PCRE2 version ");
8006 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8007 fprintf(f, "\n");
8008 }
8009
8010
8011
8012 /*************************************************
8013 * Print Unicode version *
8014 *************************************************/
8015
8016 static void
print_unicode_version(FILE * f)8017 print_unicode_version(FILE *f)
8018 {
8019 VERSION_TYPE *vp;
8020 fprintf(f, "Unicode version ");
8021 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8022 }
8023
8024
8025
8026 /*************************************************
8027 * Print JIT target *
8028 *************************************************/
8029
8030 static void
print_jit_target(FILE * f)8031 print_jit_target(FILE *f)
8032 {
8033 VERSION_TYPE *vp;
8034 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8035 }
8036
8037
8038
8039 /*************************************************
8040 * Print newline configuration *
8041 *************************************************/
8042
8043 /* Output is always to stdout.
8044
8045 Arguments:
8046 rc the return code from PCRE2_CONFIG_NEWLINE
8047 isc TRUE if called from "-C newline"
8048 Returns: nothing
8049 */
8050
8051 static void
print_newline_config(uint32_t optval,BOOL isc)8052 print_newline_config(uint32_t optval, BOOL isc)
8053 {
8054 if (!isc) printf(" Newline sequence is ");
8055 if (optval < sizeof(newlines)/sizeof(char *))
8056 printf("%s\n", newlines[optval]);
8057 else
8058 printf("a non-standard value: %d\n", optval);
8059 }
8060
8061
8062
8063 /*************************************************
8064 * Usage function *
8065 *************************************************/
8066
/* Write the command synopsis and the list of command line options to stdout.
The lines that depend on the build (readline support and the available code
unit widths) are selected at compile time. None of the texts contains a '%',
so fputs() emits exactly what printf() would.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
fputs("Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

/* One line for each library width that was built. */

#ifdef SUPPORT_PCRE2_8
fputs(" -8 use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

/* Options up to and including -C, with the list of -C arguments. */

fputs(" -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"
      " -AC as -ac, but also set subject 'callout_extra' modifier\n"
      " -b set default pattern modifier 'fullbincode'\n"
      " -C show PCRE2 compile-time options and exit\n"
      " -C arg show a specific compile-time option and exit with its\n"
      " value if numeric (else 0). The arg can be:\n"
      " backslash-C use of \\C is enabled [0, 1]\n"
      " bsr \\R type [ANYCRLF, ANY]\n"
      " ebcdic compiled for EBCDIC character code [0,1]\n"
      " ebcdic-nl NL code if compiled for EBCDIC\n"
      " jit just-in-time compiler supported [0, 1]\n"
      " linksize internal link size [2, 3, 4]\n"
      " newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
      " pcre2-8 8 bit library support enabled [0, 1]\n"
      " pcre2-16 16 bit library support enabled [0, 1]\n"
      " pcre2-32 32 bit library support enabled [0, 1]\n"
      " unicode Unicode and UTF support enabled [0, 1]\n", stdout);

/* The remaining options. */

fputs(" -d set default pattern modifier 'debug'\n"
      " -dfa set default subject modifier 'dfa'\n"
      " -error <n,m,..> show messages for error numbers, then exit\n"
      " -help show usage information\n"
      " -i set default pattern modifier 'info'\n"
      " -jit set default pattern modifier 'jit'\n"
      " -jitverify set default pattern modifier 'jitverify'\n"
      " -LM list pattern and subject modifiers, then exit\n"
      " -q quiet: do not output PCRE2 version number at start\n"
      " -pattern <s> set default pattern modifier fields\n"
      " -subject <s> set default subject modifier fields\n"
      " -S <n> set stack size to <n> mebibytes\n"
      " -t [<n>] time compilation and execution, repeating <n> times\n"
      " -tm [<n>] time execution (matching) only, repeating <n> times\n"
      " -T same as -t, but show total times at the end\n"
      " -TM same as -tm, but show total time at the end\n"
      " -version show PCRE2 version and exit\n", stdout);
}
8122
8123
8124
8125 /*************************************************
8126 * Handle -C option *
8127 *************************************************/
8128
8129 /* This option outputs configuration options and sets an appropriate return
8130 code when asked for a single option. The code is abstracted into a separate
8131 function because of its size. Use whichever pcre2_config() function is
8132 available.
8133
8134 Argument: an option name or NULL
8135 Returns: the return code
8136 */
8137
8138 static int
c_option(const char * arg)8139 c_option(const char *arg)
8140 {
8141 uint32_t optval;
8142 unsigned int i = COPTLISTCOUNT;
8143 int yield = 0;
8144
8145 if (arg != NULL && arg[0] != CHAR_MINUS)
8146 {
8147 for (i = 0; i < COPTLISTCOUNT; i++)
8148 if (strcmp(arg, coptlist[i].name) == 0) break;
8149
8150 if (i >= COPTLISTCOUNT)
8151 {
8152 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8153 return 0;
8154 }
8155
8156 switch (coptlist[i].type)
8157 {
8158 case CONF_BSR:
8159 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8160 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8161 break;
8162
8163 case CONF_FIX:
8164 yield = coptlist[i].value;
8165 printf("%d\n", yield);
8166 break;
8167
8168 case CONF_FIZ:
8169 optval = coptlist[i].value;
8170 printf("%d\n", optval);
8171 break;
8172
8173 case CONF_INT:
8174 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8175 printf("%d\n", yield);
8176 break;
8177
8178 case CONF_NL:
8179 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8180 print_newline_config(optval, TRUE);
8181 break;
8182 }
8183
8184 /* For VMS, return the value by setting a symbol, for certain values only. This
8185 is contributed code which the PCRE2 developers have no means of testing. */
8186
8187 #ifdef __VMS
8188
8189 /* This is the original code provided by the first VMS contributor. */
8190 #ifdef NEVER
8191 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8192 {
8193 char ucname[16];
8194 strcpy(ucname, coptlist[i].name);
8195 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8196 vms_setsymbol(ucname, 0, optval);
8197 }
8198 #endif
8199
8200 /* This is the new code, provided by a second VMS contributor. */
8201
8202 if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8203 {
8204 char nam_buf[22], val_buf[4];
8205 $DESCRIPTOR(nam, nam_buf);
8206 $DESCRIPTOR(val, val_buf);
8207
8208 strcpy(nam_buf, coptlist[i].name);
8209 nam.dsc$w_length = strlen(nam_buf);
8210 sprintf(val_buf, "%d", yield);
8211 val.dsc$w_length = strlen(val_buf);
8212 lib$set_symbol(&nam, &val);
8213 }
8214 #endif /* __VMS */
8215
8216 return yield;
8217 }
8218
8219 /* No argument for -C: output all configuration information. */
8220
8221 print_version(stdout);
8222 printf("Compiled with\n");
8223
8224 #ifdef EBCDIC
8225 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8226 #if defined NATIVE_ZOS
8227 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
8228 #endif
8229 #endif
8230
8231 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8232 if (optval & 1) printf(" 8-bit support\n");
8233 if (optval & 2) printf(" 16-bit support\n");
8234 if (optval & 4) printf(" 32-bit support\n");
8235
8236 #ifdef SUPPORT_VALGRIND
8237 printf(" Valgrind support\n");
8238 #endif
8239
8240 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8241 if (optval != 0)
8242 {
8243 printf(" UTF and UCP support (");
8244 print_unicode_version(stdout);
8245 printf(")\n");
8246 }
8247 else printf(" No Unicode support\n");
8248
8249 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8250 if (optval != 0)
8251 {
8252 printf(" Just-in-time compiler support: ");
8253 print_jit_target(stdout);
8254 printf("\n");
8255 }
8256 else
8257 {
8258 printf(" No just-in-time compiler support\n");
8259 }
8260
8261 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8262 print_newline_config(optval, FALSE);
8263 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8264 printf(" \\R matches %s\n",
8265 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8266 "all Unicode newlines");
8267 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8268 printf(" \\C is %ssupported\n", optval? "not ":"");
8269 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8270 printf(" Internal link size = %d\n", optval);
8271 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8272 printf(" Parentheses nest limit = %d\n", optval);
8273 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8274 printf(" Default heap limit = %d kibibytes\n", optval);
8275 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8276 printf(" Default match limit = %d\n", optval);
8277 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8278 printf(" Default depth limit = %d\n", optval);
8279 return 0;
8280 }
8281
8282
8283
8284 /*************************************************
8285 * Display one modifier *
8286 *************************************************/
8287
8288 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8289 display_one_modifier(modstruct *m, BOOL for_pattern)
8290 {
8291 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8292 '*' : ' ';
8293 printf("%c%s", c, m->name);
8294 }
8295
8296
8297
8298 /*************************************************
8299 * Display pattern or subject modifiers *
8300 *************************************************/
8301
8302 /* In order to print in two columns, first scan without printing to get a list
8303 of the modifiers that are required.
8304
8305 Arguments:
8306 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8307 title string to be used in title
8308
8309 Returns: nothing
8310 */
8311
8312 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8313 display_selected_modifiers(BOOL for_pattern, const char *title)
8314 {
8315 uint32_t i, j;
8316 uint32_t n = 0;
8317 uint32_t list[MODLISTCOUNT];
8318
8319 for (i = 0; i < MODLISTCOUNT; i++)
8320 {
8321 BOOL is_pattern = TRUE;
8322 modstruct *m = modlist + i;
8323
8324 switch (m->which)
8325 {
8326 case MOD_CTC: /* Compile context */
8327 case MOD_PAT: /* Pattern */
8328 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8329 break;
8330
8331 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8332 subjects, but can be given with a pattern. We list them as subject
8333 modifiers, but marked with an asterisk.*/
8334
8335 case MOD_CTM: /* Match context */
8336 case MOD_DAT: /* Subject line */
8337 case MOD_PND: /* As PD, but not default pattern */
8338 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8339 is_pattern = FALSE;
8340 break;
8341
8342 default: printf("** Unknown type for modifier '%s'\n", m->name);
8343 /* Fall through */
8344 case MOD_PD: /* Pattern or subject */
8345 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8346 is_pattern = for_pattern;
8347 break;
8348 }
8349
8350 if (for_pattern == is_pattern) list[n++] = i;
8351 }
8352
8353 /* Now print from the list in two columns. */
8354
8355 printf("-------------- %s MODIFIERS --------------\n", title);
8356
8357 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8358 {
8359 modstruct *m = modlist + list[i];
8360 display_one_modifier(m, for_pattern);
8361 if (j < n)
8362 {
8363 uint32_t k = 27 - strlen(m->name);
8364 while (k-- > 0) printf(" ");
8365 display_one_modifier(modlist + list[j], for_pattern);
8366 }
8367 printf("\n");
8368 }
8369 }
8370
8371
8372
8373 /*************************************************
8374 * Display the list of modifiers *
8375 *************************************************/
8376
8377 static void
display_modifiers(void)8378 display_modifiers(void)
8379 {
8380 printf(
8381 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8382 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8383 "that are listed for both patterns and subjects have different effects in\n"
8384 "each case.\n\n");
8385 display_selected_modifiers(TRUE, "PATTERN");
8386 printf("\n");
8387 display_selected_modifiers(FALSE, "SUBJECT");
8388 }
8389
8390
8391
8392 /*************************************************
8393 * Main Program *
8394 *************************************************/
8395
8396 int
main(int argc,char ** argv)8397 main(int argc, char **argv)
8398 {
8399 uint32_t temp;
8400 uint32_t yield = 0;
8401 uint32_t op = 1;
8402 BOOL notdone = TRUE;
8403 BOOL quiet = FALSE;
8404 BOOL showtotaltimes = FALSE;
8405 BOOL skipping = FALSE;
8406 char *arg_subject = NULL;
8407 char *arg_pattern = NULL;
8408 char *arg_error = NULL;
8409
8410 /* The offsets to the options and control bits fields of the pattern and data
8411 control blocks must be the same so that common options and controls such as
8412 "anchored" or "memory" can work for either of them from a single table entry.
8413 We cannot test this till runtime because "offsetof" does not work in the
8414 preprocessor. */
8415
8416 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8417 PO(control2) != DO(control2))
8418 {
8419 fprintf(stderr, "** Coding error: "
8420 "options and control offsets for pattern and data must be the same.\n");
8421 return 1;
8422 }
8423
8424 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8425 same time checking that a request for the length gives the same answer. Also
8426 check lengths for non-string items. */
8427
8428 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8429 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8430
8431 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8432 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8433
8434 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8435 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8436
8437 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8438 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8439 {
8440 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8441 return 1;
8442 }
8443
8444 /* Check that bad options are diagnosed. */
8445
8446 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8447 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8448 {
8449 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8450 return 1;
8451 }
8452
8453 /* This configuration option is now obsolete, but running a quick check ensures
8454 that its code is covered. */
8455
8456 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8457
8458 /* Get buffers from malloc() so that valgrind will check their misuse when
8459 debugging. They grow automatically when very long lines are read. The 16-
8460 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8461
8462 buffer = (uint8_t *)malloc(pbuffer8_size);
8463 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8464
8465 /* The following _setmode() stuff is some Windows magic that tells its runtime
8466 library to translate CRLF into a single LF character. At least, that's what
8467 I've been told: never having used Windows I take this all on trust. Originally
8468 it set 0x8000, but then I was advised that _O_BINARY was better. */
8469
8470 #if defined(_WIN32) || defined(WIN32)
8471 _setmode( _fileno( stdout ), _O_BINARY );
8472 #endif
8473
8474 /* Initialization that does not depend on the running mode. */
8475
8476 locale_name[0] = 0;
8477
8478 memset(&def_patctl, 0, sizeof(patctl));
8479 def_patctl.convert_type = CONVERT_UNSET;
8480
8481 memset(&def_datctl, 0, sizeof(datctl));
8482 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8483 def_datctl.copy_numbers[0] = -1;
8484 def_datctl.get_numbers[0] = -1;
8485 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8486 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8487 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8488
8489 /* Scan command line options. */
8490
8491 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8492 {
8493 char *endptr;
8494 char *arg = argv[op];
8495 unsigned long uli;
8496
8497 /* List modifiers and exit. */
8498
8499 if (strcmp(arg, "-LM") == 0)
8500 {
8501 display_modifiers();
8502 goto EXIT;
8503 }
8504
8505 /* Display and/or set return code for configuration options. */
8506
8507 if (strcmp(arg, "-C") == 0)
8508 {
8509 yield = c_option(argv[op + 1]);
8510 goto EXIT;
8511 }
8512
8513 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8514 and 32-bit modes because that won't happen naturally when 8-bit is also
8515 configured. Also call some other functions that are not otherwise used. This
8516 means that a coverage report won't claim there are uncalled functions. */
8517
8518 if (strcmp(arg, "-8") == 0)
8519 {
8520 #ifdef SUPPORT_PCRE2_8
8521 test_mode = PCRE8_MODE;
8522 (void)pcre2_set_bsr_8(pat_context8, 999);
8523 (void)pcre2_set_newline_8(pat_context8, 999);
8524 #else
8525 fprintf(stderr,
8526 "** This version of PCRE2 was built without 8-bit support\n");
8527 exit(1);
8528 #endif
8529 }
8530
8531 else if (strcmp(arg, "-16") == 0)
8532 {
8533 #ifdef SUPPORT_PCRE2_16
8534 test_mode = PCRE16_MODE;
8535 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8536 (void)pcre2_set_bsr_16(pat_context16, 999);
8537 (void)pcre2_set_newline_16(pat_context16, 999);
8538 #else
8539 fprintf(stderr,
8540 "** This version of PCRE2 was built without 16-bit support\n");
8541 exit(1);
8542 #endif
8543 }
8544
8545 else if (strcmp(arg, "-32") == 0)
8546 {
8547 #ifdef SUPPORT_PCRE2_32
8548 test_mode = PCRE32_MODE;
8549 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8550 (void)pcre2_set_bsr_32(pat_context32, 999);
8551 (void)pcre2_set_newline_32(pat_context32, 999);
8552 #else
8553 fprintf(stderr,
8554 "** This version of PCRE2 was built without 32-bit support\n");
8555 exit(1);
8556 #endif
8557 }
8558
8559 /* Set quiet (no version verification) */
8560
8561 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8562
8563 /* Set system stack size */
8564
8565 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8566 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8567 {
8568 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8569 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8570 exit(1);
8571 #else
8572 int rc;
8573 uint32_t stack_size;
8574 struct rlimit rlim;
8575 if (U32OVERFLOW(uli))
8576 {
8577 fprintf(stderr, "** Argument for -S is too big\n");
8578 exit(1);
8579 }
8580 stack_size = (uint32_t)uli;
8581 getrlimit(RLIMIT_STACK, &rlim);
8582 rlim.rlim_cur = stack_size * 1024 * 1024;
8583 if (rlim.rlim_cur > rlim.rlim_max)
8584 {
8585 fprintf(stderr,
8586 "pcre2test: requested stack size %luMiB is greater than hard limit "
8587 "%luMiB\n", (unsigned long int)stack_size,
8588 (unsigned long int)(rlim.rlim_max));
8589 exit(1);
8590 }
8591 rc = setrlimit(RLIMIT_STACK, &rlim);
8592 if (rc != 0)
8593 {
8594 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8595 (unsigned long int)stack_size, strerror(errno));
8596 exit(1);
8597 }
8598 op++;
8599 argc--;
8600 #endif
8601 }
8602
8603 /* Set some common pattern and subject controls */
8604
8605 else if (strcmp(arg, "-AC") == 0)
8606 {
8607 def_patctl.options |= PCRE2_AUTO_CALLOUT;
8608 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8609 }
8610 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
8611 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
8612 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
8613 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8614 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
8615 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0)
8616 {
8617 if (arg[4] != 0) def_patctl.control |= CTL_JITVERIFY;
8618 def_patctl.jit = 7; /* full & partial */
8619 #ifndef SUPPORT_JIT
8620 fprintf(stderr, "** Warning: JIT support is not available: "
8621 "-jit[verify] calls functions that do nothing.\n");
8622 #endif
8623 }
8624
8625 /* Set timing parameters */
8626
8627 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8628 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8629 {
8630 int both = arg[2] == 0;
8631 showtotaltimes = arg[1] == 'T';
8632 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8633 {
8634 if (U32OVERFLOW(uli))
8635 {
8636 fprintf(stderr, "** Argument for %s is too big\n", arg);
8637 exit(1);
8638 }
8639 timeitm = (int)uli;
8640 op++;
8641 argc--;
8642 }
8643 else timeitm = LOOPREPEAT;
8644 if (both) timeit = timeitm;
8645 }
8646
8647 /* Give help */
8648
8649 else if (strcmp(arg, "-help") == 0 ||
8650 strcmp(arg, "--help") == 0)
8651 {
8652 usage();
8653 goto EXIT;
8654 }
8655
8656 /* Show version */
8657
8658 else if (strcmp(arg, "-version") == 0 ||
8659 strcmp(arg, "--version") == 0)
8660 {
8661 print_version(stdout);
8662 goto EXIT;
8663 }
8664
8665 /* The following options save their data for processing once we know what
8666 the running mode is. */
8667
8668 else if (strcmp(arg, "-error") == 0)
8669 {
8670 arg_error = argv[op+1];
8671 goto CHECK_VALUE_EXISTS;
8672 }
8673
8674 else if (strcmp(arg, "-subject") == 0)
8675 {
8676 arg_subject = argv[op+1];
8677 goto CHECK_VALUE_EXISTS;
8678 }
8679
8680 else if (strcmp(arg, "-pattern") == 0)
8681 {
8682 arg_pattern = argv[op+1];
8683 CHECK_VALUE_EXISTS:
8684 if (argc <= 2)
8685 {
8686 fprintf(stderr, "** Missing value for %s\n", arg);
8687 yield = 1;
8688 goto EXIT;
8689 }
8690 op++;
8691 argc--;
8692 }
8693
8694 /* Unrecognized option */
8695
8696 else
8697 {
8698 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8699 usage();
8700 yield = 1;
8701 goto EXIT;
8702 }
8703 op++;
8704 argc--;
8705 }
8706
8707 /* If -error was present, get the error numbers, show the messages, and exit.
8708 We wait to do this until we know which mode we are in. */
8709
8710 if (arg_error != NULL)
8711 {
8712 int len;
8713 int errcode;
8714 char *endptr;
8715
8716 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8717 least 128 code units, because it is used for retrieving error messages. */
8718
8719 #ifdef SUPPORT_PCRE2_16
8720 if (test_mode == PCRE16_MODE)
8721 {
8722 pbuffer16_size = 256;
8723 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8724 if (pbuffer16 == NULL)
8725 {
8726 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8727 SIZ_CAST pbuffer16_size);
8728 yield = 1;
8729 goto EXIT;
8730 }
8731 }
8732 #endif
8733
8734 #ifdef SUPPORT_PCRE2_32
8735 if (test_mode == PCRE32_MODE)
8736 {
8737 pbuffer32_size = 512;
8738 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8739 if (pbuffer32 == NULL)
8740 {
8741 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8742 SIZ_CAST pbuffer32_size);
8743 yield = 1;
8744 goto EXIT;
8745 }
8746 }
8747 #endif
8748
8749 /* Loop along a list of error numbers. */
8750
8751 for (;;)
8752 {
8753 errcode = strtol(arg_error, &endptr, 10);
8754 if (*endptr != 0 && *endptr != CHAR_COMMA)
8755 {
8756 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8757 yield = 1;
8758 goto EXIT;
8759 }
8760 printf("Error %d: ", errcode);
8761 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8762 if (len < 0)
8763 {
8764 switch (len)
8765 {
8766 case PCRE2_ERROR_BADDATA:
8767 printf("PCRE2_ERROR_BADDATA (unknown error number)");
8768 break;
8769
8770 case PCRE2_ERROR_NOMEMORY:
8771 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8772 break;
8773
8774 default:
8775 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8776 break;
8777 }
8778 }
8779 else
8780 {
8781 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8782 }
8783 printf("\n");
8784 if (*endptr == 0) goto EXIT;
8785 arg_error = endptr + 1;
8786 }
8787 /* Control never reaches here */
8788 } /* End of -error handling */
8789
8790 /* Initialize things that cannot be done until we know which test mode we are
8791 running in. Exercise the general context copying function, which is not
8792 otherwise used. */
8793
8794 code_unit_size = test_mode/8;
8795 max_oveccount = DEFAULT_OVECCOUNT;
8796
8797 /* Use macros to save a lot of duplication. */
8798
8799 #define CREATECONTEXTS \
8800 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8801 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8802 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8803 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8804 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8805 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8806 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8807 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8808 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8809
8810 #define CONTEXTTESTS \
8811 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8812 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8813 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8814 (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
8815
8816 /* Call the appropriate functions for the current mode, and exercise some
8817 functions that are not otherwise called. */
8818
8819 #ifdef SUPPORT_PCRE2_8
8820 #undef BITS
8821 #define BITS 8
8822 if (test_mode == PCRE8_MODE)
8823 {
8824 CREATECONTEXTS;
8825 CONTEXTTESTS;
8826 }
8827 #endif
8828
8829 #ifdef SUPPORT_PCRE2_16
8830 #undef BITS
8831 #define BITS 16
8832 if (test_mode == PCRE16_MODE)
8833 {
8834 CREATECONTEXTS;
8835 CONTEXTTESTS;
8836 }
8837 #endif
8838
8839 #ifdef SUPPORT_PCRE2_32
8840 #undef BITS
8841 #define BITS 32
8842 if (test_mode == PCRE32_MODE)
8843 {
8844 CREATECONTEXTS;
8845 CONTEXTTESTS;
8846 }
8847 #endif
8848
8849 /* Set a default parentheses nest limit that is large enough to run the
8850 standard tests (this also exercises the function). */
8851
8852 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8853
8854 /* Handle command line modifier settings, sending any error messages to
8855 stderr. We need to know the mode before modifying the context, and it is tidier
8856 to do them all in the same way. */
8857
8858 outfile = stderr;
8859 if ((arg_pattern != NULL &&
8860 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8861 (arg_subject != NULL &&
8862 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8863 {
8864 yield = 1;
8865 goto EXIT;
8866 }
8867
8868 /* Sort out the input and output files, defaulting to stdin/stdout. */
8869
8870 infile = stdin;
8871 outfile = stdout;
8872
8873 if (argc > 1 && strcmp(argv[op], "-") != 0)
8874 {
8875 infile = fopen(argv[op], INPUT_MODE);
8876 if (infile == NULL)
8877 {
8878 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
8879 yield = 1;
8880 goto EXIT;
8881 }
8882 }
8883
8884 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
8885 if (INTERACTIVE(infile)) using_history();
8886 #endif
8887
8888 if (argc > 2)
8889 {
8890 outfile = fopen(argv[op+1], OUTPUT_MODE);
8891 if (outfile == NULL)
8892 {
8893 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
8894 yield = 1;
8895 goto EXIT;
8896 }
8897 }
8898
8899 /* Output a heading line unless quiet, then process input lines. */
8900
8901 if (!quiet) print_version(outfile);
8902
8903 SET(compiled_code, NULL);
8904
8905 #ifdef SUPPORT_PCRE2_8
8906 preg.re_pcre2_code = NULL;
8907 preg.re_match_data = NULL;
8908 #endif
8909
8910 while (notdone)
8911 {
8912 uint8_t *p;
8913 int rc = PR_OK;
8914 BOOL expectdata = TEST(compiled_code, !=, NULL);
8915 #ifdef SUPPORT_PCRE2_8
8916 expectdata |= preg.re_pcre2_code != NULL;
8917 #endif
8918
8919 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
8920 break;
8921 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
8922 fflush(outfile);
8923 p = buffer;
8924
8925 /* If we have a pattern set up for testing, or we are skipping after a
8926 compile failure, a blank line terminates this test. */
8927
8928 if (expectdata || skipping)
8929 {
8930 while (isspace(*p)) p++;
8931 if (*p == 0)
8932 {
8933 #ifdef SUPPORT_PCRE2_8
8934 if (preg.re_pcre2_code != NULL)
8935 {
8936 regfree(&preg);
8937 preg.re_pcre2_code = NULL;
8938 preg.re_match_data = NULL;
8939 }
8940 #endif /* SUPPORT_PCRE2_8 */
8941 if (TEST(compiled_code, !=, NULL))
8942 {
8943 SUB1(pcre2_code_free, compiled_code);
8944 SET(compiled_code, NULL);
8945 }
8946 skipping = FALSE;
8947 setlocale(LC_CTYPE, "C");
8948 }
8949
8950 /* Otherwise, if we are not skipping, and the line is not a data comment
8951 line starting with "\=", process a data line. */
8952
8953 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
8954 {
8955 rc = process_data();
8956 }
8957 }
8958
8959 /* We do not have a pattern set up for testing. Lines starting with # are
8960 either comments or special commands. Blank lines are ignored. Otherwise, the
8961 line must start with a valid delimiter. It is then processed as a pattern
8962 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
8963 valgrind, make the unused part of the buffer undefined, to catch overruns. */
8964
8965 else if (*p == '#')
8966 {
8967 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
8968 rc = process_command();
8969 }
8970
8971 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
8972 {
8973 rc = process_pattern();
8974 dfa_matched = 0;
8975 }
8976
8977 else
8978 {
8979 while (isspace(*p)) p++;
8980 if (*p != 0)
8981 {
8982 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
8983 *buffer);
8984 rc = PR_SKIP;
8985 }
8986 }
8987
8988 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
8989 else if (rc == PR_ABEND)
8990 {
8991 fprintf(outfile, "** pcre2test run abandoned\n");
8992 yield = 1;
8993 goto EXIT;
8994 }
8995 }
8996
8997 /* Finish off a normal run. */
8998
8999 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9000
9001 if (showtotaltimes)
9002 {
9003 const char *pad = "";
9004 fprintf(outfile, "--------------------------------------\n");
9005 if (timeit > 0)
9006 {
9007 fprintf(outfile, "Total compile time %.4f milliseconds\n",
9008 (((double)total_compile_time * 1000.0) / (double)timeit) /
9009 (double)CLOCKS_PER_SEC);
9010 if (total_jit_compile_time > 0)
9011 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
9012 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9013 (double)CLOCKS_PER_SEC);
9014 pad = " ";
9015 }
9016 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9017 (((double)total_match_time * 1000.0) / (double)timeitm) /
9018 (double)CLOCKS_PER_SEC);
9019 }
9020
9021
9022 EXIT:
9023
9024 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9025 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9026 #endif
9027
9028 if (infile != NULL && infile != stdin) fclose(infile);
9029 if (outfile != NULL && outfile != stdout) fclose(outfile);
9030
9031 free(buffer);
9032 free(dbuffer);
9033 free(pbuffer8);
9034 free(dfa_workspace);
9035 free((void *)locale_tables);
9036 PCRE2_MATCH_DATA_FREE(match_data);
9037 SUB1(pcre2_code_free, compiled_code);
9038
9039 while(patstacknext-- > 0)
9040 {
9041 SET(compiled_code, patstack[patstacknext]);
9042 SUB1(pcre2_code_free, compiled_code);
9043 }
9044
9045 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9046 if (jit_stack != NULL)
9047 {
9048 PCRE2_JIT_STACK_FREE(jit_stack);
9049 }
9050
9051 #define FREECONTEXTS \
9052 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9053 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9054 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9055 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9056 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9057 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9058 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9059 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9060
9061 #ifdef SUPPORT_PCRE2_8
9062 #undef BITS
9063 #define BITS 8
9064 if (preg.re_pcre2_code != NULL) regfree(&preg);
9065 FREECONTEXTS;
9066 #endif
9067
9068 #ifdef SUPPORT_PCRE2_16
9069 #undef BITS
9070 #define BITS 16
9071 free(pbuffer16);
9072 FREECONTEXTS;
9073 #endif
9074
9075 #ifdef SUPPORT_PCRE2_32
9076 #undef BITS
9077 #define BITS 32
9078 free(pbuffer32);
9079 FREECONTEXTS;
9080 #endif
9081
9082 #if defined(__VMS)
9083 yield = SS$_NORMAL; /* Return values via DCL symbols */
9084 #endif
9085
9086 return yield;
9087 }
9088
9089 /* End of pcre2test.c */
9090