• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14     Rewritten code Copyright (c) 2016-2022 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Debugging code enabler */
82 
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84 
85 /* Both libreadline and libedit are optionally supported */
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #elif defined(HAVE_EDIT_READLINE_READLINE_H)
94 #include <edit/readline/readline.h>
95 #else
96 #include <readline.h>
97 /* GNU readline defines this macro but libedit doesn't, if that ever changes
98 this needs to be updated or the build could break */
99 #ifdef RL_VERSION_MAJOR
100 #include <history.h>
101 #endif
102 #endif
103 #endif
104 #endif
105 
106 /* Put the test for interactive input into a macro so that it can be changed if
107 required for different environments. */
108 
109 #define INTERACTIVE(f) isatty(fileno(f))
110 
111 
112 /* ---------------------- System-specific definitions ---------------------- */
113 
114 /* A number of things vary for Windows builds. Originally, pcretest opened its
115 input and output without "b"; then I was told that "b" was needed in some
116 environments, so it was added for release 5.0 to both the input and output. (It
117 makes no difference on Unix-like systems.) Later I was told that it is wrong
118 for the input on Windows. I've now abstracted the modes into macros that are
119 set here, to make it easier to fiddle with them, and removed "b" from the input
120 mode under Windows. The BINARY versions are used when saving/restoring compiled
121 patterns. */
122 
123 #if defined(_WIN32) || defined(WIN32)
124 #include <io.h>                /* For _setmode() */
125 #include <fcntl.h>             /* For _O_BINARY */
126 #define INPUT_MODE          "r"
127 #define OUTPUT_MODE         "wb"
128 #define BINARY_INPUT_MODE   "rb"
129 #define BINARY_OUTPUT_MODE  "wb"
130 
131 #ifndef isatty
132 #define isatty _isatty         /* This is what Windows calls them, I'm told, */
133 #endif                         /* though in some environments they seem to   */
134                                /* be already defined, hence the #ifndefs.    */
135 #ifndef fileno
136 #define fileno _fileno
137 #endif
138 
139 /* A user sent this fix for Borland Builder 5 under Windows. */
140 
141 #ifdef __BORLANDC__
142 #define _setmode(handle, mode) setmode(handle, mode)
143 #endif
144 
145 /* Not Windows */
146 
147 #else
148 #include <sys/time.h>          /* These two includes are needed */
149 #include <sys/resource.h>      /* for setrlimit(). */
150 #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
151 #define INPUT_MODE   "r"
152 #define OUTPUT_MODE  "w"
153 #define BINARY_INPUT_MODE   "rb"
154 #define BINARY_OUTPUT_MODE  "wb"
155 #else
156 #define INPUT_MODE          "rb"
157 #define OUTPUT_MODE         "wb"
158 #define BINARY_INPUT_MODE   "rb"
159 #define BINARY_OUTPUT_MODE  "wb"
160 #endif
161 #endif
162 
163 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
164 user [2] provided alternative code which worked better for him. I have
165 commented out the original, but kept it around just in case. */
166 
167 #ifdef __VMS
168 #include <ssdef.h>
169 /* These two includes came from [2]. */
170 #include descrip
171 #include lib$routines
172 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
173 #endif
174 
175 /* old VC and older compilers don't support %td or %zu, and even some that
176 claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). */
177 
178 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
179   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)))
180 #ifdef _WIN64
181 #define PTR_FORM "lld"
182 #define SIZ_FORM "llu"
183 #else
184 #define PTR_FORM "ld"
185 #define SIZ_FORM "lu"
186 #endif
187 #else
188 #define PTR_FORM "td"
189 #define SIZ_FORM "zu"
190 #endif
191 
192 /* ------------------End of system-specific definitions -------------------- */
193 
194 /* Glueing macros that are used in several places below. */
195 
196 #define glue(a,b) a##b
197 #define G(a,b) glue(a,b)
198 
199 /* Miscellaneous parameters and manifests */
200 
201 #ifndef CLOCKS_PER_SEC
202 #ifdef CLK_TCK
203 #define CLOCKS_PER_SEC CLK_TCK
204 #else
205 #define CLOCKS_PER_SEC 100
206 #endif
207 #endif
208 
209 #define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
210 #define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
211 #define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
212 #define DEFAULT_OVECCOUNT 15      /* Default ovector count */
213 #define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
214 #define LOCALESIZE 32             /* Size of locale name */
215 #define LOOPREPEAT 500000         /* Default loop count for timing */
216 #define MALLOCLISTSIZE 20         /* For remembering mallocs */
217 #define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
218 #define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
219 #define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
220 #define VERSION_SIZE 64           /* Size of buffer for the version strings */
221 
222 /* Default JIT compile options */
223 
224 #define JIT_DEFAULT (PCRE2_JIT_COMPLETE|\
225                      PCRE2_JIT_PARTIAL_SOFT|\
226                      PCRE2_JIT_PARTIAL_HARD)
227 
228 /* Make sure the buffer into which replacement strings are copied is big enough
229 to hold them as 32-bit code units. */
230 
231 #define REPLACE_BUFFSIZE 1024   /* This is a byte value */
232 
233 /* Execution modes */
234 
235 #define PCRE8_MODE   8
236 #define PCRE16_MODE 16
237 #define PCRE32_MODE 32
238 
239 /* Processing returns */
240 
241 enum { PR_OK, PR_SKIP, PR_ABEND };
242 
243 /* The macro PRINTABLE determines whether to print an output character as-is or
244 as a hex value when showing compiled patterns. We use it in cases when the
245 locale has not been explicitly changed, so as to get consistent output from
246 systems that differ in their output from isprint() even in the "C" locale. */
247 
248 #ifdef EBCDIC
249 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
250 #else
251 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
252 #endif
253 
254 #define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))  /* isprint() only when use_tables is set and c < 256; c is parenthesized so expression arguments compare correctly */
255 
256 /* We have to include some of the library source files because we need
257 to use some of the macros, internal structure definitions, and other internal
258 values - pcre2test has "inside information" compared to an application program
259 that strictly follows the PCRE2 API.
260 
261 Before including pcre2_internal.h we define PRIV so that it does not get
262 defined therein. This ensures that PRIV names in the included files do not
263 clash with those in the libraries. Also, although pcre2_internal.h does itself
264 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
265 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
266 for building the library. */
267 
268 #define PRIV(name) name
269 #define PCRE2_CODE_UNIT_WIDTH 0
270 #include "pcre2.h"
271 #include "pcre2posix.h"
272 #include "pcre2_internal.h"
273 
274 /* We need access to some of the data tables that PCRE2 uses. Defining
275 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
276 of PRIV avoids name clashes. */
277 
278 #define PCRE2_PCRE2TEST
279 #include "pcre2_tables.c"
280 #include "pcre2_ucd.c"
281 
282 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
283 check needed for overflow depends on whether long ints are in fact longer than
284 ints. They are defined not to be shorter. */
285 
286 #if ULONG_MAX > UINT32_MAX
287 #define U32OVERFLOW(x) (x > UINT32_MAX)
288 #else
289 #define U32OVERFLOW(x) (x == UINT32_MAX)
290 #endif
291 
292 #if LONG_MAX > INT32_MAX
293 #define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN)
294 #else
295 #define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN)
296 #endif
297 
298 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
299 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
300 defined. We can now include it for each supported code unit width. Because
301 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
302 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
303 while including these files, and then restore it to a no-op. Because LINK_SIZE
304 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
305 these inclusions should not be changed. */
306 
307 #undef PCRE2_SUFFIX
308 #undef PCRE2_CODE_UNIT_WIDTH
309 
310 #ifdef   SUPPORT_PCRE2_8
311 #define  PCRE2_CODE_UNIT_WIDTH 8
312 #define  PCRE2_SUFFIX(a) G(a,8)
313 #include "pcre2_intmodedep.h"
314 #include "pcre2_printint.c"
315 #undef   PCRE2_CODE_UNIT_WIDTH
316 #undef   PCRE2_SUFFIX
317 #endif   /* SUPPORT_PCRE2_8 */
318 
319 #ifdef   SUPPORT_PCRE2_16
320 #define  PCRE2_CODE_UNIT_WIDTH 16
321 #define  PCRE2_SUFFIX(a) G(a,16)
322 #include "pcre2_intmodedep.h"
323 #include "pcre2_printint.c"
324 #undef   PCRE2_CODE_UNIT_WIDTH
325 #undef   PCRE2_SUFFIX
326 #endif   /* SUPPORT_PCRE2_16 */
327 
328 #ifdef   SUPPORT_PCRE2_32
329 #define  PCRE2_CODE_UNIT_WIDTH 32
330 #define  PCRE2_SUFFIX(a) G(a,32)
331 #include "pcre2_intmodedep.h"
332 #include "pcre2_printint.c"
333 #undef   PCRE2_CODE_UNIT_WIDTH
334 #undef   PCRE2_SUFFIX
335 #endif   /* SUPPORT_PCRE2_32 */
336 
337 #define PCRE2_SUFFIX(a) a
338 
339 /* We need to be able to check input text for UTF-8 validity, whatever code
340 widths are actually available, because the input to pcre2test is always in
341 8-bit code units. So we include the UTF validity checking function for 8-bit
342 code units. */
343 
344 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
345 
346 #define  PCRE2_CODE_UNIT_WIDTH 8
347 #undef   PCRE2_SPTR
348 #define  PCRE2_SPTR PCRE2_SPTR8
349 #include "pcre2_valid_utf.c"
350 #undef   PCRE2_CODE_UNIT_WIDTH
351 #undef   PCRE2_SPTR
352 
353 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
354 support, it can be selected by a command-line option. If there is no 8-bit
355 support, there must be 16-bit or 32-bit support, so default to one of them. The
356 config function, JIT stack, contexts, and version string are the same in all
357 modes, so use the form of the first that is available. */
358 
359 #if defined SUPPORT_PCRE2_8
360 #define DEFAULT_TEST_MODE PCRE8_MODE
361 #define VERSION_TYPE PCRE2_UCHAR8
362 #define PCRE2_CONFIG pcre2_config_8
363 #define PCRE2_JIT_STACK pcre2_jit_stack_8
364 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
365 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
366 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
367 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
368 
369 #elif defined SUPPORT_PCRE2_16
370 #define DEFAULT_TEST_MODE PCRE16_MODE
371 #define VERSION_TYPE PCRE2_UCHAR16
372 #define PCRE2_CONFIG pcre2_config_16
373 #define PCRE2_JIT_STACK pcre2_jit_stack_16
374 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
375 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
376 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
377 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
378 
379 #elif defined SUPPORT_PCRE2_32
380 #define DEFAULT_TEST_MODE PCRE32_MODE
381 #define VERSION_TYPE PCRE2_UCHAR32
382 #define PCRE2_CONFIG pcre2_config_32
383 #define PCRE2_JIT_STACK pcre2_jit_stack_32
384 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
385 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
386 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
387 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
388 #endif
389 
390 /* ------------- Structure and table for handling #-commands ------------- */
391 
392 typedef struct cmdstruct {
393   const char *name;   /* Command name, as listed in cmdlist[] */
394   int  value;         /* Matching CMD_xxx enumeration value */
395 } cmdstruct;
396 
397 enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
398   CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
399   CMD_UNKNOWN };   /* CMD_UNKNOWN is the only value with no cmdlist[] entry */
400 
401 static cmdstruct cmdlist[] = {   /* Pairs each #-command name with its CMD_xxx value */
402   { "forbid_utf",      CMD_FORBID_UTF },
403   { "load",            CMD_LOAD },
404   { "loadtables",      CMD_LOADTABLES },
405   { "newline_default", CMD_NEWLINE_DEFAULT },
406   { "pattern",         CMD_PATTERN },
407   { "perltest",        CMD_PERLTEST },
408   { "pop",             CMD_POP },
409   { "popcopy",         CMD_POPCOPY },
410   { "save",            CMD_SAVE },
411   { "subject",         CMD_SUBJECT }};
412 
413 #define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
414 
415 /* ------------- Structures and tables for handling modifiers -------------- */
416 
417 /* Table of names for newline types. Must be kept in step with the definitions
418 of PCRE2_NEWLINE_xx in pcre2.h. */
419 
420 static const char *newlines[] = {   /* Display names for the newline types */
421   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
422 
423 /* Structure and table for handling pattern conversion types. */
424 
425 typedef struct convertstruct {
426   const char *name;    /* Conversion type name, as listed in convertlist[] */
427   uint32_t option;     /* A PCRE2_CONVERT_xxx value, or CONVERT_UNSET */
428 } convertstruct;
429 
430 static convertstruct convertlist[] = {   /* Pairs each conversion type name with its option value */
431   { "glob",                   PCRE2_CONVERT_GLOB },
432   { "glob_no_starstar",       PCRE2_CONVERT_GLOB_NO_STARSTAR },
433   { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
434   { "posix_basic",            PCRE2_CONVERT_POSIX_BASIC },
435   { "posix_extended",         PCRE2_CONVERT_POSIX_EXTENDED },
436   { "unset",                  CONVERT_UNSET }};
437 
438 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
439 
440 /* Modifier types and applicability */
441 
442 enum { MOD_CTC,    /* Applies to a compile context; MOD_CTC..MOD_PNDP are used in modstruct "which" */
443        MOD_CTM,    /* Applies to a match context */
444        MOD_PAT,    /* Applies to a pattern */
445        MOD_PATP,   /* Ditto, OK for Perl test */
446        MOD_DAT,    /* Applies to a data line */
447        MOD_DATP,   /* Ditto, OK for Perl test */
448        MOD_PD,     /* Applies to a pattern or a data line */
449        MOD_PDP,    /* As MOD_PD, OK for Perl test */
450        MOD_PND,    /* As MOD_PD, but not for a default pattern */
451        MOD_PNDP,   /* As MOD_PND, OK for Perl test */
452        MOD_CHR,    /* Is a single character; MOD_CHR..MOD_STR are used in modstruct "type" */
453        MOD_CON,    /* Is a "convert" type/options list */
454        MOD_CTL,    /* Is a control bit */
455        MOD_BSR,    /* Is a BSR value */
456        MOD_IN2,    /* Is one or two unsigned integers */
457        MOD_INS,    /* Is a signed integer */
458        MOD_INT,    /* Is an unsigned integer */
459        MOD_IND,    /* Is an unsigned integer, but no value => default */
460        MOD_NL,     /* Is a newline value */
461        MOD_NN,     /* Is a number or a name; more than one may occur */
462        MOD_OPT,    /* Is an option bit */
463        MOD_SIZ,    /* Is a PCRE2_SIZE value */
464        MOD_STR };  /* Is a string */
465 
466 /* Control bits. Some apply to compiling, some to matching, but some can be set
467 either on a pattern or a data line, so they must all be distinct. There are now
468 so many of them that they are split into two fields. */
469 
470 #define CTL_AFTERTEXT                    0x00000001u
471 #define CTL_ALLAFTERTEXT                 0x00000002u
472 #define CTL_ALLCAPTURES                  0x00000004u
473 #define CTL_ALLUSEDTEXT                  0x00000008u
474 #define CTL_ALTGLOBAL                    0x00000010u
475 #define CTL_BINCODE                      0x00000020u
476 #define CTL_CALLOUT_CAPTURE              0x00000040u
477 #define CTL_CALLOUT_INFO                 0x00000080u
478 #define CTL_CALLOUT_NONE                 0x00000100u
479 #define CTL_DFA                          0x00000200u
480 #define CTL_EXPAND                       0x00000400u
481 #define CTL_FINDLIMITS                   0x00000800u
482 #define CTL_FRAMESIZE                    0x00001000u
483 #define CTL_FULLBINCODE                  0x00002000u
484 #define CTL_GETALL                       0x00004000u
485 #define CTL_GLOBAL                       0x00008000u
486 #define CTL_HEXPAT                       0x00010000u  /* Same word as USE_LENGTH */
487 #define CTL_INFO                         0x00020000u
488 #define CTL_JITFAST                      0x00040000u
489 #define CTL_JITVERIFY                    0x00080000u
490 #define CTL_MARK                         0x00100000u
491 #define CTL_MEMORY                       0x00200000u
492 #define CTL_NULLCONTEXT                  0x00400000u
493 #define CTL_POSIX                        0x00800000u
494 #define CTL_POSIX_NOSUB                  0x01000000u
495 #define CTL_PUSH                         0x02000000u  /* These three must be */
496 #define CTL_PUSHCOPY                     0x04000000u  /*   all in the same */
497 #define CTL_PUSHTABLESCOPY               0x08000000u  /*     word. */
498 #define CTL_STARTCHAR                    0x10000000u
499 #define CTL_USE_LENGTH                   0x20000000u  /* Same word as HEXPAT */
500 #define CTL_UTF8_INPUT                   0x40000000u
501 #define CTL_ZERO_TERMINATE               0x80000000u
502 
503 /* Combinations */
504 
505 #define CTL_DEBUG            (CTL_FULLBINCODE|CTL_INFO)  /* For setting */
506 #define CTL_ANYINFO          (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
507 #define CTL_ANYGLOB          (CTL_ALTGLOBAL|CTL_GLOBAL)
508 
509 /* Second control word */
510 
511 #define CTL2_SUBSTITUTE_CALLOUT          0x00000001u
512 #define CTL2_SUBSTITUTE_EXTENDED         0x00000002u
513 #define CTL2_SUBSTITUTE_LITERAL          0x00000004u
514 #define CTL2_SUBSTITUTE_MATCHED          0x00000008u
515 #define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  0x00000010u
516 #define CTL2_SUBSTITUTE_REPLACEMENT_ONLY 0x00000020u
517 #define CTL2_SUBSTITUTE_UNKNOWN_UNSET    0x00000040u
518 #define CTL2_SUBSTITUTE_UNSET_EMPTY      0x00000080u
519 #define CTL2_SUBJECT_LITERAL             0x00000100u
520 #define CTL2_CALLOUT_NO_WHERE            0x00000200u
521 #define CTL2_CALLOUT_EXTRA               0x00000400u
522 #define CTL2_ALLVECTOR                   0x00000800u
523 #define CTL2_NULL_SUBJECT                0x00001000u
524 #define CTL2_NULL_REPLACEMENT            0x00002000u
525 
526 #define CTL2_NL_SET                      0x40000000u  /* Informational */
527 #define CTL2_BSR_SET                     0x80000000u  /* Informational */
528 
529 /* These are the matching controls that may be set either on a pattern or on a
530 data line. They are copied from the pattern controls as initial settings for
531 data line controls. Note that CTL_MEMORY is not included here, because it does
532 different things in the two cases. */
533 
534 #define CTL_ALLPD  (CTL_AFTERTEXT|\
535                     CTL_ALLAFTERTEXT|\
536                     CTL_ALLCAPTURES|\
537                     CTL_ALLUSEDTEXT|\
538                     CTL_ALTGLOBAL|\
539                     CTL_GLOBAL|\
540                     CTL_MARK|\
541                     CTL_STARTCHAR|\
542                     CTL_UTF8_INPUT)
543 
544 #define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
545                     CTL2_SUBSTITUTE_EXTENDED|\
546                     CTL2_SUBSTITUTE_LITERAL|\
547                     CTL2_SUBSTITUTE_MATCHED|\
548                     CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
549                     CTL2_SUBSTITUTE_REPLACEMENT_ONLY|\
550                     CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
551                     CTL2_SUBSTITUTE_UNSET_EMPTY|\
552                     CTL2_ALLVECTOR)
553 
554 /* Structures for holding modifier information for patterns and subject strings
555 (data). Fields containing modifiers that can be set either for a pattern or a
556 subject must be at the start and in the same order in both cases so that the
557 same offset in the big table below works for both. */
558 
559 typedef struct patctl {       /* Structure for pattern modifiers. */
560   uint32_t  options;          /* Must be in same position as datctl */
561   uint32_t  control;          /* Must be in same position as datctl */
562   uint32_t  control2;         /* Must be in same position as datctl */
563   uint32_t  jitstack;         /* Must be in same position as datctl */
564    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
565   uint32_t  substitute_skip;  /* Must be in same position as datctl */
566   uint32_t  substitute_stop;  /* Must be in same position as datctl */
567   uint32_t  jit;              /* Field for the "jit" modifier (modlist gives 7 as its no-value default) */
568   uint32_t  stackguard_test;
569   uint32_t  tables_id;
570   uint32_t  convert_type;     /* Field for the "convert" modifier */
571   uint32_t  convert_length;   /* Field for the "convert_length" modifier */
572   uint32_t  convert_glob_escape;      /* Field for the "convert_glob_escape" modifier */
573   uint32_t  convert_glob_separator;   /* Field for the "convert_glob_separator" modifier */
574   uint32_t  regerror_buffsize;
575    uint8_t  locale[LOCALESIZE];       /* Field for the "locale" modifier */
576 } patctl;
577 
578 #define MAXCPYGET 10
579 #define LENCPYGET 64
580 
581 typedef struct datctl {       /* Structure for data line modifiers. */
582   uint32_t  options;          /* Must be in same position as patctl */
583   uint32_t  control;          /* Must be in same position as patctl */
584   uint32_t  control2;         /* Must be in same position as patctl */
585   uint32_t  jitstack;         /* Must be in same position as patctl */
586    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
587   uint32_t  substitute_skip;  /* Must be in same position as patctl */
588   uint32_t  substitute_stop;  /* Must be in same position as patctl */
589   uint32_t  startend[2];
590   uint32_t  cerror[2];        /* Field for "callout_error" (one or two unsigned integers) */
591   uint32_t  cfail[2];         /* Field for "callout_fail" (one or two unsigned integers) */
592    int32_t  callout_data;     /* Field for "callout_data" (a signed integer) */
593    int32_t  copy_numbers[MAXCPYGET];  /* Number list for the "copy" modifier */
594    int32_t  get_numbers[MAXCPYGET];   /* Number list for the "get" modifier */
595   uint32_t  oveccount;
596   uint32_t  offset;
597   uint8_t   copy_names[LENCPYGET];    /* Name list for the "copy" modifier */
598   uint8_t   get_names[LENCPYGET];     /* Name list for the "get" modifier */
599 } datctl;
600 
601 /* Ids for which context to modify. */
602 
603 enum { CTX_PAT,            /* Active pattern context */
604        CTX_POPPAT,         /* Ditto, for a popped pattern */
605        CTX_DEFPAT,         /* Default pattern context */
606        CTX_DAT,            /* Active data (match) context */
607        CTX_DEFDAT };       /* Default data (match) context */
608 
609 /* Macros to simplify the big table below. */
610 
611 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
612 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
613 #define PO(name) offsetof(patctl, name)
614 #define PD(name) PO(name)
615 #define DO(name) offsetof(datctl, name)
616 
617 /* Table of all long-form modifiers. Must be in collating sequence of modifier
618 name because it is searched by binary chop. */
619 
620 typedef struct modstruct {
621   const char   *name;    /* Long-form modifier name; modlist[] is ordered by it */
622   uint16_t      which;   /* Applicability code: MOD_CTC .. MOD_PNDP */
623   uint16_t      type;    /* Value-kind code: MOD_CHR .. MOD_STR */
624   uint32_t      value;   /* Option or control bit, or other type-specific data (e.g. a size, default, or offset) */
625   PCRE2_SIZE    offset;  /* Field offset built with CO(), MO(), PO(), PD() or DO() */
626 } modstruct;
627 
628 static modstruct modlist[] = {
629   { "aftertext",                   MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
630   { "allaftertext",                MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
631   { "allcaptures",                 MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
632   { "allow_empty_class",           MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
633   { "allow_lookaround_bsk",        MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
634   { "allow_surrogate_escapes",     MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
635   { "allusedtext",                 MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
636   { "allvector",                   MOD_PND,  MOD_CTL, CTL2_ALLVECTOR,             PO(control2) },
637   { "alt_bsux",                    MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
638   { "alt_circumflex",              MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
639   { "alt_verbnames",               MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
640   { "altglobal",                   MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
641   { "anchored",                    MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
642   { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
643   { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
644   { "bincode",                     MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
645   { "bsr",                         MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
646   { "callout_capture",             MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
647   { "callout_data",                MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
648   { "callout_error",               MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
649   { "callout_extra",               MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
650   { "callout_fail",                MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
651   { "callout_info",                MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
652   { "callout_no_where",            MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
653   { "callout_none",                MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
654   { "caseless",                    MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
655   { "convert",                     MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
656   { "convert_glob_escape",         MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
657   { "convert_glob_separator",      MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
658   { "convert_length",              MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
659   { "copy",                        MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
660   { "copy_matched_subject",        MOD_DAT,  MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
661   { "debug",                       MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
662   { "depth_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
663   { "dfa",                         MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
664   { "dfa_restart",                 MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
665   { "dfa_shortest",                MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
666   { "dollar_endonly",              MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
667   { "dotall",                      MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
668   { "dupnames",                    MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
669   { "endanchored",                 MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
670   { "escaped_cr_is_lf",            MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
671   { "expand",                      MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
672   { "extended",                    MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
673   { "extended_more",               MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
674   { "extra_alt_bsux",              MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALT_BSUX,       CO(extra_options) },
675   { "find_limits",                 MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
676   { "firstline",                   MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
677   { "framesize",                   MOD_PAT,  MOD_CTL, CTL_FRAMESIZE,              PO(control) },
678   { "fullbincode",                 MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
679   { "get",                         MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
680   { "getall",                      MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
681   { "global",                      MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
682   { "heap_limit",                  MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
683   { "hex",                         MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
684   { "info",                        MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
685   { "jit",                         MOD_PAT,  MOD_IND, 7,                          PO(jit) },
686   { "jitfast",                     MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
687   { "jitstack",                    MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
688   { "jitverify",                   MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
689   { "literal",                     MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
690   { "locale",                      MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
691   { "mark",                        MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
692   { "match_invalid_utf",           MOD_PAT,  MOD_OPT, PCRE2_MATCH_INVALID_UTF,    PO(options) },
693   { "match_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
694   { "match_line",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
695   { "match_unset_backref",         MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
696   { "match_word",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
697   { "max_pattern_length",          MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
698   { "memory",                      MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
699   { "multiline",                   MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
700   { "never_backslash_c",           MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
701   { "never_ucp",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
702   { "never_utf",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
703   { "newline",                     MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
704   { "no_auto_capture",             MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
705   { "no_auto_possess",             MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
706   { "no_dotstar_anchor",           MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
707   { "no_jit",                      MOD_DATP, MOD_OPT, PCRE2_NO_JIT,               DO(options) },
708   { "no_start_optimize",           MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
709   { "no_utf_check",                MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
710   { "notbol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
711   { "notempty",                    MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
712   { "notempty_atstart",            MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
713   { "noteol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
714   { "null_context",                MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
715   { "null_replacement",            MOD_DAT,  MOD_CTL, CTL2_NULL_REPLACEMENT,      DO(control2) },
716   { "null_subject",                MOD_DAT,  MOD_CTL, CTL2_NULL_SUBJECT,          DO(control2) },
717   { "offset",                      MOD_DAT,  MOD_INT, 0,                          DO(offset) },
718   { "offset_limit",                MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
719   { "ovector",                     MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
720   { "parens_nest_limit",           MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
721   { "partial_hard",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
722   { "partial_soft",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
723   { "ph",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
724   { "posix",                       MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
725   { "posix_nosub",                 MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
726   { "posix_startend",              MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
727   { "ps",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
728   { "push",                        MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
729   { "pushcopy",                    MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
730   { "pushtablescopy",              MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
731   { "recursion_limit",             MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
732   { "regerror_buffsize",           MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
733   { "replace",                     MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
734   { "stackguard",                  MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
735   { "startchar",                   MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
736   { "startoffset",                 MOD_DAT,  MOD_INT, 0,                          DO(offset) },
737   { "subject_literal",             MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
738   { "substitute_callout",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_CALLOUT,    PO(control2) },
739   { "substitute_extended",         MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
740   { "substitute_literal",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_LITERAL,    PO(control2) },
741   { "substitute_matched",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_MATCHED,    PO(control2) },
742   { "substitute_overflow_length",  MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
743   { "substitute_replacement_only", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
744   { "substitute_skip",             MOD_PND,  MOD_INT, 0,                          PO(substitute_skip) },
745   { "substitute_stop",             MOD_PND,  MOD_INT, 0,                          PO(substitute_stop) },
746   { "substitute_unknown_unset",    MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
747   { "substitute_unset_empty",      MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
748   { "tables",                      MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
749   { "ucp",                         MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
750   { "ungreedy",                    MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
751   { "use_length",                  MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
752   { "use_offset_limit",            MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
753   { "utf",                         MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
754   { "utf8_input",                  MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
755   { "zero_terminate",              MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
756 };
757 
/* Number of entries in modlist. The expansion is wrapped in parentheses so
that it acts as a single operand when used inside a larger expression (for
example as the right-hand side of a division or remainder). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
759 
/* Option and control bits that are supported when the POSIX interface is in
use. Compile-time and match-time sets are kept separately, and there is one
mask per option/control word. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS \
  (PCRE2_CASELESS  | \
   PCRE2_DOTALL    | \
   PCRE2_LITERAL   | \
   PCRE2_MULTILINE | \
   PCRE2_UCP       | \
   PCRE2_UTF       | \
   PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS \
  (CTL_AFTERTEXT    | \
   CTL_ALLAFTERTEXT | \
   CTL_EXPAND       | \
   CTL_HEXPAT       | \
   CTL_POSIX        | \
   CTL_POSIX_NOSUB  | \
   CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS \
  (PCRE2_NOTBOL | PCRE2_NOTEMPTY | PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS \
  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT)

#define POSIX_SUPPORTED_MATCH_CONTROLS2 \
  (CTL2_NULL_SUBJECT)
779 
/* Control bits that are not ignored when 'push' is in use; one mask for each
of the two control words. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS \
  (CTL_BINCODE        | \
   CTL_CALLOUT_INFO   | \
   CTL_FULLBINCODE    | \
   CTL_HEXPAT         | \
   CTL_INFO           | \
   CTL_JITVERIFY      | \
   CTL_MEMORY         | \
   CTL_FRAMESIZE      | \
   CTL_PUSH           | \
   CTL_PUSHCOPY       | \
   CTL_PUSHTABLESCOPY | \
   CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 \
  (CTL2_BSR_SET | CTL2_NL_SET)

/* Of the above, the controls that have effect only at compile time when
'push' is in use. */

#define PUSH_COMPILE_ONLY_CONTROLS  CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)
793 
/* Control bits that must not be given together with #pop or #popcopy. */

#define NOTPOP_CONTROLS \
  (CTL_HEXPAT         | \
   CTL_POSIX          | \
   CTL_POSIX_NOSUB    | \
   CTL_PUSH           | \
   CTL_PUSHCOPY       | \
   CTL_PUSHTABLESCOPY | \
   CTL_USE_LENGTH)
798 
799 /* Pattern controls that are mutually exclusive. At present these are all in
800 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
801 CTL_POSIX, so it doesn't need its own entries. */
802 
803 static uint32_t exclusive_pat_controls[] = {
804   CTL_POSIX    | CTL_PUSH,
805   CTL_POSIX    | CTL_PUSHCOPY,
806   CTL_POSIX    | CTL_PUSHTABLESCOPY,
807   CTL_PUSH     | CTL_PUSHCOPY,
808   CTL_PUSH     | CTL_PUSHTABLESCOPY,
809   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
810   CTL_EXPAND   | CTL_HEXPAT };
811 
812 /* Data controls that are mutually exclusive. At present these are all in the
813 first control word. */
814 
815 static uint32_t exclusive_dat_controls[] = {
816   CTL_ALLUSEDTEXT | CTL_STARTCHAR,
817   CTL_FINDLIMITS  | CTL_NULLCONTEXT };
818 
/* Table of single-character abbreviated modifiers. Every entry starts with
its index field set to -1; the first time the abbreviation is encountered the
field is filled in with the index of the full entry in modlist, which saves
repeating the search when processing multiple test items. The table is short
and is searched serially, so the order of its entries does not matter. */

typedef struct c1modstruct {
  const char *fullname;   /* Full modifier name that the character stands for */
  uint32_t    onechar;    /* The abbreviating character */
  int         index;      /* Index of the full entry in modlist, or -1 */
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B',           -1 },
  { "info",            'I',           -1 },
  { "global",          'g',           -1 },
  { "caseless",        'i',           -1 },
  { "multiline",       'm',           -1 },
  { "no_auto_capture", 'n',           -1 },
  { "dotall",          's',           -1 },
  { "extended",        'x',           -1 }
};

/* Number of entries in c1modlist. The expansion is parenthesized so that it
is a single operand wherever it is used; without the parentheses an expression
such as n / C1MODLISTCOUNT would divide twice. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
843 
/* Helper macros for the table of -C command line arguments that follows.
They turn "is this feature configured?" into plain 0/1 values so that the
table itself is easy to read. */

/* 8-bit library */
#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#endif
#ifndef SUPPORT_8
#define SUPPORT_8 0
#endif

/* 16-bit library */
#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#endif
#ifndef SUPPORT_16
#define SUPPORT_16 0
#endif

/* 32-bit library */
#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#endif
#ifndef SUPPORT_32
#define SUPPORT_32 0
#endif

/* EBCDIC support, and the newline value reported for it */
#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif

/* BACKSLASH_C is 0 when NEVER_BACKSLASH_C is defined, otherwise 1 */
#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
880 
/* One entry in the table of -C command line arguments. */
typedef struct coptstruct {
  const char *name;    /* Argument text that follows -C */
  uint32_t    type;    /* One of the CONF_xxx values */
  uint32_t    value;   /* Fixed value, or a PCRE2_CONFIG_xxx code, per type */
} coptstruct;
886 
/* Values for the type field of a coptstruct entry. */
enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
893 
894 static coptstruct coptlist[] = {
895   { "backslash-C", CONF_FIX, BACKSLASH_C },
896   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
897   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
898   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
899   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
900   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
901   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
902   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
903   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
904   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
905   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
906 };
907 
/* Number of entries in coptlist. The expansion is wrapped in parentheses so
that it acts as a single operand when used inside a larger expression. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
909 
/* The 0/1 helper macros are not needed once the table has been built.
EBCDIC_NL and BACKSLASH_C are left defined. */
#undef SUPPORT_EBCDIC
#undef SUPPORT_32
#undef SUPPORT_16
#undef SUPPORT_8
914 
915 
916 /* ----------------------- Static variables ------------------------ */
917 
918 static FILE *infile;
919 static FILE *outfile;
920 
921 static const void *last_callout_mark;
922 static PCRE2_JIT_STACK *jit_stack = NULL;
923 static size_t jit_stack_size = 0;
924 
925 static BOOL first_callout;
926 static BOOL jit_was_used;
927 static BOOL restrict_for_perl_test = FALSE;
928 static BOOL show_memory = FALSE;
929 
930 static int code_unit_size;                    /* Bytes */
931 static int jitrc;                             /* Return from JIT compile */
932 static int test_mode = DEFAULT_TEST_MODE;
933 static int timeit = 0;
934 static int timeitm = 0;
935 
936 clock_t total_compile_time = 0;
937 clock_t total_jit_compile_time = 0;
938 clock_t total_match_time = 0;
939 
940 static uint32_t dfa_matched;
941 static uint32_t forbid_utf = 0;
942 static uint32_t maxlookbehind;
943 static uint32_t max_oveccount;
944 static uint32_t callout_count;
945 static uint32_t maxcapcount;
946 
947 static uint16_t local_newline_default = 0;
948 
949 static VERSION_TYPE jittarget[VERSION_SIZE];
950 static VERSION_TYPE version[VERSION_SIZE];
951 static VERSION_TYPE uversion[VERSION_SIZE];
952 
953 static patctl def_patctl;
954 static patctl pat_patctl;
955 static datctl def_datctl;
956 static datctl dat_datctl;
957 
958 static void *patstack[PATSTACKSIZE];
959 static int patstacknext = 0;
960 
961 static void *malloclist[MALLOCLISTSIZE];
962 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
963 static uint32_t malloclistptr = 0;
964 
965 #ifdef SUPPORT_PCRE2_8
966 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
967 #endif
968 
969 static int *dfa_workspace = NULL;
970 static const uint8_t *locale_tables = NULL;
971 static const uint8_t *use_tables = NULL;
972 static uint8_t locale_name[32];
973 static uint8_t *tables3 = NULL;         /* For binary-loaded tables */
974 static uint32_t loadtables_length = 0;
975 
/* Buffers are needed for building 16-bit and 32-bit strings. 8-bit strings
need no rebuilding, but the same naming scheme is set up for them so that the
macros can treat all widths alike. All input lines are read into "buffer",
which has the same size as pbuffer8. Pattern lines are always copied to
pbuffer8 for use in callouts, even when they are actually compiled from
pbuffer16 or pbuffer32. */

static size_t   pbuffer8_size = 50000;    /* Initial size, bytes */
static uint8_t *pbuffer8      = NULL;
static uint8_t *buffer        = NULL;

/* All processed data lines are put into dbuffer. In non-8-bit modes it is
cast as needed, and it grows as necessary for long data lines. */

static size_t   dbuffer_size = 1u << 14;  /* Initial size, bytes */
static uint8_t *dbuffer      = NULL;
991 
992 
993 /* ---------------- Mode-dependent variables -------------------*/
994 
/* One set of variables per supported code unit width. The names differ only
in their numeric suffix, which lets the mode-selecting macros build them by
token pasting. */

#if defined(SUPPORT_PCRE2_8)
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif

#if defined(SUPPORT_PCRE2_16)
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE                pbuffer16_size = 0;   /* Set only when needed */
static uint16_t                 *pbuffer16 = NULL;
#endif

#if defined(SUPPORT_PCRE2_32)
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE                pbuffer32_size = 0;   /* Set only when needed */
static uint32_t                 *pbuffer32 = NULL;
#endif
1025 
1026 
1027 /* ---------------- Macros that work in all modes ----------------- */
1028 
/* CAST8VAR applies CASTVAR with uint8_t * as the target type. SET and SETPLUS
apply the "=" and "+=" operators through SETOP, which is defined elsewhere in
this file. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)

/* strlen() for a string held as 8-bit code units. The argument is
parenthesized so that the cast applies to the whole expression passed in,
whatever operators it contains. */

#define strlen8(x) strlen((char *)(x))
1033 
1034 
1035 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1036 
1037 /* Define macros for variables and functions that must be selected dynamically
1038 depending on the mode setting (8, 16, 32). These are dependent on which modes
1039 are supported. */
1040 
1041 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1042      defined (SUPPORT_PCRE2_32)) >= 2
1043 
1044 /* ----- All three modes supported ----- */
1045 
1046 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1047 
/* CASTFLD(t,a,b): field b of the structure pointed to by the test_mode-
selected variant of a, cast to type t. */

#define CASTFLD(t,a,b) ( \
  (test_mode == PCRE8_MODE)?  (t)(G(a,8)->b) : \
  (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
                              (t)(G(a,32)->b))

/* CASTVAR(t,x): the test_mode-selected variant of variable x, cast to
type t. */

#define CASTVAR(t,x) ( \
  (test_mode == PCRE8_MODE)?  (t)G(x,8) : \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : \
                              (t)G(x,32))

/* CODE_UNIT(a,b): the code unit at index b of string a, read at the width
selected by test_mode and returned as a uint32_t. */

#define CODE_UNIT(a,b) ( \
  (test_mode == PCRE8_MODE)? \
    (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
  (test_mode == PCRE16_MODE)? \
    (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
    (uint32_t)(((PCRE2_SPTR32)(a))[b]))
1059 
/* CONCTXCPY(a,b): copy convert context b over convert context a, using the
variants selected by test_mode. */

#define CONCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8),G(b,8), \
    sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16),G(b,16), \
    sizeof(pcre2_convert_context_16)); \
  else memcpy(G(a,32),G(b,32), \
    sizeof(pcre2_convert_context_32))

/* CONVERT_COPY(a,b,c): copy c code units from b into the test_mode-selected
variant of a. The byte count is c scaled by the code unit size. Nothing is
copied when test_mode is none of the three known modes. */

#define CONVERT_COPY(a,b,c) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8),(char *)b,c); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16),(char *)b,(c)*2); \
  else if (test_mode == PCRE32_MODE) memcpy(G(a,32),(char *)b,(c)*4)

/* DATCTXCPY(a,b): copy match context b over match context a. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8),G(b,8), \
    sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16),G(b,16), \
    sizeof(pcre2_match_context_16)); \
  else memcpy(G(a,32),G(b,32), \
    sizeof(pcre2_match_context_32))

/* FLD(a,b): field b of the structure pointed to by the test_mode-selected
variant of a. */

#define FLD(a,b) ( \
  (test_mode == PCRE8_MODE)?  G(a,8)->b : \
  (test_mode == PCRE16_MODE)? G(a,16)->b : \
                              G(a,32)->b)

/* PATCTXCPY(a,b): copy compile context b over compile context a. */

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) memcpy(G(a,8),G(b,8), \
    sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) memcpy(G(a,16),G(b,16), \
    sizeof(pcre2_compile_context_16)); \
  else memcpy(G(a,32),G(b,32), \
    sizeof(pcre2_compile_context_32))
1091 
/* PCHARS(lv,p,offset,len,utf,f): call the pchars function for the width
selected by test_mode on the string starting "offset" code units into p, and
store its return value in lv. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) lv = \
    pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) lv = \
    pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else lv = \
    pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

/* PCHARSV: as PCHARS, but the return value is discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) (void) \
    pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) (void) \
    pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else (void) \
    pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1107 
/* PCRE2_CALLOUT_ENUMERATE(a,b,c): call pcre2_callout_enumerate for the width
selected by test_mode on that width's compiled_code variable, with callback b
(cast to that width's callback type) and user data c. The result is stored
in a. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else a = \
    pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1118 
/* PCRE2_CODE_COPY_FROM_VOID(a,b): copy the compiled pattern b with
pcre2_code_copy and store the copy in the test_mode-selected variant of a. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE8_MODE) G(a,8) = pcre2_code_copy_8(b); \
  else if (test_mode == PCRE16_MODE) G(a,16) = pcre2_code_copy_16(b); \
  else G(a,32) = pcre2_code_copy_32(b)

/* PCRE2_CODE_COPY_TO_VOID(a,b): copy the test_mode-selected variant of b
with pcre2_code_copy and store the copy in a as a void pointer. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) a = (void *)pcre2_code_copy_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = (void *)pcre2_code_copy_16(G(b,16)); \
  else a = (void *)pcre2_code_copy_32(G(b,32))

/* PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b): as PCRE2_CODE_COPY_TO_VOID, but
using pcre2_code_copy_with_tables. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) a = \
    (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = \
    (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
  else a = \
    (void *)pcre2_code_copy_with_tables_32(G(b,32))
1142 
/* PCRE2_COMPILE(a,b,c,d,e,f,g): call pcre2_compile for the width selected by
test_mode, passing that width's variant of b followed by c..g unchanged, and
store the result in that width's variant of a. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) G(a,8) = \
    pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else if (test_mode == PCRE16_MODE) G(a,16) = \
    pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else G(a,32) = \
    pcre2_compile_32(G(b,32),c,d,e,f,g)

/* PCRE2_CONVERTED_PATTERN_FREE(a): pass a, cast to the code unit pointer
type of the active width, to pcre2_converted_pattern_free. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
1155 
/* PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j): call pcre2_dfa_match for the width
selected by test_mode. b and g name per-width variables, c is cast to that
width's subject pointer type, and the remaining arguments are passed through
unchanged. The result is stored in a. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else a = \
    pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1163 
/* PCRE2_GET_ERROR_MESSAGE(r,a,b): fetch the message for error code a into
the test_mode-selected variant of buffer b, storing the return value in r.
The length argument is built by pasting "_size" onto the buffer name and, for
the 16-bit and 32-bit cases, dividing by 2 or 4. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) r = \
    pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) r = \
    pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
  else r = \
    pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))

/* PCRE2_GET_OVECTOR_COUNT(a,b): store in a the result of
pcre2_get_ovector_count for the test_mode-selected variant of b. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE8_MODE) a = pcre2_get_ovector_count_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = pcre2_get_ovector_count_16(G(b,16)); \
  else a = pcre2_get_ovector_count_32(G(b,32))

/* PCRE2_GET_STARTCHAR(a,b): store in a the result of pcre2_get_startchar for
the test_mode-selected variant of b. */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE8_MODE) a = pcre2_get_startchar_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) a = pcre2_get_startchar_16(G(b,16)); \
  else a = pcre2_get_startchar_32(G(b,32))
1187 
/* PCRE2_JIT_COMPILE(r,a,b): call pcre2_jit_compile on the test_mode-selected
variant of a with options b, storing the return value in r. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_jit_compile_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_jit_compile_16(G(a,16),b); \
  else \
    r = pcre2_jit_compile_32(G(a,32),b)

/* PCRE2_JIT_FREE_UNUSED_MEMORY(a): call pcre2_jit_free_unused_memory on the
test_mode-selected variant of a. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(a,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(a,32))

/* PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h): call pcre2_jit_match for the width
selected by test_mode. b and g name per-width variables, c is cast to that
width's subject pointer type, and the result is stored in a. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else a = \
    pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1205 
/* PCRE2_JIT_STACK_CREATE(a,b,c,d): call pcre2_jit_stack_create for the width
selected by test_mode with arguments b, c and d, and store the result in a
cast to PCRE2_JIT_STACK *.

The expansion deliberately does not end with a semicolon: the caller supplies
it, as with the other mode-selecting macros here. A semicolon inside the macro
would add an empty statement after the call, which makes
"if (x) PCRE2_JIT_STACK_CREATE(...); else ..." fail to compile. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1213 
/* PCRE2_JIT_STACK_ASSIGN(a,b,c): call pcre2_jit_stack_assign on the
test_mode-selected variant of a, with b cast to that width's JIT callback
type and c passed through unchanged.

The expansion deliberately does not end with a semicolon: the caller supplies
it, as with the other mode-selecting macros here. A semicolon inside the macro
would add an empty statement after the call, which makes
"if (x) PCRE2_JIT_STACK_ASSIGN(...); else ..." fail to compile. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1221 
/* PCRE2_JIT_STACK_FREE(a): pass a, cast to the JIT stack pointer type of the
width selected by test_mode, to pcre2_jit_stack_free.

The expansion deliberately does not end with a semicolon: the caller supplies
it, as with the other mode-selecting macros here. A semicolon inside the macro
would add an empty statement after the call, which makes
"if (x) PCRE2_JIT_STACK_FREE(...); else ..." fail to compile. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1229 
/* PCRE2_MAKETABLES(a): store in a the result of calling pcre2_maketables,
for the width selected by test_mode, with a NULL argument. */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_maketables_16(NULL); \
  else \
    a = pcre2_maketables_32(NULL)

/* PCRE2_MATCH(a,b,c,d,e,f,g,h): call pcre2_match for the width selected by
test_mode. b and g name per-width variables, c is cast to that width's
subject pointer type, and the result is stored in a. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else a = \
    pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* PCRE2_MATCH_DATA_CREATE(a,b,c): store in the test_mode-selected variant of
a the result of pcre2_match_data_create(b,c). */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE8_MODE) G(a,8) = \
    pcre2_match_data_create_8(b,c); \
  else if (test_mode == PCRE16_MODE) G(a,16) = \
    pcre2_match_data_create_16(b,c); \
  else G(a,32) = \
    pcre2_match_data_create_32(b,c)

/* PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c): as above, but created from
the test_mode-selected variant of b. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE8_MODE) G(a,8) = \
    pcre2_match_data_create_from_pattern_8(G(b,8),c); \
  else if (test_mode == PCRE16_MODE) G(a,16) = \
    pcre2_match_data_create_from_pattern_16(G(b,16),c); \
  else G(a,32) = \
    pcre2_match_data_create_from_pattern_32(G(b,32),c)

/* PCRE2_MATCH_DATA_FREE(a): call pcre2_match_data_free on the test_mode-
selected variant of a. */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE8_MODE) pcre2_match_data_free_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) pcre2_match_data_free_16(G(a,16)); \
  else pcre2_match_data_free_32(G(a,32))
1266 
/* PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g): call pcre2_pattern_convert for the
width selected by test_mode. b and g name per-width variables, e is cast to a
pointer to that width's code unit pointer, and the result is stored in a. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) a = \
    pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) a = \
    pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
  else a = \
    pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))

/* PCRE2_PATTERN_INFO(a,b,c,d): call pcre2_pattern_info on the test_mode-
selected variant of b with arguments c and d, storing the result in a. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE) a = pcre2_pattern_info_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) a = pcre2_pattern_info_16(G(b,16),c,d); \
  else a = pcre2_pattern_info_32(G(b,32),c,d)

/* PCRE2_PRINTINT(a): call pcre2_printint for the width selected by test_mode
on that width's compiled_code variable, with outfile and a as the remaining
arguments. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE8_MODE) pcre2_printint_8(compiled_code8,outfile,a); \
  else if (test_mode == PCRE16_MODE) pcre2_printint_16(compiled_code16,outfile,a); \
  else pcre2_printint_32(compiled_code32,outfile,a)
1290 
/* PCRE2_SERIALIZE_DECODE(r,a,b,c,d): call pcre2_serialize_decode for the
width selected by test_mode. a is cast to a pointer to that width's code
pointer, d names a per-width variable, and the return value is stored in r. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE8_MODE) r = \
    pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else if (test_mode == PCRE16_MODE) r = \
    pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else r = \
    pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))

/* PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e): call pcre2_serialize_encode for the
width selected by test_mode. a is cast to a pointer to that width's const
code pointer, e names a per-width variable, and the return value is stored
in r. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) r = \
    pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else if (test_mode == PCRE16_MODE) r = \
    pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else r = \
    pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))

/* PCRE2_SERIALIZE_FREE(a): pass a to pcre2_serialize_free for the width
selected by test_mode. */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE8_MODE) pcre2_serialize_free_8(a); \
  else if (test_mode == PCRE16_MODE) pcre2_serialize_free_16(a); \
  else pcre2_serialize_free_32(a)
1314 
/* PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a): store in r the result of
pcre2_serialize_get_number_of_codes(a) for the width selected by test_mode.

The expansion ends with neither a semicolon nor a line continuation. The
caller supplies the semicolon, as with the other mode-selecting macros here;
a semicolon inside the macro breaks "if (x) MACRO(...); else ...", and a
trailing backslash would pull whatever is on the following source line into
the macro body. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1322 
/* PCRE2_SET_CALLOUT(a,b,c): call pcre2_set_callout on the test_mode-selected
variant of a, with b cast to that width's callout function pointer type and c
passed through unchanged.

The expansion deliberately does not end with a semicolon: the caller supplies
it, as with the other mode-selecting macros here. A semicolon inside the macro
would add an empty statement after the call, which makes
"if (x) PCRE2_SET_CALLOUT(...); else ..." fail to compile. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1330 
1331 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1332   if (test_mode == PCRE8_MODE) \
1333     pcre2_set_character_tables_8(G(a,8),b); \
1334   else if (test_mode == PCRE16_MODE) \
1335     pcre2_set_character_tables_16(G(a,16),b); \
1336   else \
1337     pcre2_set_character_tables_32(G(a,32),b)
1338 
1339 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1340   if (test_mode == PCRE8_MODE) \
1341     pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
1342   else if (test_mode == PCRE16_MODE) \
1343     pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
1344   else \
1345     pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
1346 
1347 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1348   if (test_mode == PCRE8_MODE) \
1349     pcre2_set_depth_limit_8(G(a,8),b); \
1350   else if (test_mode == PCRE16_MODE) \
1351     pcre2_set_depth_limit_16(G(a,16),b); \
1352   else \
1353     pcre2_set_depth_limit_32(G(a,32),b)
1354 
/* Glob separator and escape setters. r receives the value returned by the
pcre2_set_glob_separator_N() or pcre2_set_glob_escape_N() function selected by
test_mode, which is called with G(a,N) and b. */

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == PCRE8_MODE) r = pcre2_set_glob_separator_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) r = pcre2_set_glob_separator_16(G(a,16),b); \
  else r = pcre2_set_glob_separator_32(G(a,32),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == PCRE8_MODE) r = pcre2_set_glob_escape_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) r = pcre2_set_glob_escape_16(G(a,16),b); \
  else r = pcre2_set_glob_escape_32(G(a,32),b)
1370 
/* Limit setters with no result. Each passes G(a,N) and the value b to the
pcre2_set_..._N() function selected by test_mode; modes other than 8-bit and
16-bit fall through to the 32-bit call. */

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) pcre2_set_heap_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_heap_limit_16(G(a,16),b); \
  else pcre2_set_heap_limit_32(G(a,32),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) pcre2_set_match_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_match_limit_16(G(a,16),b); \
  else pcre2_set_match_limit_32(G(a,32),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE8_MODE) pcre2_set_max_pattern_length_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_max_pattern_length_16(G(a,16),b); \
  else pcre2_set_max_pattern_length_32(G(a,32),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) pcre2_set_offset_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_offset_limit_16(G(a,16),b); \
  else pcre2_set_offset_limit_32(G(a,32),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else pcre2_set_parens_nest_limit_32(G(a,32),b)
1410 
/* Install b, cast to the substitute-callout signature of the active width,
together with data pointer c, by calling pcre2_set_substitute_callout_N() on
G(a,N). */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) pcre2_set_substitute_callout_8( \
    G(a,8),(int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) pcre2_set_substitute_callout_16( \
    G(a,16),(int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
  else pcre2_set_substitute_callout_32( \
    G(a,32),(int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)

/* Assign to a the result of pcre2_substitute_N(). Arguments b and g have the
width suffix appended via G(); c and i are cast to PCRE2_SPTRN and k to
PCRE2_UCHARN *; the remaining arguments are passed through unchanged. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f, \
      G(g,8),h,(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f, \
      G(g,16),h,(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f, \
      G(g,32),h,(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1432 
/* Substring copy/get/free dispatchers. In the copy and get macros a receives
the function result, b (and c for the by-name forms) get the width suffix via
G(), and d is cast to the code unit pointer type of the active width. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_copy_byname_8( \
    G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_copy_byname_16( \
    G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else a = pcre2_substring_copy_byname_32( \
    G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_copy_bynumber_8( \
    G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_copy_bynumber_16( \
    G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else a = pcre2_substring_copy_bynumber_32( \
    G(b,32),c,(PCRE2_UCHAR32 *)d,e)

/* Pass a, cast to the code unit pointer type of the active width, to
pcre2_substring_free_N(). */

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)a)

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_get_byname_8( \
    G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_get_byname_16( \
    G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else a = pcre2_substring_get_byname_32( \
    G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_get_bynumber_8( \
    G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_get_bynumber_16( \
    G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else a = pcre2_substring_get_bynumber_32( \
    G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1470 
/* Substring length, list and name-lookup dispatchers. Where a macro has a
result, it is assigned to the first argument a. Arguments wrapped in G() get
the width suffix of the active mode; pointer arguments are cast to the matching
PCRE2_UCHARN or PCRE2_SPTRN type. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_length_byname_8( \
    G(b,8),G(c,8),d); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_length_byname_16( \
    G(b,16),G(c,16),d); \
  else a = pcre2_substring_length_byname_32( \
    G(b,32),G(c,32),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_length_bynumber_8( \
    G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_length_bynumber_16( \
    G(b,16),c,d); \
  else a = pcre2_substring_length_bynumber_32( \
    G(b,32),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_list_get_8( \
    G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_list_get_16( \
    G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else a = pcre2_substring_list_get_32( \
    G(b,32),(PCRE2_UCHAR32 ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE8_MODE) a = pcre2_substring_number_from_name_8( \
    G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) a = pcre2_substring_number_from_name_16( \
    G(b,16),G(c,16)); \
  else a = pcre2_substring_number_from_name_32( \
    G(b,32),G(c,32))
1510 
/* Yield, as a void pointer, whichever of the variables x8, x16 or x32
corresponds to test_mode (x32 for any mode that is not 8-bit or 16-bit). */

#define PTR(x) \
  ((test_mode == PCRE8_MODE)?  (void *)G(x,8)  : \
   (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
                               (void *)G(x,32))
1515 
/* Assignment helpers that act on the variable x8, x16 or x32 matching
test_mode (x32 for any mode that is not 8-bit or 16-bit). The caller supplies
the terminating semicolon. */

/* Store z in field y of the structure the variable points to. */

#define SETFLD(x,y,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8)->y = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16)->y = z; \
  else \
    G(x,32)->y = z

/* Store z in element v of the array field y. */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8)->y[v] = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16)->y[v] = z; \
  else \
    G(x,32)->y[v] = z

/* Apply operator z with right-hand operand y to the variable itself. */

#define SETOP(x,y,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8) z y; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16) z y; \
  else \
    G(x,32) z y

/* Assign y to the variable, cast to a pointer to uint8_t, uint16_t or
uint32_t according to the mode. */

#define SETCASTPTR(x,y) \
  if (test_mode == PCRE8_MODE) G(x,8) = (uint8_t *)(y); \
  else if (test_mode == PCRE16_MODE) G(x,16) = (uint16_t *)(y); \
  else G(x,32) = (uint32_t *)(y)
1538 
/* Length of the string p as an int, measured with strlen() in 8-bit mode,
strlen16() in 16-bit mode and strlen32() otherwise; p is cast to the pointer
type each function is called with. */

#define STRLEN(p) \
  ((test_mode == PCRE8_MODE)?  ((int)strlen((char *)p)) : \
   (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
                               ((int)strlen32((PCRE2_SPTR32)p)))
1542 
/* Call the function a8, a16 or a32 that matches test_mode, passing the
like-suffixed variable(s) named by b (and c). Any mode other than 8-bit or
16-bit uses the 32-bit names. */

#define SUB1(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16)); \
  else \
    G(a,32)(G(b,32))

#define SUB2(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16),G(c,16)); \
  else \
    G(a,32)(G(b,32),G(c,32))
1552 
/* Boolean tests on the variable x8, x16 or x32 that matches test_mode. The
result is true only when test_mode is one of the three known modes and the
comparison "variable r (y)" holds for that mode's variable. */

#define TEST(x,r,y) \
  ((test_mode == PCRE8_MODE  && G(x,8)  r (y)) || \
   (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
   (test_mode == PCRE32_MODE && G(x,32) r (y)))

/* As TEST, but the left-hand operand is field f of the structure the variable
points to. */

#define TESTFLD(x,f,r,y) \
  ((test_mode == PCRE8_MODE  && G(x,8)->f  r (y)) || \
   (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
   (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1562 
1563 
1564 /* ----- Two out of three modes are supported ----- */
1565 
1566 #else
1567 
/* We can use some macro trickery to make a single set of definitions work in
all three of the possible two-mode combinations (32+16, 32+8 and 16+8). */
1570 
/* Select the pair of code unit widths. BITONE is the wider of the two
supported widths and BITTWO the narrower. */

/* ----- 32-bit and 16-bit but not 8-bit supported ----- */

#if defined SUPPORT_PCRE2_32 && defined SUPPORT_PCRE2_16
#  define BITONE 32
#  define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */

#elif defined SUPPORT_PCRE2_32 && defined SUPPORT_PCRE2_8
#  define BITONE 32
#  define BITTWO 8

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

#else
#  define BITONE 16
#  define BITTWO 8
#endif
1589 
1590 
1591 /* ----- Common macros for two-mode cases ----- */
1592 
/* Size in bytes of one code unit for each of the two supported widths. */

#define BYTEONE (BITONE / 8)
#define BYTETWO (BITTWO / 8)
1595 
/* Expression macros. Each tests whether test_mode is the BITONE mode and
otherwise uses the BITTWO names. */

/* Field b of the structure pointed to by the a variable, cast to type t. */

#define CASTFLD(t,a,b) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (t)(G(a,BITONE)->b) : (t)(G(a,BITTWO)->b))

/* The x variable of the active width, cast to type t. */

#define CASTVAR(t,x) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)G(x,BITONE) : \
    (t)G(x,BITTWO))

/* Code unit number b of a, read through a PCRE2_SPTR of the active width and
converted to uint32_t. */

#define CODE_UNIT(a,b) \
  ((test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) \
    : (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
1608 
/* memcpy() one pcre2_convert_context_N structure from the b variable to the a
variable of the active width. */

#define CONCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE), G(b,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO), G(b,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))
1614 
/* Copy c code units from b into the a buffer of the active width. memcpy()
takes a byte count, so c is multiplied by the code unit size (BYTEONE or
BYTETWO). The macro is an expression whose value is whatever memcpy() returns.
The whole conditional is parenthesized so that an operator written next to the
macro cannot bind to just one arm of the "?:". */

#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
  memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))
1619 
/* memcpy() one pcre2_match_context_N structure from the b variable to the a
variable of the active width. */

#define DATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE), G(b,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO), G(b,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

/* Field b of the structure pointed to by the a variable of the active
width. */

#define FLD(a,b) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(a,BITONE)->b : G(a,BITTWO)->b)

/* memcpy() one pcre2_compile_context_N structure from the b variable to the a
variable of the active width. */

#define PATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE), G(b,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO), G(b,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1634 
/* Call pcharsN() for the active width on p, cast to PCRE2_SPTRN and advanced
by offset code units, with len, utf and f passed through. PCHARS stores the
result in lv; PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1646 
/* Assign to a the result of pcre2_callout_enumerate_N() for the active width,
called on compiled_codeN with b cast to that width's enumeration callback
signature and with data pointer c.

The function name stem carries its trailing underscore, as every other stem in
this section does, so that the pasted name is pcre2_callout_enumerate_16 (etc.),
the same spelling the 8-bit-only definition uses. Without the underscore the
paste literally produces pcre2_callout_enumerate16.
NOTE(review): the old spelling presumably only resolved because a header maps
the bare name pcre2_callout_enumerate onto the underscored stem - confirm. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
     a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1654 
/* Code-copy dispatchers for the active width. FROM_VOID passes b directly to
pcre2_code_copy_N() and stores the result in the a variable; the two TO_VOID
forms copy the b variable and store the result, cast to void *, in a. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \
  else G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b)

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \
  else a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
  else a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
1672 
/* Store in the a variable the result of pcre2_compile_N() applied to the b
variable and the remaining arguments. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
  else G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)

/* Pass a, cast to PCRE2_UCHARN *, to pcre2_converted_pattern_free_N(). */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)

/* Assign to a the result of pcre2_dfa_match_N(); b and g are width-suffixed
variables and c is cast to PCRE2_SPTRN. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE), \
      (G(PCRE2_SPTR,BITONE))c,d,e,f,G(g,BITONE),h,i,j); \
  else \
    a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))c,d,e,f,G(g,BITTWO),h,i,j)
1692 
/* Assign to r the result of pcre2_get_error_message_N(a, buffer, length). The
buffer is the b variable of the active width; the length is the identifier
formed from that variable's name plus "_size", divided by the code unit size
in bytes (BYTEONE or BYTETWO). */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE), \
      G(G(b,BITONE),_size/BYTEONE)); \
  else \
    r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO), \
      G(G(b,BITTWO),_size/BYTETWO))

/* Assign to a the result of pcre2_get_ovector_count_N() on the b variable. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
  else a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))

/* Assign to a the result of pcre2_get_startchar_N() on the b variable. */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
  else a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))
1710 
/* JIT dispatchers for the active width. PCRE2_JIT_COMPILE stores the result
of pcre2_jit_compile_N() in r; PCRE2_JIT_MATCH stores the result of
pcre2_jit_match_N() in a, with c cast to PCRE2_SPTRN. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \
  else r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \
  else G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_jit_match_,BITONE)(G(b,BITONE), \
      (G(PCRE2_SPTR,BITONE))c,d,e,f,G(g,BITONE),h); \
  else \
    a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))c,d,e,f,G(g,BITTWO),h)
1730 
/* Assign to a the result of pcre2_jit_stack_create_N(b,c,d) for the active
width, cast to PCRE2_JIT_STACK *. The expansion ends with its own semicolon,
as the 8-bit-only definition of this macro does. It must not end with a line
continuation: a trailing backslash would splice whatever line follows the
definition into the macro body. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);
1736 
/* JIT stack assign/free for the active width. ASSIGN casts b to
pcre2_jit_callback_N; FREE casts a to pcre2_jit_stack_N *. Both expansions end
with their own semicolon. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE), \
      (G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))b,c);

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a);
1748 
/* Assign to a the result of pcre2_maketables_N(NULL). */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_maketables_,BITONE)(NULL); \
  else a = G(pcre2_maketables_,BITTWO)(NULL)

/* Assign to a the result of pcre2_match_N(); b and g are width-suffixed
variables and c is cast to PCRE2_SPTRN. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_match_,BITONE)(G(b,BITONE), \
      (G(PCRE2_SPTR,BITONE))c,d,e,f,G(g,BITONE),h); \
  else \
    a = G(pcre2_match_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))c,d,e,f,G(g,BITTWO),h)

/* Match data creation and release. The create macros store the result in the
a variable of the active width; the free macro passes that variable to
pcre2_match_data_free_N(). */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \
  else G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = \
      G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
  else \
    G(a,BITTWO) = \
      G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c)

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \
  else G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
1780 
/* Assign to a the result of pcre2_pattern_convert_N(); b and g are
width-suffixed variables and e is cast to PCRE2_UCHARN **. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d, \
      (G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
  else \
    a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d, \
      (G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))

/* Assign to a the result of pcre2_pattern_info_N() on the b variable. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
  else a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d)

/* Call pcre2_printint_N() with compiled_codeN, outfile and a. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \
  else G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a)
1798 
/* Serialization dispatchers for the active width. In DECODE and ENCODE the
first function argument a is cast to a pointer-to-pointer of the width's
pcre2_code type (const for ENCODE) and the last argument is a width-suffixed
variable. Where there is a result it is stored in r. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \
  else \
    r = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_encode_,BITONE)( \
      (G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \
  else \
    r = G(pcre2_serialize_encode_,BITTWO)( \
      (G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO))

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(a); \
  else G(pcre2_serialize_free_,BITTWO)(a)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \
  else r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a)
1822 
/* Call pcre2_set_callout_N() for the active width with the a variable, the
function b cast to that width's callout signature, and the data pointer c. No
semicolon is emitted after the last call: the 8-bit-only definition of this
macro has none, so callers already supply it, and an embedded one would detach
a following "else" from its "if". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1830 
/* Setters with no result: each forwards its arguments to the
pcre2_set_..._N() function of the active width, with the a variable as first
argument. The caller supplies the terminating semicolon. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
  else G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
1848 
/* Glob escape and separator setters: r receives the value returned by the
pcre2_set_glob_..._N() function of the active width, called with the a
variable and b. */

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
  else r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
  else r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)
1860 
/* Limit setters with no result: each passes the a variable of the active
width and the value b to the matching pcre2_set_..._N() function. */

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
  else G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
1890 
/* Install b, cast to the substitute-callout signature of the active width,
together with data pointer c, by calling pcre2_set_substitute_callout_N() on
the a variable. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(pcre2_set_substitute_callout_,BITONE)( \
    G(a,BITONE), \
    (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
  else G(pcre2_set_substitute_callout_,BITTWO)( \
    G(a,BITTWO), \
    (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)

/* Assign to a the result of pcre2_substitute_N(). b and g are width-suffixed
variables; c and i are cast to PCRE2_SPTRN and k to PCRE2_UCHARN *. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substitute_,BITONE)(G(b,BITONE), \
      (G(PCRE2_SPTR,BITONE))c,d,e,f,G(g,BITONE),h, \
      (G(PCRE2_SPTR,BITONE))i,j,(G(PCRE2_UCHAR,BITONE) *)k,l); \
  else \
    a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))c,d,e,f,G(g,BITTWO),h, \
      (G(PCRE2_SPTR,BITTWO))i,j,(G(PCRE2_UCHAR,BITTWO) *)k,l)
1908 
/* Substring copy/get/free dispatchers for the active width. In the copy and
get macros a receives the function result, b (and c for the by-name forms) are
width-suffixed variables, and d is cast to the code unit pointer type. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_copy_byname_,BITONE)( \
      G(b,BITONE),G(c,BITONE),(G(PCRE2_UCHAR,BITONE) *)d,e); \
  else \
    a = G(pcre2_substring_copy_byname_,BITTWO)( \
      G(b,BITTWO),G(c,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_copy_bynumber_,BITONE)( \
      G(b,BITONE),c,(G(PCRE2_UCHAR,BITONE) *)d,e); \
  else \
    a = G(pcre2_substring_copy_bynumber_,BITTWO)( \
      G(b,BITTWO),c,(G(PCRE2_UCHAR,BITTWO) *)d,e)

/* Pass a, cast to PCRE2_UCHARN *, to pcre2_substring_free_N(). */

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_get_byname_,BITONE)( \
      G(b,BITONE),G(c,BITONE),(G(PCRE2_UCHAR,BITONE) **)d,e); \
  else \
    a = G(pcre2_substring_get_byname_,BITTWO)( \
      G(b,BITTWO),G(c,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_get_bynumber_,BITONE)( \
      G(b,BITONE),c,(G(PCRE2_UCHAR,BITONE) **)d,e); \
  else \
    a = G(pcre2_substring_get_bynumber_,BITTWO)( \
      G(b,BITTWO),c,(G(PCRE2_UCHAR,BITTWO) **)d,e)
1945 
/* Substring length, list and name-lookup dispatchers for the active width.
Where a macro has a result it is assigned to the first argument a; arguments
wrapped in G() are width-suffixed variables. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \
  else a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \
  else a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_list_get_,BITONE)( \
      G(b,BITONE),(G(PCRE2_UCHAR,BITONE) ***)c,d); \
  else \
    a = G(pcre2_substring_list_get_,BITTWO)( \
      G(b,BITTWO),(G(PCRE2_UCHAR,BITTWO) ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \
  else G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1977 
/* The x variable of the active width, as a void pointer. */

#define PTR(x) \
  ((test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? (void *)G(x,BITONE) \
    : (void *)G(x,BITTWO))

/* Assignment helpers that act on the x variable of the active width. The
caller supplies the terminating semicolon. */

/* Store z in field y of the structure the variable points to. */

#define SETFLD(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(x,BITONE)->y = z; \
  else \
    G(x,BITTWO)->y = z

/* Store z in element v of the array field y. */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(x,BITONE)->y[v] = z; \
  else \
    G(x,BITTWO)->y[v] = z

/* Apply operator z with right-hand operand y to the variable itself. */

#define SETOP(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(x,BITONE) z y; \
  else \
    G(x,BITTWO) z y

/* Assign y to the variable, cast to a pointer to the uintN_t type of the
active width. */

#define SETCASTPTR(x,y) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
  else G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)
1999 
/* Length of the string p, obtained from the strlenN() function of the active
width with p cast to PCRE2_SPTRN.
NOTE(review): unlike the three-mode definition, the result is not cast to int
here, and when BITTWO is 8 the name generated is strlen8 - confirm that a
strlen8 is provided elsewhere in the file. */

#define STRLEN(p) \
  ((test_mode == G(G(PCRE,BITONE),_MODE)) \
    ? G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) \
    : G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p))

/* Call the a function of the active width on the like-suffixed b variable. */

#define SUB1(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(a,BITONE)(G(b,BITONE)); \
  else G(a,BITTWO)(G(b,BITTWO))
2009 
/* Call the a function of the active width with the like-suffixed b and c
variables as its two arguments. This is the two-argument counterpart of SUB1.
The function name is formed by a single G(a,N) with balanced parentheses; an
extra ")" after it would make every use of the macro a syntax error. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2015 
/* Boolean tests on the x variable of the active width. The result is true
only when test_mode is one of the two supported modes and the comparison
"variable r (y)" holds for that mode's variable. */

#define TEST(x,r,y) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y)))

/* As TEST, but the left-hand operand is field f of the structure the variable
points to. */

#define TESTFLD(x,f,r,y) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y)))
2023 
2024 
2025 #endif  /* Two out of three modes */
2026 
2027 /* ----- End of cases where more than one mode is supported ----- */
2028 
2029 
2030 /* ----- Only 8-bit mode is supported ----- */
2031 
2032 #elif defined SUPPORT_PCRE2_8
/* With only the 8-bit library available, each macro uses the "8" names
directly and never consults test_mode. The expression macros CASTFLD, CASTVAR,
CODE_UNIT and FLD are fully parenthesized, as their multi-mode definitions
are, so that a use followed by a postfix operator - CASTVAR(t,x)[i], for
instance - groups the same way in every build configuration. */

#define CASTFLD(t,a,b) ((t)(G(a,8)->b))
#define CASTVAR(t,x) ((t)G(x,8))
#define CODE_UNIT(a,b) ((uint32_t)(((PCRE2_SPTR8)(a))[b]))
#define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c)
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) (G(a,8)->b)
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
/* 8-bit-only forms of the character output, callout enumeration, code copy
and compile macros. Each expands to a single call on the "8" names; results
are stored in the macro's first argument where there is one. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p)+offset, \
    len, utf, f)

#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p)+offset, \
    len, utf, f)

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  G(a,8) = pcre2_code_copy_8(b)

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  a = (void *)pcre2_code_copy_8(G(b,8))

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  a = (void *)pcre2_code_copy_with_tables_8(G(b,8))

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = pcre2_compile_8(G(b,8), \
    c,d,e,f,g)
/* 8-bit-only forms of the pattern-free, DFA match, error message, match data
query and JIT compile/match macros. Each expands to one call on the "8" names
with no terminating semicolon. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b,8), \
    (PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)

/* The length argument is the identifier formed from the 8-bit buffer variable
name plus "_size". */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a, \
    G(b,8),G(G(b,8),_size))

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  a = pcre2_get_ovector_count_8(G(b,8))

#define PCRE2_GET_STARTCHAR(a,b) \
  a = pcre2_get_startchar_8(G(b,8))

#define PCRE2_JIT_COMPILE(r,a,b) \
  r = pcre2_jit_compile_8(G(a,8),b)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  pcre2_jit_free_unused_memory_8(G(a,8))

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b,8), \
    (PCRE2_SPTR8)c,d,e,f,G(g,8),h)
/* 8-bit-only JIT stack macros. Unlike most of the others, these three
expansions carry their own terminating semicolon. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);

#define PCRE2_JIT_STACK_FREE(a) \
  pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);

/* 8-bit-only table creation, matching, match data, pattern conversion and
information, and debug printing macros. No terminating semicolon is emitted. */

#define PCRE2_MAKETABLES(a) \
  a = pcre2_maketables_8(NULL)

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b,8), \
    (PCRE2_SPTR8)c,d,e,f,G(g,8),h)

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  G(a,8) = pcre2_match_data_create_8(b,c)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,8) = \
    pcre2_match_data_create_from_pattern_8(G(b,8),c)

#define PCRE2_MATCH_DATA_FREE(a) \
  pcre2_match_data_free_8(G(a,8))

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  a = pcre2_pattern_convert_8(G(b,8),c,d, \
    (PCRE2_UCHAR8 **)e,f,G(g,8))

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  a = pcre2_pattern_info_8(G(b,8),c,d)

#define PCRE2_PRINTINT(a) \
  pcre2_printint_8(compiled_code8,outfile,a)
/* 8-bit-only serialization macros. Where there is a result it is stored in
r. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8( \
    (pcre2_code_8 **)a,b,c,G(d,8))

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8( \
    (const pcre2_code_8 **)a,b,c,d,G(e,8))

#define PCRE2_SERIALIZE_FREE(a) \
  pcre2_serialize_free_8(a)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) r = pcre2_serialize_get_number_of_codes_8(a)

/* 8-bit-only context setters. Each calls the pcre2_set_..._8() function on
the 8-bit a variable; the two glob setters store the returned value in r. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a,8), \
    (int (*)(pcre2_callout_block_8 *, void *))b,c)

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  pcre2_set_character_tables_8(G(a,8),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) pcre2_set_compile_recursion_guard_8(G(a,8),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  pcre2_set_depth_limit_8(G(a,8),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  r = pcre2_set_glob_escape_8(G(a,8),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  r = pcre2_set_glob_separator_8(G(a,8),b)

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  pcre2_set_heap_limit_8(G(a,8),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  pcre2_set_match_limit_8(G(a,8),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  pcre2_set_max_pattern_length_8(G(a,8),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  pcre2_set_offset_limit_8(G(a,8),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  pcre2_set_parens_nest_limit_8(G(a,8),b)
2100 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2101   pcre2_set_substitute_callout_8(G(a,8), \
2102     (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
2103 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2104   a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
2105     (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
2106 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2107   a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
2108 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2109   a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
2110 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
2111 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2112   a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
2113 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2114   a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
2115 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2116     a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
2117 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2118     a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
2119 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2120   a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
2121 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2122   pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
2123 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2124   a = pcre2_substring_number_from_name_8(G(b,8),G(c,8));
2125 #define PTR(x) (void *)G(x,8)
2126 #define SETFLD(x,y,z) G(x,8)->y = z
2127 #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
2128 #define SETOP(x,y,z) G(x,8) z y
2129 #define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
2130 #define STRLEN(p) (int)strlen((char *)p)
2131 #define SUB1(a,b) G(a,8)(G(b,8))
2132 #define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
2133 #define TEST(x,r,y) (G(x,8) r (y))
2134 #define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
2135 
2136 
2137 /* ----- Only 16-bit mode is supported ----- */
2138 
2139 #elif defined SUPPORT_PCRE2_16
2140 #define CASTFLD(t,a,b) (t)(G(a,16)->b)
2141 #define CASTVAR(t,x) (t)G(x,16)
2142 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
2143 #define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
2144 #define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
2145 #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
2146 #define FLD(a,b) G(a,16)->b
2147 #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
2148 #define PCHARS(lv, p, offset, len, utf, f) \
2149   lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
2150 #define PCHARSV(p, offset, len, utf, f) \
2151   (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
2152 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2153    a = pcre2_callout_enumerate_16(compiled_code16, \
2154      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
2155 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
2156 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
2157 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
2158 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2159   G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
2160 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2161   pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
2162 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2163   a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
2164 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2165   r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
2166 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
2167 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
2168 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
2169 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
2170 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2171   a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
2172 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2173   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
2174 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2175   pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
2176 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
2177 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
2178 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2179   a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
2180 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
2181 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2182   G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
2183 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
2184 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
2185 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
2186 #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
2187 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2188   r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
2189 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2190   r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
2191 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
2192 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2193   r = pcre2_serialize_get_number_of_codes_16(a)
/* Forward to pcre2_set_callout_16(), casting b to the 16-bit callout function
type. There is deliberately no trailing semicolon: the call site supplies it,
exactly as for the 8-bit and 32-bit variants of this macro, so that the macro
can be used as the body of an if/else without leaving an empty statement. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
2196 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
2197 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2198   pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
2199 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
2200 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
2201 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
2202 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
2203 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
2204 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
2205 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
2206 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
2207 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2208   pcre2_set_substitute_callout_16(G(a,16), \
2209     (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
2210 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2211   a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
2212     (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
2213 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2214   a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
2215 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2216   a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
2217 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
2218 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2219   a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
2220 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2221   a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
2222 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2223     a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
2224 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2225     a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
2226 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2227   a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
2228 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2229   pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
2230 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2231   a = pcre2_substring_number_from_name_16(G(b,16),G(c,16));
2232 #define PTR(x) (void *)G(x,16)
2233 #define SETFLD(x,y,z) G(x,16)->y = z
2234 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
2235 #define SETOP(x,y,z) G(x,16) z y
2236 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
2237 #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
2238 #define SUB1(a,b) G(a,16)(G(b,16))
2239 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
2240 #define TEST(x,r,y) (G(x,16) r (y))
2241 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2242 
2243 
2244 /* ----- Only 32-bit mode is supported ----- */
2245 
2246 #elif defined SUPPORT_PCRE2_32
2247 #define CASTFLD(t,a,b) (t)(G(a,32)->b)
2248 #define CASTVAR(t,x) (t)G(x,32)
2249 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
2250 #define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
2251 #define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
2252 #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
2253 #define FLD(a,b) G(a,32)->b
2254 #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
2255 #define PCHARS(lv, p, offset, len, utf, f) \
2256   lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2257 #define PCHARSV(p, offset, len, utf, f) \
2258   (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2259 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2260    a = pcre2_callout_enumerate_32(compiled_code32, \
2261      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
2262 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
2263 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
2264 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
2265 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2266   G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
2267 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2268   pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
2269 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2270   a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
2271 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2272   r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
2273 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
2274 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
2275 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
2276 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
2277 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2278   a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2279 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2280   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
2281 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2282   pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
2283 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
2284 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
2285 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2286   a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2287 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
2288 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2289   G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
2290 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
2291 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
2292 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
2293 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
2294 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2295   r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
2296 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2297   r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
2298 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
2299 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2300   r = pcre2_serialize_get_number_of_codes_32(a)
2301 #define PCRE2_SET_CALLOUT(a,b,c) \
2302   pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
2303 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
2304 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2305   pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
2306 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
2307 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
2308 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
2309 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
2310 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
2311 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
2312 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
2313 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
2314 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2315   pcre2_set_substitute_callout_32(G(a,32), \
2316     (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
2317 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2318   a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
2319     (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
2320 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2321   a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* Forward to pcre2_substring_copy_bynumber_32(), assigning its result to a.
There is deliberately no trailing semicolon: the call site supplies it, exactly
as for the 8-bit and 16-bit variants of this macro, so that the macro can be
used as the body of an if/else without leaving an empty statement. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
2324 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2325 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2326   a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2327 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2328   a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2329 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2330     a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2331 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2332     a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2333 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2334   a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2335 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2336   pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
2337 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2338   a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
2339 #define PTR(x) (void *)G(x,32)
2340 #define SETFLD(x,y,z) G(x,32)->y = z
2341 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2342 #define SETOP(x,y,z) G(x,32) z y
2343 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
2344 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
2345 #define SUB1(a,b) G(a,32)(G(b,32))
2346 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2347 #define TEST(x,r,y) (G(x,32) r (y))
2348 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2349 
2350 #endif
2351 
2352 /* ----- End of mode-specific function call macros ----- */
2353 
2354 
2355 
2356 
2357 /*************************************************
2358 *         Alternate character tables             *
2359 *************************************************/
2360 
2361 /* By default, the "tables" pointer in the compile context when calling
2362 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2363 library. However, the tables modifier can be used to select alternate sets of
2364 tables, for different kinds of testing. Note that the locale modifier also
2365 adjusts the tables. */
2366 
2367 /* This is the set of tables distributed as default with PCRE2. It recognizes
2368 only ASCII characters. */
2369 
2370 static const uint8_t tables1[] = {
2371 
2372 /* This table is a lower casing table. */
2373 
2374     0,  1,  2,  3,  4,  5,  6,  7,
2375     8,  9, 10, 11, 12, 13, 14, 15,
2376    16, 17, 18, 19, 20, 21, 22, 23,
2377    24, 25, 26, 27, 28, 29, 30, 31,
2378    32, 33, 34, 35, 36, 37, 38, 39,
2379    40, 41, 42, 43, 44, 45, 46, 47,
2380    48, 49, 50, 51, 52, 53, 54, 55,
2381    56, 57, 58, 59, 60, 61, 62, 63,
2382    64, 97, 98, 99,100,101,102,103,
2383   104,105,106,107,108,109,110,111,
2384   112,113,114,115,116,117,118,119,
2385   120,121,122, 91, 92, 93, 94, 95,
2386    96, 97, 98, 99,100,101,102,103,
2387   104,105,106,107,108,109,110,111,
2388   112,113,114,115,116,117,118,119,
2389   120,121,122,123,124,125,126,127,
2390   128,129,130,131,132,133,134,135,
2391   136,137,138,139,140,141,142,143,
2392   144,145,146,147,148,149,150,151,
2393   152,153,154,155,156,157,158,159,
2394   160,161,162,163,164,165,166,167,
2395   168,169,170,171,172,173,174,175,
2396   176,177,178,179,180,181,182,183,
2397   184,185,186,187,188,189,190,191,
2398   192,193,194,195,196,197,198,199,
2399   200,201,202,203,204,205,206,207,
2400   208,209,210,211,212,213,214,215,
2401   216,217,218,219,220,221,222,223,
2402   224,225,226,227,228,229,230,231,
2403   232,233,234,235,236,237,238,239,
2404   240,241,242,243,244,245,246,247,
2405   248,249,250,251,252,253,254,255,
2406 
2407 /* This table is a case flipping table. */
2408 
2409     0,  1,  2,  3,  4,  5,  6,  7,
2410     8,  9, 10, 11, 12, 13, 14, 15,
2411    16, 17, 18, 19, 20, 21, 22, 23,
2412    24, 25, 26, 27, 28, 29, 30, 31,
2413    32, 33, 34, 35, 36, 37, 38, 39,
2414    40, 41, 42, 43, 44, 45, 46, 47,
2415    48, 49, 50, 51, 52, 53, 54, 55,
2416    56, 57, 58, 59, 60, 61, 62, 63,
2417    64, 97, 98, 99,100,101,102,103,
2418   104,105,106,107,108,109,110,111,
2419   112,113,114,115,116,117,118,119,
2420   120,121,122, 91, 92, 93, 94, 95,
2421    96, 65, 66, 67, 68, 69, 70, 71,
2422    72, 73, 74, 75, 76, 77, 78, 79,
2423    80, 81, 82, 83, 84, 85, 86, 87,
2424    88, 89, 90,123,124,125,126,127,
2425   128,129,130,131,132,133,134,135,
2426   136,137,138,139,140,141,142,143,
2427   144,145,146,147,148,149,150,151,
2428   152,153,154,155,156,157,158,159,
2429   160,161,162,163,164,165,166,167,
2430   168,169,170,171,172,173,174,175,
2431   176,177,178,179,180,181,182,183,
2432   184,185,186,187,188,189,190,191,
2433   192,193,194,195,196,197,198,199,
2434   200,201,202,203,204,205,206,207,
2435   208,209,210,211,212,213,214,215,
2436   216,217,218,219,220,221,222,223,
2437   224,225,226,227,228,229,230,231,
2438   232,233,234,235,236,237,238,239,
2439   240,241,242,243,244,245,246,247,
2440   248,249,250,251,252,253,254,255,
2441 
2442 /* This table contains bit maps for various character classes. Each map is 32
2443 bytes long and the bits run from the least significant end of each byte. The
2444 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
2445 graph, print, punct, and cntrl. Other classes are built from combinations. */
2446 
2447   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
2448   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2449   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2450   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2451 
2452   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2453   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
2454   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2455   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2456 
2457   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2458   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2459   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2460   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2461 
2462   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2463   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
2464   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2465   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2466 
2467   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2468   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
2469   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2470   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2471 
2472   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2473   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
2474   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2475   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2476 
2477   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
2478   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2479   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2480   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2481 
2482   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
2483   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2484   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2485   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2486 
2487   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
2488   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
2489   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2490   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2491 
2492   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
2493   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
2494   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2495   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2496 
2497 /* This table identifies various classes of character by individual bits:
2498   0x01   white space character
2499   0x02   letter
2500   0x04   decimal digit
2501   0x08   hexadecimal digit
2502   0x10   alphanumeric or '_'
2503   0x80   regular expression metacharacter or binary zero
2504 */
2505 
2506   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
2507   0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
2508   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
2509   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
2510   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
2511   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
2512   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
2513   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
2514   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
2515   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
2516   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
2517   0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
2518   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
2519   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
2520   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
2521   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
2522   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
2523   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
2524   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
2525   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
2526   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
2527   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
2528   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
2529   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
2530   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
2531   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
2532   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
2533   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
2534   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
2535   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
2536   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
2537   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2538 
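/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc. The layout
is the same as for tables1, but without the interleaved comments: a
lower-casing table (256 bytes), a case-flipping table (256 bytes), the
character class bit maps (10 maps of 32 bytes each), and the character type
table (256 bytes). */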
2542 
2543 static const uint8_t tables2[] = {
2544 0,1,2,3,4,5,6,7,
2545 8,9,10,11,12,13,14,15,
2546 16,17,18,19,20,21,22,23,
2547 24,25,26,27,28,29,30,31,
2548 32,33,34,35,36,37,38,39,
2549 40,41,42,43,44,45,46,47,
2550 48,49,50,51,52,53,54,55,
2551 56,57,58,59,60,61,62,63,
2552 64,97,98,99,100,101,102,103,
2553 104,105,106,107,108,109,110,111,
2554 112,113,114,115,116,117,118,119,
2555 120,121,122,91,92,93,94,95,
2556 96,97,98,99,100,101,102,103,
2557 104,105,106,107,108,109,110,111,
2558 112,113,114,115,116,117,118,119,
2559 120,121,122,123,124,125,126,127,
2560 128,129,130,131,132,133,134,135,
2561 136,137,138,139,140,141,142,143,
2562 144,145,146,147,148,149,150,151,
2563 152,153,154,155,156,157,158,159,
2564 160,161,162,163,164,165,166,167,
2565 168,169,170,171,172,173,174,175,
2566 176,177,178,179,180,181,182,183,
2567 184,185,186,187,188,189,190,191,
2568 224,225,226,227,228,229,230,231,
2569 232,233,234,235,236,237,238,239,
2570 240,241,242,243,244,245,246,215,
2571 248,249,250,251,252,253,254,223,
2572 224,225,226,227,228,229,230,231,
2573 232,233,234,235,236,237,238,239,
2574 240,241,242,243,244,245,246,247,
2575 248,249,250,251,252,253,254,255,
2576 0,1,2,3,4,5,6,7,
2577 8,9,10,11,12,13,14,15,
2578 16,17,18,19,20,21,22,23,
2579 24,25,26,27,28,29,30,31,
2580 32,33,34,35,36,37,38,39,
2581 40,41,42,43,44,45,46,47,
2582 48,49,50,51,52,53,54,55,
2583 56,57,58,59,60,61,62,63,
2584 64,97,98,99,100,101,102,103,
2585 104,105,106,107,108,109,110,111,
2586 112,113,114,115,116,117,118,119,
2587 120,121,122,91,92,93,94,95,
2588 96,65,66,67,68,69,70,71,
2589 72,73,74,75,76,77,78,79,
2590 80,81,82,83,84,85,86,87,
2591 88,89,90,123,124,125,126,127,
2592 128,129,130,131,132,133,134,135,
2593 136,137,138,139,140,141,142,143,
2594 144,145,146,147,148,149,150,151,
2595 152,153,154,155,156,157,158,159,
2596 160,161,162,163,164,165,166,167,
2597 168,169,170,171,172,173,174,175,
2598 176,177,178,179,180,181,182,183,
2599 184,185,186,187,188,189,190,191,
2600 224,225,226,227,228,229,230,231,
2601 232,233,234,235,236,237,238,239,
2602 240,241,242,243,244,245,246,215,
2603 248,249,250,251,252,253,254,223,
2604 192,193,194,195,196,197,198,199,
2605 200,201,202,203,204,205,206,207,
2606 208,209,210,211,212,213,214,247,
2607 216,217,218,219,220,221,222,255,
2608 0,62,0,0,1,0,0,0,
2609 0,0,0,0,0,0,0,0,
2610 32,0,0,0,1,0,0,0,
2611 0,0,0,0,0,0,0,0,
2612 0,0,0,0,0,0,255,3,
2613 126,0,0,0,126,0,0,0,
2614 0,0,0,0,0,0,0,0,
2615 0,0,0,0,0,0,0,0,
2616 0,0,0,0,0,0,255,3,
2617 0,0,0,0,0,0,0,0,
2618 0,0,0,0,0,0,12,2,
2619 0,0,0,0,0,0,0,0,
2620 0,0,0,0,0,0,0,0,
2621 254,255,255,7,0,0,0,0,
2622 0,0,0,0,0,0,0,0,
2623 255,255,127,127,0,0,0,0,
2624 0,0,0,0,0,0,0,0,
2625 0,0,0,0,254,255,255,7,
2626 0,0,0,0,0,4,32,4,
2627 0,0,0,128,255,255,127,255,
2628 0,0,0,0,0,0,255,3,
2629 254,255,255,135,254,255,255,7,
2630 0,0,0,0,0,4,44,6,
2631 255,255,127,255,255,255,127,255,
2632 0,0,0,0,254,255,255,255,
2633 255,255,255,255,255,255,255,127,
2634 0,0,0,0,254,255,255,255,
2635 255,255,255,255,255,255,255,255,
2636 0,2,0,0,255,255,255,255,
2637 255,255,255,255,255,255,255,127,
2638 0,0,0,0,255,255,255,255,
2639 255,255,255,255,255,255,255,255,
2640 0,0,0,0,254,255,0,252,
2641 1,0,0,248,1,0,0,120,
2642 0,0,0,0,254,255,255,255,
2643 0,0,128,0,0,0,128,0,
2644 255,255,255,255,0,0,0,0,
2645 0,0,0,0,0,0,0,128,
2646 255,255,255,255,0,0,0,0,
2647 0,0,0,0,0,0,0,0,
2648 128,0,0,0,0,0,0,0,
2649 0,1,1,0,1,1,0,0,
2650 0,0,0,0,0,0,0,0,
2651 0,0,0,0,0,0,0,0,
2652 1,0,0,0,128,0,0,0,
2653 128,128,128,128,0,0,128,0,
2654 28,28,28,28,28,28,28,28,
2655 28,28,0,0,0,0,0,128,
2656 0,26,26,26,26,26,26,18,
2657 18,18,18,18,18,18,18,18,
2658 18,18,18,18,18,18,18,18,
2659 18,18,18,128,128,0,128,16,
2660 0,26,26,26,26,26,26,18,
2661 18,18,18,18,18,18,18,18,
2662 18,18,18,18,18,18,18,18,
2663 18,18,18,128,128,0,0,0,
2664 0,0,0,0,0,1,0,0,
2665 0,0,0,0,0,0,0,0,
2666 0,0,0,0,0,0,0,0,
2667 0,0,0,0,0,0,0,0,
2668 1,0,0,0,0,0,0,0,
2669 0,0,18,0,0,0,0,0,
2670 0,0,20,20,0,18,0,0,
2671 0,20,18,0,0,0,0,0,
2672 18,18,18,18,18,18,18,18,
2673 18,18,18,18,18,18,18,18,
2674 18,18,18,18,18,18,18,0,
2675 18,18,18,18,18,18,18,18,
2676 18,18,18,18,18,18,18,18,
2677 18,18,18,18,18,18,18,18,
2678 18,18,18,18,18,18,18,0,
2679 18,18,18,18,18,18,18,18
2680 };
2681 
2682 
2683 
2684 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2685 /*************************************************
2686 *    Emulated memmove() for systems without it   *
2687 *************************************************/
2688 
2689 /* This function can make use of bcopy() if it is available. Otherwise do it by
2690 steam, as there are some non-Unix environments that lack both memmove() and
2691 bcopy(). */
2692 
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
/* bcopy() takes its source first and copes with overlapping areas. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* The destination lies above the source, so copy from the top byte
  downwards; a source byte is then always read before it can be overwritten. */
  size_t left = n;
  while (left > 0)
    {
    left--;
    to[left] = from[left];
    }
  }
else
  {
  /* The destination lies at or below the source: a plain ascending copy. */
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }

/* Like memmove(), yield the start of the destination. */
return d;
#endif   /* not HAVE_BCOPY */
}
2717 #undef memmove
2718 #define memmove(d,s,n) emulated_memmove(d,s,n)
2719 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2720 
2721 
2722 
2723 #ifndef HAVE_STRERROR
2724 /*************************************************
2725 *     Provide strerror() for non-ANSI libraries  *
2726 *************************************************/
2727 
2728 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2729 libraries. They may no longer be around, but just in case, we can try to
2730 provide the same facility by this simple alternative function. */
2731 
2732 extern int   sys_nerr;
2733 extern char *sys_errlist[];
2734 
2735 char *
strerror(int n)2736 strerror(int n)
2737 {
2738 if (n < 0 || n >= sys_nerr) return "unknown error number";
2739 return sys_errlist[n];
2740 }
2741 #endif /* HAVE_STRERROR */
2742 
2743 
2744 
2745 /*************************************************
2746 *            Local memory functions              *
2747 *************************************************/
2748 
2749 /* Alternative memory functions, to test functionality. */
2750 
/* Allocate size bytes with malloc(). The data argument is not used. When
show_memory is set, the request is logged to outfile and, if there is room in
malloclist, the block and its length are remembered so that my_free() can
report the size when the block is released. Returns the malloc() result, which
may be NULL. */

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

/* Nothing more to do unless memory tracing is switched on. */

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
  return NULL;
  }

fprintf(outfile, "malloc  %5" SIZ_FORM, size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2779 
my_free(void * block,void * data)2780 static void my_free(void *block, void *data)
2781 {
2782 (void)data;
2783 if (show_memory)
2784   {
2785   uint32_t i, j;
2786   BOOL found = FALSE;
2787 
2788   fprintf(outfile, "free");
2789   for (i = 0; i < malloclistptr; i++)
2790     {
2791     if (block == malloclist[i])
2792       {
2793       fprintf(outfile, "    %5" SIZ_FORM, malloclistlength[i]);
2794       malloclistptr--;
2795       for (j = i; j < malloclistptr; j++)
2796         {
2797         malloclist[j] = malloclist[j+1];
2798         malloclistlength[j] = malloclistlength[j+1];
2799         }
2800       found = TRUE;
2801       break;
2802       }
2803     }
2804   if (!found) fprintf(outfile, " unremembered block");
2805 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2806   fprintf(outfile, " %p", block);  /* Not portable */
2807 #endif
2808   fprintf(outfile, "\n");
2809   }
2810 free(block);
2811 }
2812 
2813 
2814 
2815 /*************************************************
2816 *       Callback function for stack guard        *
2817 *************************************************/
2818 
/* This is set up to be called from pcre2_compile() when the stackguard=n
modifier sets a value greater than zero. The test we do is whether the
parenthesis nesting depth is greater than the value set by the modifier.

Arguments:
  depth       the current parenthesis nesting depth
  user_data   not used

Returns:      non-zero to kill the compilation
*/
2826 
2827 static int
stack_guard(uint32_t depth,void * user_data)2828 stack_guard(uint32_t depth, void *user_data)
2829 {
2830 (void)user_data;
2831 return depth > pat_patctl.stackguard_test;
2832 }
2833 
2834 
2835 /*************************************************
2836 *         JIT memory callback                    *
2837 *************************************************/
2838 
2839 static PCRE2_JIT_STACK*
jit_callback(void * arg)2840 jit_callback(void *arg)
2841 {
2842 jit_was_used = TRUE;
2843 return (PCRE2_JIT_STACK *)arg;
2844 }
2845 
2846 
2847 /*************************************************
2848 *      Convert UTF-8 character to code point     *
2849 *************************************************/
2850 
2851 /* This function reads one or more bytes that represent a UTF-8 character,
2852 and returns the codepoint of that character. Note that the function supports
2853 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2854 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2855 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2856 checking, and also for generating 32-bit non-UTF data values above the UTF
2857 limit.
2858 
2859 Argument:
2860   utf8bytes   a pointer to the byte vector
2861   vptr        a pointer to an int to receive the value
2862 
2863 Returns:      >  0 => the number of bytes consumed
2864               -6 to 0 => malformed UTF-8 character at offset = (-return)
2865 */
2866 
2867 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2868 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2869 {
2870 uint32_t c = *utf8bytes++;
2871 uint32_t d = c;
2872 int i, j, s;
2873 
2874 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2875   {
2876   if ((d & 0x80) == 0) break;
2877   d <<= 1;
2878   }
2879 
2880 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2881 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2882 
2883 /* i now has a value in the range 1-5 */
2884 
2885 s = 6*i;
2886 d = (c & utf8_table3[i]) << s;
2887 
2888 for (j = 0; j < i; j++)
2889   {
2890   c = *utf8bytes++;
2891   if ((c & 0xc0) != 0x80) return -(j+1);
2892   s -= 6;
2893   d |= (c & 0x3f) << s;
2894   }
2895 
2896 /* Check that encoding was the correct unique one */
2897 
2898 for (j = 0; j < utf8_table1_size; j++)
2899   if (d <= (uint32_t)utf8_table1[j]) break;
2900 if (j != i) return -(i+1);
2901 
2902 /* Valid value */
2903 
2904 *vptr = d;
2905 return i+1;
2906 }
2907 
2908 
2909 
2910 /*************************************************
2911 *             Print one character                *
2912 *************************************************/
2913 
2914 /* Print a single character either literally, or as a hex escape, and count how
2915 many printed characters are used.
2916 
2917 Arguments:
2918   c            the character
2919   utf          TRUE in UTF mode
2920   f            the FILE to print to, or NULL just to count characters
2921 
2922 Returns:       number of characters written
2923 */
2924 
2925 static int
pchar(uint32_t c,BOOL utf,FILE * f)2926 pchar(uint32_t c, BOOL utf, FILE *f)
2927 {
2928 int n = 0;
2929 char tempbuffer[16];
2930 
2931 if (PRINTOK(c))
2932   {
2933   if (f != NULL) fprintf(f, "%c", c);
2934   return 1;
2935   }
2936 
2937 if (c < 0x100)
2938   {
2939   if (utf)
2940     {
2941     if (f != NULL) fprintf(f, "\\x{%02x}", c);
2942     return 6;
2943     }
2944   else
2945     {
2946     if (f != NULL) fprintf(f, "\\x%02x", c);
2947     return 4;
2948     }
2949   }
2950 
2951 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2952   else n = sprintf(tempbuffer, "\\x{%02x}", c);
2953 
2954 return n >= 0 ? n : 0;
2955 }
2956 
2957 
2958 
2959 #ifdef SUPPORT_PCRE2_16
2960 /*************************************************
2961 *    Find length of 0-terminated 16-bit string   *
2962 *************************************************/
2963 
/* Count the 16-bit code units that precede the terminating zero.

Argument:  points to a zero-terminated 16-bit string
Returns:   the number of code units, excluding the terminator
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);   /* No detour through int, which could truncate */
}
2970 #endif  /* SUPPORT_PCRE2_16 */
2971 
2972 
2973 
2974 #ifdef SUPPORT_PCRE2_32
2975 /*************************************************
2976 *    Find length of 0-terminated 32-bit string   *
2977 *************************************************/
2978 
/* Count the 32-bit code units that precede the terminating zero.

Argument:  points to a zero-terminated 32-bit string
Returns:   the number of code units, excluding the terminator
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);   /* No detour through int, which could truncate */
}
2985 #endif  /* SUPPORT_PCRE2_32 */
2986 
2987 
2988 #ifdef SUPPORT_PCRE2_8
2989 /*************************************************
2990 *         Print 8-bit character string           *
2991 *************************************************/
2992 
2993 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2994 For printing *MARK strings, a negative length is given, indicating that the
2995 length is in the first code unit. If handed a NULL file, this function just
2996 counts chars without printing (because pchar() does that). */
2997 
pchars8(PCRE2_SPTR8 p,int length,BOOL utf,FILE * f)2998 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2999 {
3000 uint32_t c = 0;
3001 int yield = 0;
3002 if (length < 0) length = *p++;
3003 while (length-- > 0)
3004   {
3005   if (utf)
3006     {
3007     int rc = utf82ord(p, &c);
3008     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
3009       {
3010       length -= rc - 1;
3011       p += rc;
3012       yield += pchar(c, utf, f);
3013       continue;
3014       }
3015     }
3016   c = *p++;
3017   yield += pchar(c, utf, f);
3018   }
3019 
3020 return yield;
3021 }
3022 #endif
3023 
3024 
3025 #ifdef SUPPORT_PCRE2_16
3026 /*************************************************
3027 *           Print 16-bit character string        *
3028 *************************************************/
3029 
3030 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3031 For printing *MARK strings, a negative length is given, indicating that the
3032 length is in the first code unit. If handed a NULL file, just counts chars
3033 without printing. */
3034 
pchars16(PCRE2_SPTR16 p,int length,BOOL utf,FILE * f)3035 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
3036 {
3037 int yield = 0;
3038 if (length < 0) length = *p++;
3039 while (length-- > 0)
3040   {
3041   uint32_t c = *p++ & 0xffff;
3042   if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
3043     {
3044     int d = *p & 0xffff;
3045     if (d >= 0xDC00 && d <= 0xDFFF)
3046       {
3047       c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
3048       length--;
3049       p++;
3050       }
3051     }
3052   yield += pchar(c, utf, f);
3053   }
3054 return yield;
3055 }
3056 #endif  /* SUPPORT_PCRE2_16 */
3057 
3058 
3059 
3060 #ifdef SUPPORT_PCRE2_32
3061 /*************************************************
3062 *           Print 32-bit character string        *
3063 *************************************************/
3064 
3065 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3066 For printing *MARK strings, a negative length is given, indicating that the
3067 length is in the first code unit. If handed a NULL file, just counts chars
3068 without printing. */
3069 
pchars32(PCRE2_SPTR32 p,int length,BOOL utf,FILE * f)3070 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
3071 {
3072 int yield = 0;
3073 (void)(utf);  /* Avoid compiler warning */
3074 if (length < 0) length = *p++;
3075 while (length-- > 0)
3076   {
3077   uint32_t c = *p++;
3078   yield += pchar(c, utf, f);
3079   }
3080 return yield;
3081 }
3082 #endif  /* SUPPORT_PCRE2_32 */
3083 
3084 
3085 
3086 
3087 /*************************************************
3088 *       Convert character value to UTF-8         *
3089 *************************************************/
3090 
3091 /* This function takes an integer value in the range 0 - 0x7fffffff
3092 and encodes it as a UTF-8 character in 0 to 6 bytes. It is needed even when the
3093 8-bit library is not supported, to generate UTF-8 output for non-ASCII
3094 characters.
3095 
3096 Arguments:
3097   cvalue     the character value
3098   utf8bytes  pointer to buffer for result - at least 6 bytes long
3099 
3100 Returns:     number of characters placed in the buffer
3101 */
3102 
3103 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3104 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3105 {
3106 int i, j;
3107 if (cvalue > 0x7fffffffu)
3108   return -1;
3109 for (i = 0; i < utf8_table1_size; i++)
3110   if (cvalue <= (uint32_t)utf8_table1[i]) break;
3111 utf8bytes += i;
3112 for (j = i; j > 0; j--)
3113  {
3114  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3115  cvalue >>= 6;
3116  }
3117 *utf8bytes = utf8_table2[i] | cvalue;
3118 return i + 1;
3119 }
3120 
3121 
3122 
3123 #ifdef SUPPORT_PCRE2_16
3124 /*************************************************
3125 *           Convert string to 16-bit             *
3126 *************************************************/
3127 
3128 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3129 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3130 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3131 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3132 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3133 greater than 0xffff.
3134 
3135 If all the input bytes are ASCII, the space needed for a 16-bit string is
3136 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3137 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3138 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3139 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3140 save repeated re-sizing.
3141 
3142 Note that this function does not object to surrogate values. This is
3143 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3144 for the purpose of testing that they are correctly faulted.
3145 
3146 Arguments:
3147   p          points to a byte string
3148   utf        true in UTF mode
3149   lenptr     points to number of bytes in the string (excluding trailing zero)
3150 
3151 Returns:     0 on success, with the length updated to the number of 16-bit
3152                data items used (excluding the trailing zero)
3153              OR -1 if a UTF-8 string is malformed
3154              OR -2 if a value > 0x10ffff is encountered in UTF mode
3155              OR -3 if a value > 0xffff is encountered when not in UTF mode
3156 */
3157 
3158 static int
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3159 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3160 {
3161 uint16_t *pp;
3162 PCRE2_SIZE len = *lenptr;
3163 
3164 if (pbuffer16_size < 2*len + 2)
3165   {
3166   if (pbuffer16 != NULL) free(pbuffer16);
3167   pbuffer16_size = 2*len + 2;
3168   if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3169   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3170   if (pbuffer16 == NULL)
3171     {
3172     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3173       pbuffer16_size);
3174     exit(1);
3175     }
3176   }
3177 
3178 pp = pbuffer16;
3179 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3180   {
3181   for (; len > 0; len--) *pp++ = *p++;
3182   }
3183 else while (len > 0)
3184   {
3185   uint32_t c;
3186   int chlen = utf82ord(p, &c);
3187   if (chlen <= 0) return -1;
3188   if (!utf && c > 0xffff) return -3;
3189   if (c > 0x10ffff) return -2;
3190   p += chlen;
3191   len -= chlen;
3192   if (c < 0x10000) *pp++ = c; else
3193     {
3194     c -= 0x10000;
3195     *pp++ = 0xD800 | (c >> 10);
3196     *pp++ = 0xDC00 | (c & 0x3ff);
3197     }
3198   }
3199 
3200 *pp = 0;
3201 *lenptr = pp - pbuffer16;
3202 return 0;
3203 }
3204 #endif
3205 
3206 
3207 
3208 #ifdef SUPPORT_PCRE2_32
3209 /*************************************************
3210 *           Convert string to 32-bit             *
3211 *************************************************/
3212 
3213 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3214 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3215 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3216 limit of 0x10ffff cause an error.
3217 
3218 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3219 is set, and no limit is imposed. There is special interpretation of the 0xff
3220 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3221 next character to be set. This provides a way of generating 32-bit characters
3222 greater than 0x7fffffff.
3223 
3224 If all the input bytes are ASCII, the space needed for a 32-bit string is
3225 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3226 string is no more than four times, because the number of characters must be
3227 less than the number of bytes. The result is always left in pbuffer32. Impose a
3228 minimum size to save repeated re-sizing.
3229 
3230 Note that this function does not object to surrogate values. This is
3231 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3232 for the purpose of testing that they are correctly faulted.
3233 
3234 Arguments:
3235   p          points to a byte string
3236   utf        true in UTF mode
3237   lenptr     points to number of bytes in the string (excluding trailing zero)
3238 
3239 Returns:     0 on success, with the length updated to the number of 32-bit
3240                data items used (excluding the trailing zero)
3241              OR -1 if a UTF-8 string is malformed
3242              OR -2 if a value > 0x10ffff is encountered in UTF mode
3243 */
3244 
3245 static int
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3246 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3247 {
3248 uint32_t *pp;
3249 PCRE2_SIZE len = *lenptr;
3250 
3251 if (pbuffer32_size < 4*len + 4)
3252   {
3253   if (pbuffer32 != NULL) free(pbuffer32);
3254   pbuffer32_size = 4*len + 4;
3255   if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3256   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3257   if (pbuffer32 == NULL)
3258     {
3259     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3260       pbuffer32_size);
3261     exit(1);
3262     }
3263   }
3264 
3265 pp = pbuffer32;
3266 
3267 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3268   {
3269   for (; len > 0; len--) *pp++ = *p++;
3270   }
3271 
3272 else while (len > 0)
3273   {
3274   int chlen;
3275   uint32_t c;
3276   uint32_t topbit = 0;
3277   if (!utf && *p == 0xff && len > 1)
3278     {
3279     topbit = 0x80000000u;
3280     p++;
3281     len--;
3282     }
3283   chlen = utf82ord(p, &c);
3284   if (chlen <= 0) return -1;
3285   if (utf && c > 0x10ffff) return -2;
3286   p += chlen;
3287   len -= chlen;
3288   *pp++ = c | topbit;
3289   }
3290 
3291 *pp = 0;
3292 *lenptr = pp - pbuffer32;
3293 return 0;
3294 }
3295 #endif /* SUPPORT_PCRE2_32 */
3296 
3297 
3298 
3299 /* This function is no longer used. Keep it around for a while, just in case it
3300 needs to be re-instated. */
3301 
3302 #ifdef NEVERNEVERNEVER
3303 
3304 /*************************************************
3305 *         Move back by so many characters        *
3306 *************************************************/
3307 
3308 /* Given a code unit offset in a subject string, move backwards by a number of
3309 characters, and return the resulting offset.
3310 
3311 Arguments:
3312   subject   pointer to the string
3313   offset    start offset
3314   count     count to move back by
3315   utf       TRUE if in UTF mode
3316 
3317 Returns:   a possibly changed offset
3318 */
3319 
3320 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3321 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3322 {
3323 if (!utf || test_mode == PCRE32_MODE)
3324   return (count >= offset)? 0 : (offset - count);
3325 
3326 else if (test_mode == PCRE8_MODE)
3327   {
3328   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3329   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3330     {
3331     pp--;
3332     while ((*pp & 0xc0) == 0x80) pp--;
3333     }
3334   return pp - (PCRE2_SPTR8)subject;
3335   }
3336 
3337 else  /* 16-bit mode */
3338   {
3339   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3340   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3341     {
3342     pp--;
3343     if ((*pp & 0xfc00) == 0xdc00) pp--;
3344     }
3345   return pp - (PCRE2_SPTR16)subject;
3346   }
3347 }
3348 #endif  /* NEVERNEVERNEVER */
3349 
3350 
3351 
3352 /*************************************************
3353 *           Expand input buffers                 *
3354 *************************************************/
3355 
3356 /* This function doubles the size of the input buffer and the buffer for
3357 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3358 the new ones.
3359 
3360 Arguments: none
3361 Returns:   nothing (aborts if malloc() fails)
3362 */
3363 
3364 static void
expand_input_buffers(void)3365 expand_input_buffers(void)
3366 {
3367 int new_pbuffer8_size = 2*pbuffer8_size;
3368 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3369 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3370 
3371 if (new_buffer == NULL || new_pbuffer8 == NULL)
3372   {
3373   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3374   exit(1);
3375   }
3376 
3377 memcpy(new_buffer, buffer, pbuffer8_size);
3378 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3379 
3380 pbuffer8_size = new_pbuffer8_size;
3381 
3382 free(buffer);
3383 free(pbuffer8);
3384 
3385 buffer = new_buffer;
3386 pbuffer8 = new_pbuffer8;
3387 }
3388 
3389 
3390 
3391 /*************************************************
3392 *        Read or extend an input line            *
3393 *************************************************/
3394 
3395 /* Input lines are read into buffer, but both patterns and data lines can be
3396 continued over multiple input lines. In addition, if the buffer fills up, we
3397 want to automatically expand it so as to be able to handle extremely large
3398 lines that are needed for certain stress tests, although this is less likely
3399 now that there are repetition features for both patterns and data. When the
3400 input buffer is expanded, the other two buffers must also be expanded likewise,
3401 and the contents of pbuffer, which are a copy of the input for callouts, must
3402 be preserved (for when expansion happens for a data line). This is not the most
3403 optimal way of handling this, but hey, this is just a test program!
3404 
3405 Arguments:
3406   f            the file to read
3407   start        where in buffer to start (this *must* be within buffer)
3408   prompt       for stdin or readline()
3409 
3410 Returns:       pointer to the start of new data
3411                could be a copy of start, or could be moved
3412                NULL if no data read and EOF reached
3413 */
3414 
3415 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3416 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3417 {
3418 uint8_t *here = start;
3419 
3420 for (;;)
3421   {
3422   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3423 
3424   if (rlen > 1000)
3425     {
3426     size_t dlen;
3427 
3428     /* If libreadline or libedit support is required, use readline() to read a
3429     line if the input is a terminal. Note that readline() removes the trailing
3430     newline, so we must put it back again, to be compatible with fgets(). */
3431 
3432 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3433     if (INTERACTIVE(f))
3434       {
3435       size_t len;
3436       char *s = readline(prompt);
3437       if (s == NULL) return (here == start)? NULL : start;
3438       len = strlen(s);
3439       if (len > 0) add_history(s);
3440       if (len > rlen - 1) len = rlen - 1;
3441       memcpy(here, s, len);
3442       here[len] = '\n';
3443       here[len+1] = 0;
3444       free(s);
3445       }
3446     else
3447 #endif
3448 
3449     /* Read the next line by normal means, prompting if the file is a tty. */
3450 
3451       {
3452       if (INTERACTIVE(f)) printf("%s", prompt);
3453       if (fgets((char *)here, rlen,  f) == NULL)
3454         return (here == start)? NULL : start;
3455       }
3456 
3457     dlen = strlen((char *)here);
3458     here += dlen;
3459 
3460     /* Check for end of line reached. Take care not to read data from before
3461     start (dlen will be zero for a file starting with a binary zero). */
3462 
3463     if (here > start && here[-1] == '\n') return start;
3464 
3465     /* If we have not read a newline when reading a file, we have either filled
3466     the buffer or reached the end of the file. We can detect the former by
3467     checking that the string fills the buffer, and the latter by feof(). If
3468     neither of these is true, it means we read a binary zero which has caused
3469     strlen() to give a short length. This is a hard error because pcre2test
3470     expects to work with C strings. */
3471 
3472     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3473       {
3474       fprintf(outfile, "** Binary zero encountered in input\n");
3475       fprintf(outfile, "** pcre2test run abandoned\n");
3476       exit(1);
3477       }
3478     }
3479 
3480   else
3481     {
3482     size_t start_offset = start - buffer;
3483     size_t here_offset = here - buffer;
3484     expand_input_buffers();
3485     start = buffer + start_offset;
3486     here = buffer + here_offset;
3487     }
3488   }
3489 
3490 /* Control never gets here */
3491 }
3492 
3493 
3494 
3495 /*************************************************
3496 *         Case-independent strncmp() function    *
3497 *************************************************/
3498 
/* Compare two strings without regard to case, looking at no more than n
characters. As with strncmp(), the comparison also stops at the end of the
strings, so strings that are shorter than n are not read beyond their
terminating zeros. A count that is zero or negative compares nothing.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;
  if (*s == 0) break;      /* Both strings have ended together */
  s++;
  t++;
  }
return 0;
}
3518 
3519 
3520 
3521 /*************************************************
3522 *          Scan the main modifier list           *
3523 *************************************************/
3524 
3525 /* This function searches the modifier list for a long modifier name.
3526 
3527 Argument:
3528   p         start of the name
3529   lenp      length of the name
3530 
3531 Returns:    an index in the modifier list, or -1 on failure
3532 */
3533 
3534 static int
scan_modifiers(const uint8_t * p,unsigned int len)3535 scan_modifiers(const uint8_t *p, unsigned int len)
3536 {
3537 int bot = 0;
3538 int top = MODLISTCOUNT;
3539 
3540 while (top > bot)
3541   {
3542   int mid = (bot + top)/2;
3543   unsigned int mlen = strlen(modlist[mid].name);
3544   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3545   if (c == 0)
3546     {
3547     if (len == mlen) return mid;
3548     c = (int)len - (int)mlen;
3549     }
3550   if (c > 0) bot = mid + 1; else top = mid;
3551   }
3552 
3553 return -1;
3554 
3555 }
3556 
3557 
3558 
3559 /*************************************************
3560 *        Check a modifier and find its field     *
3561 *************************************************/
3562 
3563 /* This function is called when a modifier has been identified. We check that
3564 it is allowed here and find the field that is to be changed.
3565 
3566 Arguments:
3567   m          the modifier list entry
3568   ctx        CTX_PAT     => pattern context
3569              CTX_POPPAT  => pattern context for popped pattern
3570              CTX_DEFPAT  => default pattern context
3571              CTX_DAT     => data context
3572              CTX_DEFDAT  => default data context
3573   pctl       point to pattern control block
3574   dctl       point to data control block
3575   c          a single character or 0
3576 
3577 Returns:     a field pointer or NULL
3578 */
3579 
3580 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3581 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3582 {
3583 void *field = NULL;
3584 PCRE2_SIZE offset = m->offset;
3585 
3586 if (restrict_for_perl_test) switch(m->which)
3587   {
3588   case MOD_PNDP:
3589   case MOD_PATP:
3590   case MOD_DATP:
3591   case MOD_PDP:
3592   break;
3593 
3594   default:
3595   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3596     m->name);
3597   return NULL;
3598   }
3599 
3600 switch (m->which)
3601   {
3602   case MOD_CTC:  /* Compile context modifier */
3603   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3604     else if (ctx == CTX_PAT) field = PTR(pat_context);
3605   break;
3606 
3607   case MOD_CTM:  /* Match context modifier */
3608   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3609     else if (ctx == CTX_DAT) field = PTR(dat_context);
3610   break;
3611 
3612   case MOD_DAT:    /* Data line modifier */
3613   case MOD_DATP:   /* Allowed for Perl test */
3614   if (dctl != NULL) field = dctl;
3615   break;
3616 
3617   case MOD_PAT:    /* Pattern modifier */
3618   case MOD_PATP:   /* Allowed for Perl test */
3619   if (pctl != NULL) field = pctl;
3620   break;
3621 
3622   case MOD_PD:   /* Pattern or data line modifier */
3623   case MOD_PDP:  /* Ditto, allowed for Perl test */
3624   case MOD_PND:  /* Ditto, but not default pattern */
3625   case MOD_PNDP: /* Ditto, allowed for Perl test */
3626   if (dctl != NULL) field = dctl;
3627     else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3628              ctx != CTX_DEFPAT))
3629       field = pctl;
3630   break;
3631   }
3632 
3633 if (field == NULL)
3634   {
3635   if (c == 0)
3636     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3637   else
3638     fprintf(outfile, "** /%c is not valid here\n", c);
3639   return NULL;
3640   }
3641 
3642 return (char *)field + offset;
3643 }
3644 
3645 
3646 
3647 /*************************************************
3648 *            Decode a modifier list              *
3649 *************************************************/
3650 
3651 /* A pointer to a control block is NULL when called in cases when that block is
3652 not relevant. They are never all relevant in one call. At least one of patctl
3653 and datctl is NULL. The second argument specifies which context to use for
3654 modifiers that apply to contexts.
3655 
3656 Arguments:
3657   p          point to modifier string
3658   ctx        CTX_PAT     => pattern context
3659              CTX_POPPAT  => pattern context for popped pattern
3660              CTX_DEFPAT  => default pattern context
3661              CTX_DAT     => data context
3662              CTX_DEFDAT  => default data context
3663   pctl       point to pattern control block
3664   dctl       point to data control block
3665 
3666 Returns: TRUE if successful decode, FALSE otherwise
3667 */
3668 
3669 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3670 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3671 {
3672 uint8_t *ep, *pp;
3673 long li;
3674 unsigned long uli;
3675 BOOL first = TRUE;
3676 
3677 for (;;)
3678   {
3679   void *field;
3680   modstruct *m;
3681   BOOL off = FALSE;
3682   unsigned int i, len;
3683   int index;
3684   char *endptr;
3685 
3686   /* Skip white space and commas. */
3687 
3688   while (isspace(*p) || *p == ',') p++;
3689   if (*p == 0) break;
3690 
3691   /* Find the end of the item; lose trailing whitespace at end of line. */
3692 
3693   for (ep = p; *ep != 0 && *ep != ','; ep++);
3694   if (*ep == 0)
3695     {
3696     while (ep > p && isspace(ep[-1])) ep--;
3697     *ep = 0;
3698     }
3699 
3700   /* Remember if the first character is '-'. */
3701 
3702   if (*p == '-')
3703     {
3704     off = TRUE;
3705     p++;
3706     }
3707 
3708   /* Find the length of a full-length modifier name, and scan for it. */
3709 
3710   pp = p;
3711   while (pp < ep && *pp != '=') pp++;
3712   index = scan_modifiers(p, pp - p);
3713 
3714   /* If the first modifier is unrecognized, try to interpret it as a sequence
3715   of single-character abbreviated modifiers. None of these modifiers have any
3716   associated data. They just set options or control bits. */
3717 
3718   if (index < 0)
3719     {
3720     uint32_t cc;
3721     uint8_t *mp = p;
3722 
3723     if (!first)
3724       {
3725       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3726       if (ep - p == 1)
3727         fprintf(outfile, "** Single-character modifiers must come first\n");
3728       return FALSE;
3729       }
3730 
3731     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3732       {
3733       for (i = 0; i < C1MODLISTCOUNT; i++)
3734         if (cc == c1modlist[i].onechar) break;
3735 
3736       if (i >= C1MODLISTCOUNT)
3737         {
3738         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3739           *p, (int)(ep-mp), mp);
3740         return FALSE;
3741         }
3742 
3743       if (c1modlist[i].index >= 0)
3744         {
3745         index = c1modlist[i].index;
3746         }
3747 
3748       else
3749         {
3750         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3751           strlen(c1modlist[i].fullname));
3752         if (index < 0)
3753           {
3754           fprintf(outfile, "** Internal error: single-character equivalent "
3755             "modifier '%s' not found\n", c1modlist[i].fullname);
3756           return FALSE;
3757           }
3758         c1modlist[i].index = index;     /* Cache for next time */
3759         }
3760 
3761       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3762       if (field == NULL) return FALSE;
3763 
3764       /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3765       PCRE2_EXTENDED_MORE. */
3766 
3767       if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3768         {
3769         *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3770         *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3771         }
3772       else
3773         *((uint32_t *)field) |= modlist[index].value;
3774       }
3775 
3776     continue;    /* With tne next (fullname) modifier */
3777     }
3778 
3779   /* We have a match on a full-name modifier. Check for the existence of data
3780   when needed. */
3781 
3782   m = modlist + index;      /* Save typing */
3783   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3784       (m->type != MOD_IND || *pp == '='))
3785     {
3786     if (*pp++ != '=')
3787       {
3788       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3789       return FALSE;
3790       }
3791     if (off)
3792       {
3793       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3794       return FALSE;
3795       }
3796     }
3797 
3798   /* These on/off types have no data. */
3799 
3800   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3801     {
3802     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3803     return FALSE;
3804     }
3805 
3806   /* Set the data length for those types that have data. Then find the field
3807   that is to be set. If check_modifier() returns NULL, it has already output an
3808   error message. */
3809 
3810   len = ep - pp;
3811   field = check_modifier(m, ctx, pctl, dctl, 0);
3812   if (field == NULL) return FALSE;
3813 
3814   /* Process according to data type. */
3815 
3816   switch (m->type)
3817     {
3818     case MOD_CTL:
3819     case MOD_OPT:
3820     if (off) *((uint32_t *)field) &= ~m->value;
3821       else *((uint32_t *)field) |= m->value;
3822     break;
3823 
3824     case MOD_BSR:
3825     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3826       {
3827 #ifdef BSR_ANYCRLF
3828       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3829 #else
3830       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3831 #endif
3832       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3833         else dctl->control2 &= ~CTL2_BSR_SET;
3834       }
3835     else
3836       {
3837       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3838         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3839       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3840         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3841       else goto INVALID_VALUE;
3842       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3843         else dctl->control2 |= CTL2_BSR_SET;
3844       }
3845     pp = ep;
3846     break;
3847 
3848     case MOD_CHR:  /* A single character */
3849     *((uint32_t *)field) = *pp++;
3850     break;
3851 
3852     case MOD_CON:  /* A convert type/options list */
3853     for (;; pp++)
3854       {
3855       uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3856       len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3857       for (i = 0; i < convertlistcount; i++)
3858         {
3859         if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3860           {
3861           if (*((uint32_t *)field) == CONVERT_UNSET)
3862             *((uint32_t *)field) = convertlist[i].option;
3863           else
3864             *((uint32_t *)field) |= convertlist[i].option;
3865           break;
3866           }
3867         }
3868       if (i >= convertlistcount) goto INVALID_VALUE;
3869       pp += len;
3870       if (*pp != ':') break;
3871       }
3872     break;
3873 
3874     case MOD_IN2:    /* One or two unsigned integers */
3875     if (!isdigit(*pp)) goto INVALID_VALUE;
3876     uli = strtoul((const char *)pp, &endptr, 10);
3877     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3878     ((uint32_t *)field)[0] = (uint32_t)uli;
3879     if (*endptr == ':')
3880       {
3881       uli = strtoul((const char *)endptr+1, &endptr, 10);
3882       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3883       ((uint32_t *)field)[1] = (uint32_t)uli;
3884       }
3885     else ((uint32_t *)field)[1] = 0;
3886     pp = (uint8_t *)endptr;
3887     break;
3888 
3889     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3890     less than ULONG_MAX. So first test for overflowing the long int, and then
3891     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3892 
3893     case MOD_SIZ:    /* PCRE2_SIZE value */
3894     if (!isdigit(*pp)) goto INVALID_VALUE;
3895     uli = strtoul((const char *)pp, &endptr, 10);
3896     if (uli == ULONG_MAX) goto INVALID_VALUE;
3897 #if ULONG_MAX > PCRE2_SIZE_MAX
3898     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3899 #endif
3900     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3901     pp = (uint8_t *)endptr;
3902     break;
3903 
3904     case MOD_IND:    /* Unsigned integer with default */
3905     if (len == 0)
3906       {
3907       *((uint32_t *)field) = (uint32_t)(m->value);
3908       break;
3909       }
3910     /* Fall through */
3911 
3912     case MOD_INT:    /* Unsigned integer */
3913     if (!isdigit(*pp)) goto INVALID_VALUE;
3914     uli = strtoul((const char *)pp, &endptr, 10);
3915     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3916     *((uint32_t *)field) = (uint32_t)uli;
3917     pp = (uint8_t *)endptr;
3918     break;
3919 
3920     case MOD_INS:   /* Signed integer */
3921     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3922     li = strtol((const char *)pp, &endptr, 10);
3923     if (S32OVERFLOW(li)) goto INVALID_VALUE;
3924     *((int32_t *)field) = (int32_t)li;
3925     pp = (uint8_t *)endptr;
3926     break;
3927 
3928     case MOD_NL:
3929     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3930       if (len == strlen(newlines[i]) &&
3931         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3932     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3933     if (i == 0)
3934       {
3935       *((uint16_t *)field) = NEWLINE_DEFAULT;
3936       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3937         else dctl->control2 &= ~CTL2_NL_SET;
3938       }
3939     else
3940       {
3941       *((uint16_t *)field) = i;
3942       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3943         else dctl->control2 |= CTL2_NL_SET;
3944       }
3945     pp = ep;
3946     break;
3947 
3948     case MOD_NN:              /* Name or (signed) number; may be several */
3949     if (isdigit(*pp) || *pp == '-')
3950       {
3951       int ct = MAXCPYGET - 1;
3952       int32_t value;
3953       li = strtol((const char *)pp, &endptr, 10);
3954       if (S32OVERFLOW(li)) goto INVALID_VALUE;
3955       value = (int32_t)li;
3956       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
3957       if (value >= 0)                                    /* Add new number */
3958         {
3959         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
3960           field = (char *)field + sizeof(int32_t);
3961         if (ct <= 0)
3962           {
3963           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3964           return FALSE;
3965           }
3966         }
3967       *((int32_t *)field) = value;
3968       if (ct > 0) ((int32_t *)field)[1] = -1;
3969       pp = (uint8_t *)endptr;
3970       }
3971 
3972     /* Multiple strings are put end to end. */
3973 
3974     else
3975       {
3976       char *nn = (char *)field;
3977       if (len > 0)                    /* Add new name */
3978         {
3979         if (len > MAX_NAME_SIZE)
3980           {
3981           fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3982           return FALSE;
3983           }
3984         while (*nn != 0) nn += strlen(nn) + 1;
3985         if (nn + len + 2 - (char *)field > LENCPYGET)
3986           {
3987           fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3988             m->name);
3989           return FALSE;
3990           }
3991         memcpy(nn, pp, len);
3992         }
3993       nn[len] = 0 ;
3994       nn[len+1] = 0;
3995       pp = ep;
3996       }
3997     break;
3998 
3999     case MOD_STR:
4000     if (len + 1 > m->value)
4001       {
4002       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
4003         m->name, m->value - 1);
4004       return FALSE;
4005       }
4006     memcpy(field, pp, len);
4007     ((uint8_t *)field)[len] = 0;
4008     pp = ep;
4009     break;
4010     }
4011 
4012   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4013     {
4014     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4015     return FALSE;
4016     }
4017 
4018   p = pp;
4019   first = FALSE;
4020 
4021   if (ctx == CTX_POPPAT &&
4022      (pctl->options != 0 ||
4023       pctl->tables_id != 0 ||
4024       pctl->locale[0] != 0 ||
4025       (pctl->control & NOTPOP_CONTROLS) != 0))
4026     {
4027     fprintf(outfile, "** '%s' is not valid here\n", m->name);
4028     return FALSE;
4029     }
4030   }
4031 
4032 return TRUE;
4033 
4034 INVALID_VALUE:
4035 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4036 return FALSE;
4037 }
4038 
4039 
4040 /*************************************************
4041 *             Get info from a pattern            *
4042 *************************************************/
4043 
4044 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4045 pattern.
4046 
4047 Arguments:
4048   what        code for the required information
4049   where       where to put the answer
4050   unsetok     PCRE2_ERROR_UNSET is an "expected" result
4051 
4052 Returns:      the return from pcre2_pattern_info()
4053 */
4054 
4055 static int
pattern_info(int what,void * where,BOOL unsetok)4056 pattern_info(int what, void *where, BOOL unsetok)
4057 {
4058 int rc;
4059 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
4060 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4061 if (rc >= 0) return 0;
4062 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4063   {
4064   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4065     what);
4066   if (rc == PCRE2_ERROR_BADMODE)
4067     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4068       "%d-bit mode\n", test_mode,
4069       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4070   }
4071 return rc;
4072 }
4073 
4074 
4075 
4076 #ifdef SUPPORT_PCRE2_8
4077 /*************************************************
4078 *             Show something in a list           *
4079 *************************************************/
4080 
4081 /* This function just helps to keep the code that uses it tidier. It's used for
4082 various lists of things where there needs to be introductory text before the
4083 first item. As these calls are all in the POSIX-support code, they happen only
4084 when 8-bit mode is supported. */
4085 
4086 static void
prmsg(const char ** msg,const char * s)4087 prmsg(const char **msg, const char *s)
4088 {
4089 fprintf(outfile, "%s %s", *msg, s);
4090 *msg = "";
4091 }
4092 #endif  /* SUPPORT_PCRE2_8 */
4093 
4094 
4095 
4096 /*************************************************
4097 *                Show control bits               *
4098 *************************************************/
4099 
4100 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4101 Because the bits are unique, this can be used for both pattern and data control
4102 words.
4103 
4104 Arguments:
4105   controls    control bits
4106   controls2   more control bits
4107   before      text to print before
4108 
4109 Returns:      nothing
4110 */
4111 
4112 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4113 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4114 {
4115 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4116   before,
4117   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4118   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4119   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4120   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4121   ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4122   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4123   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4124   ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4125   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4126   ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4127   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4128   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4129   ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4130   ((controls & CTL_DFA) != 0)? " dfa" : "",
4131   ((controls & CTL_EXPAND) != 0)? " expand" : "",
4132   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4133   ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4134   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4135   ((controls & CTL_GETALL) != 0)? " getall" : "",
4136   ((controls & CTL_GLOBAL) != 0)? " global" : "",
4137   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4138   ((controls & CTL_INFO) != 0)? " info" : "",
4139   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4140   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4141   ((controls & CTL_MARK) != 0)? " mark" : "",
4142   ((controls & CTL_MEMORY) != 0)? " memory" : "",
4143   ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4144   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4145   ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
4146   ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
4147   ((controls & CTL_POSIX) != 0)? " posix" : "",
4148   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4149   ((controls & CTL_PUSH) != 0)? " push" : "",
4150   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4151   ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4152   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4153   ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4154   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4155   ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4156   ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4157   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4158   ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4159   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4160   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4161   ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4162   ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4163   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4164 }
4165 
4166 
4167 
4168 /*************************************************
4169 *                Show compile options            *
4170 *************************************************/
4171 
4172 /* Called from show_pattern_info() and for unsupported POSIX options.
4173 
4174 Arguments:
4175   options     an options word
4176   before      text to print before
4177   after       text to print after
4178 
4179 Returns:      nothing
4180 */
4181 
4182 static void
show_compile_options(uint32_t options,const char * before,const char * after)4183 show_compile_options(uint32_t options, const char *before, const char *after)
4184 {
4185 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4186 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4187   before,
4188   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4189   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4190   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4191   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4192   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4193   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4194   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4195   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4196   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4197   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4198   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4199   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4200   ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4201   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4202   ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4203   ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4204   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4205   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4206   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4207   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4208   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4209   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4210   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4211   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4212   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4213   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4214   ((options & PCRE2_UCP) != 0)? " ucp" : "",
4215   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4216   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4217   ((options & PCRE2_UTF) != 0)? " utf" : "",
4218   after);
4219 }
4220 
4221 
4222 /*************************************************
4223 *           Show compile extra options           *
4224 *************************************************/
4225 
4226 /* Called from show_pattern_info() and for unsupported POSIX options.
4227 
4228 Arguments:
4229   options     an options word
4230   before      text to print before
4231   after       text to print after
4232 
4233 Returns:      nothing
4234 */
4235 
4236 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4237 show_compile_extra_options(uint32_t options, const char *before,
4238   const char *after)
4239 {
4240 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4241 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4242   before,
4243   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4244   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4245   ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4246   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4247   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4248   ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4249   after);
4250 }
4251 
4252 
4253 
4254 #ifdef SUPPORT_PCRE2_8
4255 /*************************************************
4256 *                Show match options              *
4257 *************************************************/
4258 
4259 /* Called for unsupported POSIX options. */
4260 
4261 static void
show_match_options(uint32_t options)4262 show_match_options(uint32_t options)
4263 {
4264 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4265   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4266   ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4267   ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4268   ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4269   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4270   ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4271   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4272   ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4273   ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4274   ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4275   ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4276   ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4277   ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4278 }
4279 #endif  /* SUPPORT_PCRE2_8 */
4280 
4281 
4282 
4283 /*************************************************
4284 *      Show memory usage info for a pattern      *
4285 *************************************************/
4286 
4287 static void
show_memory_info(void)4288 show_memory_info(void)
4289 {
4290 uint32_t name_count, name_entry_size;
4291 size_t size, cblock_size;
4292 
4293 /* One of the test_mode values will always be true, but to stop a compiler
4294 warning we must initialize cblock_size. */
4295 
4296 cblock_size = 0;
4297 #ifdef SUPPORT_PCRE2_8
4298 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4299 #endif
4300 #ifdef SUPPORT_PCRE2_16
4301 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4302 #endif
4303 #ifdef SUPPORT_PCRE2_32
4304 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4305 #endif
4306 
4307 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4308 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4309 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4310 fprintf(outfile, "Memory allocation (code space): %d\n",
4311   (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4312 if (pat_patctl.jit != 0)
4313   {
4314   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4315   fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4316   }
4317 }
4318 
4319 
4320 
4321 /*************************************************
4322 *       Show frame size info for a pattern       *
4323 *************************************************/
4324 
4325 static void
show_framesize(void)4326 show_framesize(void)
4327 {
4328 size_t frame_size;
4329 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4330 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4331 }
4332 
4333 
4334 
4335 /*************************************************
4336 *         Get and output an error message        *
4337 *************************************************/
4338 
4339 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4340 print_error_message(int errorcode, const char *before, const char *after)
4341 {
4342 int len;
4343 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4344 if (len < 0)
4345   {
4346   fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4347     "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4348   }
4349 else
4350   {
4351   fprintf(outfile, "%s", before);
4352   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4353   fprintf(outfile, "%s", after);
4354   }
4355 return len >= 0;
4356 }
4357 
4358 
4359 /*************************************************
4360 *     Callback function for callout enumeration  *
4361 *************************************************/
4362 
4363 /* The only differences in the callout enumeration block for different code
4364 unit widths are that the pointers to the subject, the most recent MARK, and a
4365 callout argument string point to strings of the appropriate width. Casts can be
4366 used to deal with this.
4367 
4368 Argument:
4369   cb            pointer to enumerate block
4370   callout_data  user data
4371 
4372 Returns:    0
4373 */
4374 
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
int item_length;

(void)callout_data;  /* Not currently displayed */

fprintf(outfile, "Callout ");

if (cb->callout_string == NULL)
  {
  /* A callout without a string argument is identified by its number. */
  fprintf(outfile, "%d  ", cb->callout_number);
  }
else
  {
  /* The opening delimiter is the code unit immediately before the string.
  After the string itself, output the closing delimiter: this is the entry in
  callout_end_delims at the position where the opening delimiter is found in
  callout_start_delims, or the opening delimiter again if it is not listed. */

  uint32_t n = 0;
  uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);

  fprintf(outfile, "%c", delimiter);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);

  while (callout_start_delims[n] != 0 &&
         delimiter != callout_start_delims[n]) n++;
  if (callout_start_delims[n] != 0) delimiter = callout_end_delims[n];

  fprintf(outfile, "%c  ", delimiter);
  }

/* Show the pattern item that follows the callout; when its length is given as
zero, a single code unit is shown. */

item_length = (int)((cb->next_item_length == 0)? 1 : cb->next_item_length);
fprintf(outfile, "%.*s\n", item_length, pbuffer8 + cb->pattern_position);

return 0;
}
4406 
4407 
4408 
4409 /*************************************************
4410 *        Show information about a pattern        *
4411 *************************************************/
4412 
4413 /* This function is called after a pattern has been compiled if any of the
4414 information-requesting controls have been set.
4415 
4416 Arguments:  none
4417 
4418 Returns:    PR_OK     continue processing next line
4419             PR_SKIP   skip to a blank line
4420             PR_ABEND  abort the pcre2test run
4421 */
4422 
4423 static int
show_pattern_info(void)4424 show_pattern_info(void)
4425 {
4426 uint32_t compile_options, overall_options, extra_options;
4427 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4428 
4429 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4430   {
4431   fprintf(outfile, "------------------------------------------------------------------\n");
4432   PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4433   }
4434 
4435 if ((pat_patctl.control & CTL_INFO) != 0)
4436   {
4437   int rc;
4438   void *nametable;
4439   uint8_t *start_bits;
4440   BOOL heap_limit_set, match_limit_set, depth_limit_set;
4441   uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4442     hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4443     depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4444     newline_convention;
4445 
4446   /* Exercise the error route. */
4447 
4448   PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4449   (void)rc;
4450 
4451   /* These info requests may return PCRE2_ERROR_UNSET. */
4452 
4453   switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4454     {
4455     case 0:
4456     heap_limit_set = TRUE;
4457     break;
4458 
4459     case PCRE2_ERROR_UNSET:
4460     heap_limit_set = FALSE;
4461     break;
4462 
4463     default:
4464     return PR_ABEND;
4465     }
4466 
4467   switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4468     {
4469     case 0:
4470     match_limit_set = TRUE;
4471     break;
4472 
4473     case PCRE2_ERROR_UNSET:
4474     match_limit_set = FALSE;
4475     break;
4476 
4477     default:
4478     return PR_ABEND;
4479     }
4480 
4481   switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4482     {
4483     case 0:
4484     depth_limit_set = TRUE;
4485     break;
4486 
4487     case PCRE2_ERROR_UNSET:
4488     depth_limit_set = FALSE;
4489     break;
4490 
4491     default:
4492     return PR_ABEND;
4493     }
4494 
4495   /* These info requests should always succeed. */
4496 
4497   if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4498       pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4499       pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4500       pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4501       pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4502       pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4503       pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4504       pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4505       pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4506       pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4507       pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4508       pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4509       pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4510       pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4511       pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4512       pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4513       pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4514       != 0)
4515     return PR_ABEND;
4516 
4517   fprintf(outfile, "Capture group count = %d\n", capture_count);
4518 
4519   if (backrefmax > 0)
4520     fprintf(outfile, "Max back reference = %d\n", backrefmax);
4521 
4522   if (maxlookbehind > 0)
4523     fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4524 
4525   if (heap_limit_set)
4526     fprintf(outfile, "Heap limit = %u\n", heap_limit);
4527 
4528   if (match_limit_set)
4529     fprintf(outfile, "Match limit = %u\n", match_limit);
4530 
4531   if (depth_limit_set)
4532     fprintf(outfile, "Depth limit = %u\n", depth_limit);
4533 
4534   if (namecount > 0)
4535     {
4536     fprintf(outfile, "Named capture groups:\n");
4537     for (; namecount > 0; namecount--)
4538       {
4539       int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4540       uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4541       fprintf(outfile, "  ");
4542 
4543       /* In UTF mode the name may be a UTF string containing non-ASCII
4544       letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4545       use the normal string printing functions, which use escapes for all
4546       non-ASCII characters. */
4547 
4548       if (utf)
4549         {
4550 #ifdef SUPPORT_PCRE2_32
4551         if (test_mode == PCRE32_MODE)
4552           {
4553           PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4554           while (*nameptr != 0)
4555             {
4556             uint8_t u8buff[6];
4557             int len = ord2utf8(*nameptr++, u8buff);
4558             fprintf(outfile, "%.*s", len, u8buff);
4559             }
4560           }
4561 #endif
4562 #ifdef SUPPORT_PCRE2_16
4563         if (test_mode == PCRE16_MODE)
4564           {
4565           PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4566           while (*nameptr != 0)
4567             {
4568             int len;
4569             uint8_t u8buff[6];
4570             uint32_t c = *nameptr++ & 0xffff;
4571             if (c >= 0xD800 && c < 0xDC00)
4572               c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4573             len = ord2utf8(c, u8buff);
4574             fprintf(outfile, "%.*s", len, u8buff);
4575             }
4576           }
4577 #endif
4578 #ifdef SUPPORT_PCRE2_8
4579         if (test_mode == PCRE8_MODE)
4580           fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4581 #endif
4582         }
4583       else  /* Not UTF mode */
4584         {
4585         PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4586         }
4587 
4588       while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4589 
4590 #ifdef SUPPORT_PCRE2_32
4591       if (test_mode == PCRE32_MODE)
4592         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4593 #endif
4594 #ifdef SUPPORT_PCRE2_16
4595       if (test_mode == PCRE16_MODE)
4596         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4597 #endif
4598 #ifdef SUPPORT_PCRE2_8
4599       if (test_mode == PCRE8_MODE)
4600         fprintf(outfile, "%3d\n", (int)(
4601         ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4602 #endif
4603 
4604       nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4605       }
4606     }
4607 
4608   if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
4609   if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4610   if (match_empty)   fprintf(outfile, "May match empty string\n");
4611 
4612   pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4613   pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4614   pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4615 
4616   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4617   cluttering up the verification output of non-UTF test files. */
4618 
4619   if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4620     {
4621     compile_options &= ~PCRE2_NEVER_UTF;
4622     overall_options &= ~PCRE2_NEVER_UTF;
4623     }
4624 
4625   if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4626     {
4627     compile_options &= ~PCRE2_NEVER_UCP;
4628     overall_options &= ~PCRE2_NEVER_UCP;
4629     }
4630 
4631   if ((compile_options|overall_options) != 0)
4632     {
4633     if (compile_options == overall_options)
4634       show_compile_options(compile_options, "Options:", "\n");
4635     else
4636       {
4637       show_compile_options(compile_options, "Compile options:", "\n");
4638       show_compile_options(overall_options, "Overall options:", "\n");
4639       }
4640     }
4641 
4642   if (extra_options != 0)
4643     show_compile_extra_options(extra_options, "Extra options:", "\n");
4644 
4645   if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4646 
4647   if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4648       (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4649     fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4650       "any Unicode newline" : "CR, LF, or CRLF");
4651 
4652   if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4653     {
4654     switch (newline_convention)
4655       {
4656       case PCRE2_NEWLINE_CR:
4657       fprintf(outfile, "Forced newline is CR\n");
4658       break;
4659 
4660       case PCRE2_NEWLINE_LF:
4661       fprintf(outfile, "Forced newline is LF\n");
4662       break;
4663 
4664       case PCRE2_NEWLINE_CRLF:
4665       fprintf(outfile, "Forced newline is CRLF\n");
4666       break;
4667 
4668       case PCRE2_NEWLINE_ANYCRLF:
4669       fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4670       break;
4671 
4672       case PCRE2_NEWLINE_ANY:
4673       fprintf(outfile, "Forced newline is any Unicode newline\n");
4674       break;
4675 
4676       case PCRE2_NEWLINE_NUL:
4677       fprintf(outfile, "Forced newline is NUL\n");
4678       break;
4679 
4680       default:
4681       break;
4682       }
4683     }
4684 
4685   if (first_ctype == 2)
4686     {
4687     fprintf(outfile, "First code unit at start or follows newline\n");
4688     }
4689   else if (first_ctype == 1)
4690     {
4691     const char *caseless =
4692       ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4693       "" : " (caseless)";
4694     if (PRINTOK(first_cunit))
4695       fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4696     else
4697       {
4698       fprintf(outfile, "First code unit = ");
4699       pchar(first_cunit, FALSE, outfile);
4700       fprintf(outfile, "%s\n", caseless);
4701       }
4702     }
4703   else if (start_bits != NULL)
4704     {
4705     int i;
4706     int c = 24;
4707     fprintf(outfile, "Starting code units: ");
4708     for (i = 0; i < 256; i++)
4709       {
4710       if ((start_bits[i/8] & (1u << (i&7))) != 0)
4711         {
4712         if (c > 75)
4713           {
4714           fprintf(outfile, "\n  ");
4715           c = 2;
4716           }
4717         if (PRINTOK(i) && i != ' ')
4718           {
4719           fprintf(outfile, "%c ", i);
4720           c += 2;
4721           }
4722         else
4723           {
4724           fprintf(outfile, "\\x%02x ", i);
4725           c += 5;
4726           }
4727         }
4728       }
4729     fprintf(outfile, "\n");
4730     }
4731 
4732   if (last_ctype != 0)
4733     {
4734     const char *caseless =
4735       ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4736       "" : " (caseless)";
4737     if (PRINTOK(last_cunit))
4738       fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4739     else
4740       {
4741       fprintf(outfile, "Last code unit = ");
4742       pchar(last_cunit, FALSE, outfile);
4743       fprintf(outfile, "%s\n", caseless);
4744       }
4745     }
4746 
4747   if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4748     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4749 
4750   if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4751     {
4752     if (FLD(compiled_code, executable_jit) != NULL)
4753       fprintf(outfile, "JIT compilation was successful\n");
4754     else
4755       {
4756 #ifdef SUPPORT_JIT
4757       fprintf(outfile, "JIT compilation was not successful");
4758       if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4759         return PR_ABEND;
4760       fprintf(outfile, "\n");
4761 #else
4762       fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4763 #endif
4764       }
4765     }
4766   }
4767 
4768 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4769   {
4770   int errorcode;
4771   PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4772   if (errorcode != 0)
4773     {
4774     fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4775     if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4776       return PR_ABEND;
4777     return PR_SKIP;
4778     }
4779   }
4780 
4781 return PR_OK;
4782 }
4783 
4784 
4785 
4786 /*************************************************
4787 *              Handle serialization error        *
4788 *************************************************/
4789 
4790 /* Print an error message after a serialization failure.
4791 
4792 Arguments:
4793   rc         the error code
4794   msg        an initial message for what failed
4795 
4796 Returns:     FALSE if print_error_message() fails
4797 */
4798 
4799 static BOOL
serial_error(int rc,const char * msg)4800 serial_error(int rc, const char *msg)
4801 {
4802 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4803 return print_error_message(rc, "", "\n");
4804 }
4805 
4806 
4807 
4808 /*************************************************
4809 *        Open file for save/load commands        *
4810 *************************************************/
4811 
4812 /* This function decodes the file name and opens the file.
4813 
4814 Arguments:
4815   buffptr     point after the #command
4816   mode        open mode
4817   fptr        points to the FILE variable
4818   name        name of # command
4819 
4820 Returns:      PR_OK or PR_ABEND
4821 */
4822 
4823 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4824 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4825 {
4826 char *endf;
4827 char *filename = (char *)buffptr;
4828 while (isspace(*filename)) filename++;
4829 endf = filename + strlen8(filename);
4830 while (endf > filename && isspace(endf[-1])) endf--;
4831 
4832 if (endf == filename)
4833   {
4834   fprintf(outfile, "** File name expected after %s\n", name);
4835   return PR_ABEND;
4836   }
4837 
4838 *endf = 0;
4839 *fptr = fopen((const char *)filename, mode);
4840 if (*fptr == NULL)
4841   {
4842   fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4843   return PR_ABEND;
4844   }
4845 
4846 return PR_OK;
4847 }
4848 
4849 
4850 
4851 /*************************************************
4852 *               Process command line             *
4853 *************************************************/
4854 
4855 /* This function is called for lines beginning with # and a character that is
4856 not ! or whitespace, when encountered between tests, which means that there is
4857 no compiled pattern (compiled_code is NULL). The line is in buffer.
4858 
4859 Arguments:  none
4860 
4861 Returns:    PR_OK     continue processing next line
4862             PR_SKIP   skip to a blank line
4863             PR_ABEND  abort the pcre2test run
4864 */
4865 
4866 static int
process_command(void)4867 process_command(void)
4868 {
4869 FILE *f;
4870 PCRE2_SIZE serial_size;
4871 size_t i;
4872 int rc, cmd, cmdlen, yield;
4873 uint16_t first_listed_newline;
4874 const char *cmdname;
4875 uint8_t *argptr, *serial;
4876 
4877 yield = PR_OK;
4878 cmd = CMD_UNKNOWN;
4879 cmdlen = 0;
4880 
4881 for (i = 0; i < cmdlistcount; i++)
4882   {
4883   cmdname = cmdlist[i].name;
4884   cmdlen = strlen(cmdname);
4885   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4886       isspace(buffer[cmdlen+1]))
4887     {
4888     cmd = cmdlist[i].value;
4889     break;
4890     }
4891   }
4892 
4893 argptr = buffer + cmdlen + 1;
4894 
4895 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4896   {
4897   fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4898   return PR_ABEND;
4899   }
4900 
4901 switch(cmd)
4902   {
4903   case CMD_UNKNOWN:
4904   fprintf(outfile, "** Unknown command: %s", buffer);
4905   break;
4906 
4907   case CMD_FORBID_UTF:
4908   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4909   break;
4910 
4911   case CMD_PERLTEST:
4912   restrict_for_perl_test = TRUE;
4913   break;
4914 
4915   /* Set default pattern modifiers */
4916 
4917   case CMD_PATTERN:
4918   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4919   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4920     def_patctl.jit = JIT_DEFAULT;
4921   break;
4922 
4923   /* Set default subject modifiers */
4924 
4925   case CMD_SUBJECT:
4926   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4927   break;
4928 
4929   /* Check the default newline, and if not one of those listed, set up the
4930   first one to be forced. An empty list unsets. */
4931 
4932   case CMD_NEWLINE_DEFAULT:
4933   local_newline_default = 0;   /* Unset */
4934   first_listed_newline = 0;
4935   for (;;)
4936     {
4937     while (isspace(*argptr)) argptr++;
4938     if (*argptr == 0) break;
4939     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4940       {
4941       size_t nlen = strlen(newlines[i]);
4942       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4943           isspace(argptr[nlen]))
4944         {
4945         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
4946         if (first_listed_newline == 0) first_listed_newline = i;
4947         }
4948       }
4949     while (*argptr != 0 && !isspace(*argptr)) argptr++;
4950     }
4951   local_newline_default = first_listed_newline;
4952   break;
4953 
4954   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4955   the compiled pattern (e.g. to give information) are permitted. The default
4956   pattern modifiers are ignored. */
4957 
4958   case CMD_POP:
4959   case CMD_POPCOPY:
4960   if (patstacknext <= 0)
4961     {
4962     fprintf(outfile, "** Can't pop off an empty stack\n");
4963     return PR_SKIP;
4964     }
4965   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
4966   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4967     return PR_SKIP;
4968 
4969   if (cmd == CMD_POP)
4970     {
4971     SET(compiled_code, patstack[--patstacknext]);
4972     }
4973   else
4974     {
4975     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4976     }
4977 
4978   if (pat_patctl.jit != 0)
4979     {
4980     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4981     }
4982   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4983   if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4984   if ((pat_patctl.control & CTL_ANYINFO) != 0)
4985     {
4986     rc = show_pattern_info();
4987     if (rc != PR_OK) return rc;
4988     }
4989   break;
4990 
4991   /* Save the stack of compiled patterns to a file, then empty the stack. */
4992 
4993   case CMD_SAVE:
4994   if (patstacknext <= 0)
4995     {
4996     fprintf(outfile, "** No stacked patterns to save\n");
4997     return PR_OK;
4998     }
4999 
5000   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
5001   if (rc != PR_OK) return rc;
5002 
5003   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
5004     general_context);
5005   if (rc < 0)
5006     {
5007     fclose(f);
5008     if (!serial_error(rc, "Serialization")) return PR_ABEND;
5009     break;
5010     }
5011 
5012   /* Write the length at the start of the file to make it straightforward to
5013   get the right memory when re-loading. This saves having to read the file size
5014   in different operating systems. To allow for different endianness (even
5015   though reloading with the opposite endianness does not work), write the
5016   length byte-by-byte. */
5017 
5018   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5019   if (fwrite(serial, 1, serial_size, f) != serial_size)
5020     {
5021     fprintf(outfile, "** Wrong return from fwrite()\n");
5022     fclose(f);
5023     return PR_ABEND;
5024     }
5025 
5026   fclose(f);
5027   PCRE2_SERIALIZE_FREE(serial);
5028   while(patstacknext > 0)
5029     {
5030     SET(compiled_code, patstack[--patstacknext]);
5031     SUB1(pcre2_code_free, compiled_code);
5032     }
5033   SET(compiled_code, NULL);
5034   break;
5035 
5036   /* Load a set of compiled patterns from a file onto the stack */
5037 
5038   case CMD_LOAD:
5039   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5040   if (rc != PR_OK) return rc;
5041 
5042   serial_size = 0;
5043   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5044 
5045   serial = malloc(serial_size);
5046   if (serial == NULL)
5047     {
5048     fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5049       serial_size);
5050     fclose(f);
5051     return PR_ABEND;
5052     }
5053 
5054   i = fread(serial, 1, serial_size, f);
5055   fclose(f);
5056 
5057   if (i != serial_size)
5058     {
5059     fprintf(outfile, "** Wrong return from fread()\n");
5060     yield = PR_ABEND;
5061     }
5062   else
5063     {
5064     PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5065     if (rc < 0)
5066       {
5067       if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5068       }
5069     else
5070       {
5071       if (rc + patstacknext > PATSTACKSIZE)
5072         {
5073         fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5074           rc, (rc == 1)? "" : "s");
5075         rc = PATSTACKSIZE - patstacknext;
5076         fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5077           (rc == 1)? "" : "s");
5078         }
5079       PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5080         general_context);
5081       if (rc < 0)
5082         {
5083         if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5084         }
5085       else patstacknext += rc;
5086       }
5087     }
5088 
5089   free(serial);
5090   break;
5091 
5092   /* Load a set of binary tables into tables3. */
5093 
5094   case CMD_LOADTABLES:
5095   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5096   if (rc != PR_OK) return rc;
5097 
5098   if (tables3 == NULL)
5099     {
5100     (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5101     tables3 = malloc(loadtables_length);
5102     }
5103 
5104   if (tables3 == NULL)
5105     {
5106     fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5107     yield = PR_ABEND;
5108     }
5109   else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5110     {
5111     fprintf(outfile, "** Wrong return from fread()\n");
5112     yield = PR_ABEND;
5113     }
5114 
5115   fclose(f);
5116   break;
5117   }
5118 
5119 return yield;
5120 }
5121 
5122 
5123 
5124 /*************************************************
5125 *               Process pattern line             *
5126 *************************************************/
5127 
5128 /* This function is called when the input buffer contains the start of a
5129 pattern. The first character is known to be a valid delimiter. The pattern is
5130 read, modifiers are interpreted, and a suitable local context is set up for
5131 this test. The pattern is then compiled.
5132 
5133 Arguments:  none
5134 
5135 Returns:    PR_OK     continue processing next line
5136             PR_SKIP   skip to a blank line
5137             PR_ABEND  abort the pcre2test run
5138 */
5139 
5140 static int
process_pattern(void)5141 process_pattern(void)
5142 {
5143 BOOL utf;
5144 uint32_t k;
5145 uint8_t *p = buffer;
5146 unsigned int delimiter = *p++;
5147 int errorcode;
5148 void *use_pat_context;
5149 uint32_t use_forbid_utf = forbid_utf;
5150 PCRE2_SIZE patlen;
5151 PCRE2_SIZE valgrind_access_length;
5152 PCRE2_SIZE erroroffset;
5153 
5154 /* The perltest.sh script supports only / as a delimiter. */
5155 
5156 if (restrict_for_perl_test && delimiter != '/')
5157   {
5158   fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5159   return PR_ABEND;
5160   }
5161 
5162 /* Initialize the context and pattern/data controls for this test from the
5163 defaults. */
5164 
5165 PATCTXCPY(pat_context, default_pat_context);
5166 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5167 
5168 /* Find the end of the pattern, reading more lines if necessary. */
5169 
5170 for(;;)
5171   {
5172   while (*p != 0)
5173     {
5174     if (*p == '\\' && p[1] != 0) p++;
5175       else if (*p == delimiter) break;
5176     p++;
5177     }
5178   if (*p != 0) break;
5179   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
5180     {
5181     fprintf(outfile, "** Unexpected EOF\n");
5182     return PR_ABEND;
5183     }
5184   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5185   }
5186 
5187 /* If the first character after the delimiter is backslash, make the pattern
5188 end with backslash. This is purely to provide a way of testing for the error
5189 message when a pattern ends with backslash. */
5190 
5191 if (p[1] == '\\') *p++ = '\\';
5192 
5193 /* Terminate the pattern at the delimiter, and compute the length. */
5194 
5195 *p++ = 0;
5196 patlen = p - buffer - 2;
5197 
5198 /* Look for modifiers and options after the final delimiter. */
5199 
5200 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5201 
5202 /* Note that the match_invalid_utf option also sets utf when passed to
5203 pcre2_compile(). */
5204 
5205 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5206 
5207 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5208 exclusive with the utf modifier. */
5209 
5210 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5211   {
5212   if (test_mode == PCRE8_MODE)
5213     {
5214     fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5215     return PR_SKIP;
5216     }
5217   if (utf)
5218     {
5219     fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5220     return PR_SKIP;
5221     }
5222   }
5223 
5224 /* The convert and posix modifiers are mutually exclusive. */
5225 
5226 if (pat_patctl.convert_type != CONVERT_UNSET &&
5227     (pat_patctl.control & CTL_POSIX) != 0)
5228   {
5229   fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5230   return PR_SKIP;
5231   }
5232 
5233 /* Check for mutually exclusive control modifiers. At present, these are all in
5234 the first control word. */
5235 
5236 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5237   {
5238   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5239   if (c != 0 && c != (c & (~c+1)))
5240     {
5241     show_controls(c, 0, "** Not allowed together:");
5242     fprintf(outfile, "\n");
5243     return PR_SKIP;
5244     }
5245   }
5246 
5247 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5248 specified. */
5249 
5250 if (pat_patctl.jit == 0 &&
5251     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5252   pat_patctl.jit = JIT_DEFAULT;
5253 
5254 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5255 in callouts. Convert from hex if requested (literal strings in quotes may be
5256 present within the hexadecimal pairs). The result must necessarily be fewer
5257 characters so will always fit in pbuffer8. */
5258 
5259 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5260   {
5261   uint8_t *pp, *pt;
5262   uint32_t c, d;
5263 
5264   pt = pbuffer8;
5265   for (pp = buffer + 1; *pp != 0; pp++)
5266     {
5267     if (isspace(*pp)) continue;
5268     c = *pp++;
5269 
5270     /* Handle a literal substring */
5271 
5272     if (c == '\'' || c == '"')
5273       {
5274       uint8_t *pq = pp;
5275       for (;; pp++)
5276         {
5277         d = *pp;
5278         if (d == 0)
5279           {
5280           fprintf(outfile, "** Missing closing quote in hex pattern: "
5281             "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5282           return PR_SKIP;
5283           }
5284         if (d == c) break;
5285         *pt++ = d;
5286         }
5287       }
5288 
5289     /* Expect a hex pair */
5290 
5291     else
5292       {
5293       if (!isxdigit(c))
5294         {
5295         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5296           PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5297         return PR_SKIP;
5298         }
5299       if (*pp == 0)
5300         {
5301         fprintf(outfile, "** Odd number of digits in hex pattern\n");
5302         return PR_SKIP;
5303         }
5304       d = *pp;
5305       if (!isxdigit(d))
5306         {
5307         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5308           PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5309         return PR_SKIP;
5310         }
5311       c = toupper(c);
5312       d = toupper(d);
5313       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5314                (isdigit(d)? (d - '0') : (d - 'A' + 10));
5315       }
5316     }
5317   *pt = 0;
5318   patlen = pt - pbuffer8;
5319   }
5320 
5321 /* If not a hex string, process for repetition expansion if requested. */
5322 
5323 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5324   {
5325   uint8_t *pp, *pt;
5326 
5327   pt = pbuffer8;
5328   for (pp = buffer + 1; *pp != 0; pp++)
5329     {
5330     uint8_t *pc = pp;
5331     uint32_t count = 1;
5332     size_t length = 1;
5333 
5334     /* Check for replication syntax; if not found, the defaults just set will
5335     prevail and one character will be copied. */
5336 
5337     if (pp[0] == '\\' && pp[1] == '[')
5338       {
5339       uint8_t *pe;
5340       for (pe = pp + 2; *pe != 0; pe++)
5341         {
5342         if (pe[0] == ']' && pe[1] == '{')
5343           {
5344           uint32_t clen = pe - pc - 2;
5345           uint32_t i = 0;
5346           unsigned long uli;
5347           char *endptr;
5348 
5349           pe += 2;
5350           uli = strtoul((const char *)pe, &endptr, 10);
5351           if (U32OVERFLOW(uli))
5352             {
5353             fprintf(outfile, "** Pattern repeat count too large\n");
5354             return PR_SKIP;
5355             }
5356 
5357           i = (uint32_t)uli;
5358           pe = (uint8_t *)endptr;
5359           if (*pe == '}')
5360             {
5361             if (i == 0)
5362               {
5363               fprintf(outfile, "** Zero repeat not allowed\n");
5364               return PR_SKIP;
5365               }
5366             pc += 2;
5367             count = i;
5368             length = clen;
5369             pp = pe;
5370             break;
5371             }
5372           }
5373         }
5374       }
5375 
5376     /* Add to output. If the buffer is too small expand it. The function for
5377     expanding buffers always keeps buffer and pbuffer8 in step as far as their
5378     size goes. */
5379 
5380     while (pt + count * length > pbuffer8 + pbuffer8_size)
5381       {
5382       size_t pc_offset = pc - buffer;
5383       size_t pp_offset = pp - buffer;
5384       size_t pt_offset = pt - pbuffer8;
5385       expand_input_buffers();
5386       pc = buffer + pc_offset;
5387       pp = buffer + pp_offset;
5388       pt = pbuffer8 + pt_offset;
5389       }
5390 
5391     for (; count > 0; count--)
5392       {
5393       memcpy(pt, pc, length);
5394       pt += length;
5395       }
5396     }
5397 
5398   *pt = 0;
5399   patlen = pt - pbuffer8;
5400 
5401   if ((pat_patctl.control & CTL_INFO) != 0)
5402     fprintf(outfile, "Expanded: %s\n", pbuffer8);
5403   }
5404 
5405 /* Neither hex nor expanded, just copy the input verbatim. */
5406 
5407 else
5408   {
5409   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5410   }
5411 
5412 /* Sort out character tables */
5413 
5414 if (pat_patctl.locale[0] != 0)
5415   {
5416   if (pat_patctl.tables_id != 0)
5417     {
5418     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5419     return PR_SKIP;
5420     }
5421   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5422     {
5423     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5424     return PR_SKIP;
5425     }
5426   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5427     {
5428     strcpy((char *)locale_name, (char *)pat_patctl.locale);
5429     if (locale_tables != NULL) free((void *)locale_tables);
5430     PCRE2_MAKETABLES(locale_tables);
5431     }
5432   use_tables = locale_tables;
5433   }
5434 
5435 else switch (pat_patctl.tables_id)
5436   {
5437   case 0: use_tables = NULL; break;
5438   case 1: use_tables = tables1; break;
5439   case 2: use_tables = tables2; break;
5440 
5441   case 3:
5442   if (tables3 == NULL)
5443     {
5444     fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5445       "been loaded\n");
5446     return PR_SKIP;
5447     }
5448   use_tables = tables3;
5449   break;
5450 
5451   default:
5452   fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5453   return PR_SKIP;
5454   }
5455 
5456 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5457 
5458 /* Set up for the stackguard test. */
5459 
5460 if (pat_patctl.stackguard_test != 0)
5461   {
5462   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5463   }
5464 
5465 /* Handle compiling via the POSIX interface, which doesn't support the
5466 timing, showing, or debugging options, nor the ability to pass over
5467 local character tables. Neither does it have 16-bit or 32-bit support. */
5468 
5469 if ((pat_patctl.control & CTL_POSIX) != 0)
5470   {
5471 #ifdef SUPPORT_PCRE2_8
5472   int rc;
5473   int cflags = 0;
5474   const char *msg = "** Ignored with POSIX interface:";
5475 #endif
5476 
5477   if (test_mode != PCRE8_MODE)
5478     {
5479     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5480     return PR_SKIP;
5481     }
5482 
5483 #ifdef SUPPORT_PCRE2_8
5484   /* Check for features that the POSIX interface does not support. */
5485 
5486   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5487   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5488   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5489   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5490   if (timeit > 0) prmsg(&msg, "timing");
5491   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5492 
5493   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5494     {
5495     show_compile_options(
5496       pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
5497         msg, "");
5498     msg = "";
5499     }
5500 
5501   if ((FLD(pat_context, extra_options) &
5502        (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
5503     {
5504     show_compile_extra_options(
5505       FLD(pat_context, extra_options) &
5506         (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
5507     msg = "";
5508     }
5509 
5510   if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
5511       (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
5512     {
5513     show_controls(
5514       pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
5515       pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
5516       msg);
5517     msg = "";
5518     }
5519 
5520   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5521   if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5522     prmsg(&msg, "max_pattern_length");
5523   if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5524     prmsg(&msg, "parens_nest_limit");
5525 
5526   if (msg[0] == 0) fprintf(outfile, "\n");
5527 
5528   /* Translate PCRE2 options to POSIX options and then compile. */
5529 
5530   if (utf) cflags |= REG_UTF;
5531   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5532   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5533   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5534   if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5535   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5536   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5537   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5538 
5539   if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5540     {
5541     preg.re_endp = (char *)pbuffer8 + patlen;
5542     cflags |= REG_PEND;
5543     }
5544 
5545   rc = regcomp(&preg, (char *)pbuffer8, cflags);
5546 
5547   /* Compiling failed */
5548 
5549   if (rc != 0)
5550     {
5551     size_t bsize, usize;
5552     int psize;
5553 
5554     preg.re_pcre2_code = NULL;     /* In case something was left in there */
5555     preg.re_match_data = NULL;
5556 
5557     bsize = (pat_patctl.regerror_buffsize != 0)?
5558       pat_patctl.regerror_buffsize : pbuffer8_size;
5559     if (bsize + 8 < pbuffer8_size)
5560       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5561     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5562 
5563     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5564     versions of snprintf() put a zero byte at the end, but others do not.
5565     Therefore, we print a maximum of one less than the size of the buffer. */
5566 
5567     psize = (int)bsize - 1;
5568     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5569     if (usize > bsize)
5570       {
5571       fprintf(outfile, "** regerror() message truncated\n");
5572       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5573         fprintf(outfile, "** regerror() buffer overflow\n");
5574       }
5575     return PR_SKIP;
5576     }
5577 
5578   /* Compiling succeeded. Check that the values in the preg block are sensible.
5579   It can happen that pcre2test is accidentally linked with a different POSIX
5580   library which succeeds, but of course puts different things into preg. In
5581   this situation, calling regfree() may cause a segfault (or invalid free() in
5582   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5583   calling of regfree() on exit. */
5584 
5585   if (preg.re_pcre2_code == NULL ||
5586       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5587       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5588       preg.re_match_data == NULL ||
5589       preg.re_cflags != cflags)
5590     {
5591     fprintf(outfile,
5592       "** The regcomp() function returned zero (success), but the values set\n"
5593       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5594       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5595       "** some other POSIX regex library.\n**\n");
5596     preg.re_pcre2_code = NULL;
5597     return PR_ABEND;
5598     }
5599 
5600   return PR_OK;
5601 #endif  /* SUPPORT_PCRE2_8 */
5602   }
5603 
5604 /* Handle compiling via the native interface. Controls that act later are
5605 ignored with "push". Replacements are locked out. */
5606 
5607 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5608   {
5609   if (pat_patctl.replacement[0] != 0)
5610     {
5611     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5612     return PR_OK;
5613     }
5614   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5615       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5616     {
5617     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5618                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5619       "** Ignored when compiled pattern is stacked with 'push':");
5620     fprintf(outfile, "\n");
5621     }
5622   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5623       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5624     {
5625     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5626                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5627       "** Applies only to compile when pattern is stacked with 'push':");
5628     fprintf(outfile, "\n");
5629     }
5630   }
5631 
5632 /* Convert the input in non-8-bit modes. */
5633 
5634 errorcode = 0;
5635 
5636 #ifdef SUPPORT_PCRE2_16
5637 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5638 #endif
5639 
5640 #ifdef SUPPORT_PCRE2_32
5641 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5642 #endif
5643 
5644 switch(errorcode)
5645   {
5646   case -1:
5647   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5648     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5649   return PR_SKIP;
5650 
5651   case -2:
5652   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5653     "cannot be converted to UTF\n");
5654   return PR_SKIP;
5655 
5656   case -3:
5657   fprintf(outfile, "** Failed: character value greater than 0xffff "
5658     "cannot be converted to 16-bit in non-UTF mode\n");
5659   return PR_SKIP;
5660 
5661   default:
5662   break;
5663   }
5664 
5665 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5666 patlen. If it is to be converted, copy the result back afterwards so that it
5667 ends up back in the usual place. */
5668 
5669 if (pat_patctl.convert_type != CONVERT_UNSET)
5670   {
5671   int rc;
5672   int convert_return = PR_OK;
5673   uint32_t convert_options = pat_patctl.convert_type;
5674   void *converted_pattern;
5675   PCRE2_SIZE converted_length;
5676 
5677   if (pat_patctl.convert_length != 0)
5678     {
5679     converted_length = pat_patctl.convert_length;
5680     converted_pattern = malloc(converted_length * code_unit_size);
5681     if (converted_pattern == NULL)
5682       {
5683       fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5684       return PR_SKIP;
5685       }
5686     }
5687   else converted_pattern = NULL;  /* Let the library allocate */
5688 
5689   if (utf) convert_options |= PCRE2_CONVERT_UTF;
5690   if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5691     convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5692 
5693   CONCTXCPY(con_context, default_con_context);
5694 
5695   if (pat_patctl.convert_glob_escape != 0)
5696     {
5697     uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5698       pat_patctl.convert_glob_escape;
5699     PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5700     if (rc != 0)
5701       {
5702       fprintf(outfile, "** Invalid glob escape '%c'\n",
5703         pat_patctl.convert_glob_escape);
5704       convert_return = PR_SKIP;
5705       goto CONVERT_FINISH;
5706       }
5707     }
5708 
5709   if (pat_patctl.convert_glob_separator != 0)
5710     {
5711     PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5712     if (rc != 0)
5713       {
5714       fprintf(outfile, "** Invalid glob separator '%c'\n",
5715         pat_patctl.convert_glob_separator);
5716       convert_return = PR_SKIP;
5717       goto CONVERT_FINISH;
5718       }
5719     }
5720 
5721   PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5722     &converted_pattern, &converted_length, con_context);
5723 
5724   if (rc != 0)
5725     {
5726     fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5727       converted_length);
5728     convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5729     }
5730 
5731   /* Output the converted pattern, then copy it. */
5732 
5733   else
5734     {
5735     PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5736     fprintf(outfile, "\n");
5737     patlen = converted_length;
5738     CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5739     }
5740 
5741   /* Free the converted pattern. */
5742 
5743   CONVERT_FINISH:
5744   if (pat_patctl.convert_length != 0)
5745     free(converted_pattern);
5746   else
5747     PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5748 
5749   /* Return if conversion was unsuccessful. */
5750 
5751   if (convert_return != PR_OK) return convert_return;
5752   }
5753 
5754 /* By default we pass a zero-terminated pattern, but a length is passed if
5755 "use_length" was specified or this is a hex pattern (which might contain binary
5756 zeros). When valgrind is supported, arrange for the unused part of the buffer
5757 to be marked as no access. */
5758 
5759 valgrind_access_length = patlen;
5760 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5761   {
5762   patlen = PCRE2_ZERO_TERMINATED;
5763   valgrind_access_length += 1;  /* For the terminating zero */
5764   }
5765 
5766 #ifdef SUPPORT_VALGRIND
5767 #ifdef SUPPORT_PCRE2_8
5768 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5769   {
5770   VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5771     pbuffer8_size - valgrind_access_length);
5772   }
5773 #endif
5774 #ifdef SUPPORT_PCRE2_16
5775 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5776   {
5777   VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5778     pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5779   }
5780 #endif
5781 #ifdef SUPPORT_PCRE2_32
5782 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5783   {
5784   VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5785     pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5786   }
5787 #endif
5788 #else  /* Valgrind not supported */
5789 (void)valgrind_access_length;  /* Avoid compiler warning */
5790 #endif
5791 
5792 /* If #newline_default has been used and the library was not compiled with an
5793 appropriate default newline setting, local_newline_default will be non-zero. We
5794 use this if there is no explicit newline modifier. */
5795 
5796 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5797   {
5798   SETFLD(pat_context, newline_convention, local_newline_default);
5799   }
5800 
5801 /* The null_context modifier is used to test calling pcre2_compile() with a
5802 NULL context. */
5803 
5804 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5805   NULL : PTR(pat_context);
5806 
5807 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5808 and PCRE2_NEVER_UCP are invalid with it. */
5809 
5810 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5811 
5812 /* Compile many times when timing. */
5813 
5814 if (timeit > 0)
5815   {
5816   int i;
5817   clock_t time_taken = 0;
5818   for (i = 0; i < timeit; i++)
5819     {
5820     clock_t start_time = clock();
5821     PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5822       pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5823         use_pat_context);
5824     time_taken += clock() - start_time;
5825     if (TEST(compiled_code, !=, NULL))
5826       { SUB1(pcre2_code_free, compiled_code); }
5827     }
5828   total_compile_time += time_taken;
5829   fprintf(outfile, "Compile time %.4f milliseconds\n",
5830     (((double)time_taken * 1000.0) / (double)timeit) /
5831       (double)CLOCKS_PER_SEC);
5832   }
5833 
5834 /* A final compile that is used "for real". */
5835 
5836 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5837   &errorcode, &erroroffset, use_pat_context);
5838 
5839 /* Call the JIT compiler if requested. When timing, we must free and recompile
5840 the pattern each time because that is the only way to free the JIT compiled
5841 code. We know that compilation will always succeed. */
5842 
5843 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5844   {
5845   if (timeit > 0)
5846     {
5847     int i;
5848     clock_t time_taken = 0;
5849 
5850     for (i = 0; i < timeit; i++)
5851       {
5852       clock_t start_time;
5853       SUB1(pcre2_code_free, compiled_code);
5854       PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5855         pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5856         use_pat_context);
5857       start_time = clock();
5858       PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5859       time_taken += clock() - start_time;
5860       }
5861     total_jit_compile_time += time_taken;
5862     fprintf(outfile, "JIT compile  %.4f milliseconds\n",
5863       (((double)time_taken * 1000.0) / (double)timeit) /
5864         (double)CLOCKS_PER_SEC);
5865     }
5866   else
5867     {
5868     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5869     }
5870   }
5871 
5872 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5873 and 32-bit buffers can be marked completely undefined, but we must leave the
5874 pattern in the 8-bit buffer defined because it may be read from a callout
5875 during matching. */
5876 
5877 #ifdef SUPPORT_VALGRIND
5878 #ifdef SUPPORT_PCRE2_8
5879 if (test_mode == PCRE8_MODE)
5880   {
5881   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5882     pbuffer8_size - valgrind_access_length);
5883   }
5884 #endif
5885 #ifdef SUPPORT_PCRE2_16
5886 if (test_mode == PCRE16_MODE)
5887   {
5888   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5889   }
5890 #endif
5891 #ifdef SUPPORT_PCRE2_32
5892 if (test_mode == PCRE32_MODE)
5893   {
5894   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5895   }
5896 #endif
5897 #endif
5898 
5899 /* Compilation failed; go back for another re, skipping to blank line
5900 if non-interactive. */
5901 
5902 if (TEST(compiled_code, ==, NULL))
5903   {
5904   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5905     (int)erroroffset);
5906   if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5907   return PR_SKIP;
5908   }
5909 
5910 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5911 locked out at compile time, but we must also check for occurrences of \P, \p,
5912 and \X, which are only supported when Unicode is supported. */
5913 
5914 if (forbid_utf != 0)
5915   {
5916   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5917     {
5918     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5919       "#forbid_utf command\n");
5920     return PR_SKIP;
5921     }
5922   }
5923 
5924 /* Remember the maximum lookbehind, for partial matching. */
5925 
5926 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5927   return PR_ABEND;
5928 
5929 /* Remember the number of captures. */
5930 
5931 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5932   return PR_ABEND;
5933 
5934 /* If an explicit newline modifier was given, set the information flag in the
5935 pattern so that it is preserved over push/pop. */
5936 
5937 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5938   {
5939   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5940   }
5941 
5942 /* Output code size and other information if requested. */
5943 
5944 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5945 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5946 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5947   {
5948   int rc = show_pattern_info();
5949   if (rc != PR_OK) return rc;
5950   }
5951 
5952 /* The "push" control requests that the compiled pattern be remembered on a
5953 stack. This is mainly for testing the serialization functionality. */
5954 
5955 if ((pat_patctl.control & CTL_PUSH) != 0)
5956   {
5957   if (patstacknext >= PATSTACKSIZE)
5958     {
5959     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5960     return PR_ABEND;
5961     }
5962   patstack[patstacknext++] = PTR(compiled_code);
5963   SET(compiled_code, NULL);
5964   }
5965 
5966 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5967 copy of the pattern, the latter with a copy of its character tables. This tests
5968 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5969 
5970 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5971   {
5972   if (patstacknext >= PATSTACKSIZE)
5973     {
5974     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5975     return PR_ABEND;
5976     }
5977   if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5978     {
5979     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5980     }
5981   else
5982     {
5983     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5984       compiled_code); }
5985   }
5986 
5987 return PR_OK;
5988 }
5989 
5990 
5991 
5992 /*************************************************
5993 *          Check heap, match or depth limit      *
5994 *************************************************/
5995 
5996 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5997 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5998 
5999 Arguments:
6000   pp        the subject string
6001   ulen      length of subject or PCRE2_ZERO_TERMINATED
6002   errnumber defines which limit to test
6003   msg       string to include in final message
6004 
6005 Returns:    the return from the final match function call
6006 */
6007 
6008 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)6009 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
6010 {
6011 int capcount;
6012 uint32_t min = 0;
6013 uint32_t mid = 64;
6014 uint32_t max = UINT32_MAX;
6015 
6016 PCRE2_SET_MATCH_LIMIT(dat_context, max);
6017 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6018 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6019 
6020 for (;;)
6021   {
6022   uint32_t stack_start = 0;
6023 
6024   if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6025     {
6026     PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6027     }
6028   else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6029     {
6030     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6031     }
6032   else
6033     {
6034     PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6035     }
6036 
6037   if ((dat_datctl.control & CTL_DFA) != 0)
6038     {
6039     stack_start = DFA_START_RWS_SIZE/1024;
6040     if (dfa_workspace == NULL)
6041       dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6042     if (dfa_matched++ == 0)
6043       dfa_workspace[0] = -1;  /* To catch bad restart */
6044     PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6045       dat_datctl.options, match_data,
6046       PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6047     }
6048 
6049   else if ((pat_patctl.control & CTL_JITFAST) != 0)
6050     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6051       dat_datctl.options, match_data, PTR(dat_context));
6052 
6053   else
6054     {
6055     stack_start = START_FRAMES_SIZE/1024;
6056     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6057       dat_datctl.options, match_data, PTR(dat_context));
6058     }
6059 
6060   if (capcount == errnumber)
6061     {
6062     if ((mid & 0x80000000u) != 0)
6063       {
6064       fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6065         "restriction\n", msg);
6066       break;
6067       }
6068 
6069     min = mid;
6070     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6071     }
6072   else if (capcount >= 0 ||
6073            capcount == PCRE2_ERROR_NOMATCH ||
6074            capcount == PCRE2_ERROR_PARTIAL)
6075     {
6076     /* If we've not hit the error with a heap limit less than the size of the
6077     initial stack frame vector (for pcre2_match()) or the initial stack
6078     workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6079     the minimum limit is zero; there's no need to go on. The other limits are
6080     always greater than zero. */
6081 
6082     if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6083       {
6084       fprintf(outfile, "Minimum %s limit = 0\n", msg);
6085       break;
6086       }
6087     if (mid == min + 1)
6088       {
6089       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6090       break;
6091       }
6092     max = mid;
6093     mid = (min + max)/2;
6094     }
6095   else break;    /* Some other error */
6096   }
6097 
6098 return capcount;
6099 }
6100 
6101 
6102 
6103 /*************************************************
6104 *        Substitute callout function             *
6105 *************************************************/
6106 
6107 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6108 Print out the data that is passed back. The substitute callout block is
6109 identical for all code unit widths, so we just pick one.
6110 
6111 Arguments:
6112   scb         pointer to substitute callout block
6113   data_ptr    callout data
6114 
6115 Returns:      nothing
6116 */
6117 
6118 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6119 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6120   void *data_ptr)
6121 {
6122 int yield = 0;
6123 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6124 (void)data_ptr;   /* Not used */
6125 
6126 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6127   scb->subscount, scb->oveccount,
6128   scb->ovector[0], scb->ovector[1]);
6129 
6130 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6131   utf, outfile);
6132 
6133 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6134   scb->output_offsets[0], scb->output_offsets[1]);
6135 
6136 PCHARSV(scb->output, scb->output_offsets[0],
6137   scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6138 
6139 if (scb->subscount == dat_datctl.substitute_stop)
6140   {
6141   yield = -1;
6142   fprintf(outfile, " STOPPED");
6143   }
6144 else if (scb->subscount == dat_datctl.substitute_skip)
6145   {
6146   yield = +1;
6147   fprintf(outfile, " SKIPPED");
6148   }
6149 
6150 fprintf(outfile, "\"\n");
6151 return yield;
6152 }
6153 
6154 
6155 /*************************************************
6156 *              Callout function                  *
6157 *************************************************/
6158 
6159 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6160 we are in the match (unless suppressed). Yield zero unless more callouts than
6161 the fail count, or the callout data is not zero. The only differences in the
6162 callout block for different code unit widths are that the pointers to the
6163 subject, the most recent MARK, and a callout argument string point to strings
6164 of the appropriate width. Casts can be used to deal with this.
6165 
6166 Arguments:
6167   cb                a pointer to a callout block
6168   callout_data_ptr  the provided callout data
6169 
6170 Returns:            0 or 1 or an error, as determined by settings
6171 */
6172 
6173 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6174 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6175 {
6176 FILE *f, *fdefault;
6177 uint32_t i, pre_start, post_start, subject_length;
6178 PCRE2_SIZE current_position;
6179 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6180 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6181 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6182 
6183 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6184 happens only once in simple cases, but we want to repeat after any additional
6185 output caused by CALLOUT_EXTRA. */
6186 
6187 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6188   NULL : outfile;
6189 
6190 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6191   {
6192   f = outfile;
6193   switch (cb->callout_flags)
6194     {
6195     case PCRE2_CALLOUT_BACKTRACK:
6196     fprintf(f, "Backtrack\n");
6197     break;
6198 
6199     case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6200     fprintf(f, "Backtrack\nNo other matching paths\n");
6201     /* Fall through */
6202 
6203     case PCRE2_CALLOUT_STARTMATCH:
6204     fprintf(f, "New match attempt\n");
6205     break;
6206 
6207     default:
6208     f = fdefault;
6209     break;
6210     }
6211   }
6212 else f = fdefault;
6213 
6214 /* For a callout with a string argument, show the string first because there
6215 isn't a tidy way to fit it in the rest of the data. */
6216 
6217 if (cb->callout_string != NULL)
6218   {
6219   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6220   fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6221     cb->callout_string_offset, delimiter);
6222   PCHARSV(cb->callout_string, 0,
6223     cb->callout_string_length, utf, outfile);
6224   for (i = 0; callout_start_delims[i] != 0; i++)
6225     if (delimiter == callout_start_delims[i])
6226       {
6227       delimiter = callout_end_delims[i];
6228       break;
6229       }
6230   fprintf(outfile, "%c", delimiter);
6231   if (!callout_capture) fprintf(outfile, "\n");
6232   }
6233 
6234 /* Show captured strings if required */
6235 
6236 if (callout_capture)
6237   {
6238   if (cb->callout_string == NULL)
6239     fprintf(outfile, "Callout %d:", cb->callout_number);
6240   fprintf(outfile, " last capture = %d\n", cb->capture_last);
6241   for (i = 2; i < cb->capture_top * 2; i += 2)
6242     {
6243     fprintf(outfile, "%2d: ", i/2);
6244     if (cb->offset_vector[i] == PCRE2_UNSET)
6245       fprintf(outfile, "<unset>");
6246     else
6247       {
6248       PCHARSV(cb->subject, cb->offset_vector[i],
6249         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6250       }
6251     fprintf(outfile, "\n");
6252     }
6253   }
6254 
6255 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6256 non-printing characters), the first time, or if giving full details. On
6257 subsequent calls in the same match, we use PCHARS() just to find the printed
6258 lengths of the substrings. */
6259 
6260 if (callout_where)
6261   {
6262   if (f != NULL) fprintf(f, "--->");
6263 
6264   /* The subject before the match start. */
6265 
6266   PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6267 
6268   /* If a lookbehind is involved, the current position may be earlier than the
6269   match start. If so, use the match start instead. */
6270 
6271   current_position = (cb->current_position >= cb->start_match)?
6272     cb->current_position : cb->start_match;
6273 
6274   /* The subject between the match start and the current position. */
6275 
6276   PCHARS(post_start, cb->subject, cb->start_match,
6277     current_position - cb->start_match, utf, f);
6278 
6279   /* Print from the current position to the end. */
6280 
6281   PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6282     utf, f);
6283 
6284   /* Calculate the total subject printed length (no print). */
6285 
6286   PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6287 
6288   if (f != NULL) fprintf(f, "\n");
6289 
6290   /* For automatic callouts, show the pattern offset. Otherwise, for a
6291   numerical callout whose number has not already been shown with captured
6292   strings, show the number here. A callout with a string argument has been
6293   displayed above. */
6294 
6295   if (cb->callout_number == 255)
6296     {
6297     fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6298     if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
6299     }
6300   else
6301     {
6302     if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
6303       else fprintf(outfile, "%3d ", cb->callout_number);
6304     }
6305 
6306   /* Now show position indicators */
6307 
6308   for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6309   fprintf(outfile, "^");
6310 
6311   if (post_start > 0)
6312     {
6313     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6314     fprintf(outfile, "^");
6315     }
6316 
6317   for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6318     fprintf(outfile, " ");
6319 
6320   if (cb->next_item_length != 0)
6321     fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6322       pbuffer8 + cb->pattern_position);
6323   else
6324     fprintf(outfile, "End of pattern");
6325 
6326   fprintf(outfile, "\n");
6327   }
6328 
6329 first_callout = FALSE;
6330 
6331 /* Show any mark info */
6332 
6333 if (cb->mark != last_callout_mark)
6334   {
6335   if (cb->mark == NULL)
6336     fprintf(outfile, "Latest Mark: <unset>\n");
6337   else
6338     {
6339     fprintf(outfile, "Latest Mark: ");
6340     PCHARSV(cb->mark, -1, -1, utf, outfile);
6341     putc('\n', outfile);
6342     }
6343   last_callout_mark = cb->mark;
6344   }
6345 
6346 /* Show callout data */
6347 
6348 if (callout_data_ptr != NULL)
6349   {
6350   int callout_data = *((int32_t *)callout_data_ptr);
6351   if (callout_data != 0)
6352     {
6353     fprintf(outfile, "Callout data = %d\n", callout_data);
6354     return callout_data;
6355     }
6356   }
6357 
6358 /* Keep count and give the appropriate return code */
6359 
6360 callout_count++;
6361 
6362 if (cb->callout_number == dat_datctl.cerror[0] &&
6363     callout_count >= dat_datctl.cerror[1])
6364   return PCRE2_ERROR_CALLOUT;
6365 
6366 if (cb->callout_number == dat_datctl.cfail[0] &&
6367     callout_count >= dat_datctl.cfail[1])
6368   return 1;
6369 
6370 return 0;
6371 }
6372 
6373 
6374 
6375 /*************************************************
6376 *       Handle *MARK and copy/get tests          *
6377 *************************************************/
6378 
6379 /* This function is called after complete and partial matches. It runs the
6380 tests for substring extraction.
6381 
6382 Arguments:
6383   utf       TRUE for utf
6384   capcount  return from pcre2_match()
6385 
6386 Returns:    FALSE if print_error_message() fails
6387 */
6388 
6389 static BOOL
copy_and_get(BOOL utf,int capcount)6390 copy_and_get(BOOL utf, int capcount)
6391 {
6392 int i;
6393 uint8_t *nptr;
6394 
6395 /* Test copy strings by number */
6396 
6397 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6398   {
6399   int rc;
6400   PCRE2_SIZE length, length2;
6401   uint32_t copybuffer[256];
6402   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6403   length = sizeof(copybuffer)/code_unit_size;
6404   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6405   if (rc < 0)
6406     {
6407     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6408     if (!print_error_message(rc, "", "\n")) return FALSE;
6409     }
6410   else
6411     {
6412     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6413     if (rc < 0)
6414       {
6415       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6416       if (!print_error_message(rc, "", "\n")) return FALSE;
6417       }
6418     else if (length2 != length)
6419       {
6420       fprintf(outfile, "Mismatched substring lengths: %"
6421         SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6422       }
6423     fprintf(outfile, "%2dC ", n);
6424     PCHARSV(copybuffer, 0, length, utf, outfile);
6425     fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6426     }
6427   }
6428 
6429 /* Test copy strings by name */
6430 
6431 nptr = dat_datctl.copy_names;
6432 for (;;)
6433   {
6434   int rc;
6435   int groupnumber;
6436   PCRE2_SIZE length, length2;
6437   uint32_t copybuffer[256];
6438   int namelen = strlen((const char *)nptr);
6439 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6440   PCRE2_SIZE cnl = namelen;
6441 #endif
6442   if (namelen == 0) break;
6443 
6444 #ifdef SUPPORT_PCRE2_8
6445   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6446 #endif
6447 #ifdef SUPPORT_PCRE2_16
6448   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6449 #endif
6450 #ifdef SUPPORT_PCRE2_32
6451   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6452 #endif
6453 
6454   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6455   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6456     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6457 
6458   length = sizeof(copybuffer)/code_unit_size;
6459   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6460   if (rc < 0)
6461     {
6462     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6463     if (!print_error_message(rc, "", "\n")) return FALSE;
6464     }
6465   else
6466     {
6467     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6468     if (rc < 0)
6469       {
6470       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6471       if (!print_error_message(rc, "", "\n")) return FALSE;
6472       }
6473     else if (length2 != length)
6474       {
6475       fprintf(outfile, "Mismatched substring lengths: %"
6476         SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6477       }
6478     fprintf(outfile, "  C ");
6479     PCHARSV(copybuffer, 0, length, utf, outfile);
6480     fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6481     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6482       else fprintf(outfile, " (non-unique)\n");
6483     }
6484   nptr += namelen + 1;
6485   }
6486 
6487 /* Test get strings by number */
6488 
6489 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6490   {
6491   int rc;
6492   PCRE2_SIZE length;
6493   void *gotbuffer;
6494   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6495   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6496   if (rc < 0)
6497     {
6498     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6499     if (!print_error_message(rc, "", "\n")) return FALSE;
6500     }
6501   else
6502     {
6503     fprintf(outfile, "%2dG ", n);
6504     PCHARSV(gotbuffer, 0, length, utf, outfile);
6505     fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6506     PCRE2_SUBSTRING_FREE(gotbuffer);
6507     }
6508   }
6509 
6510 /* Test get strings by name */
6511 
6512 nptr = dat_datctl.get_names;
6513 for (;;)
6514   {
6515   PCRE2_SIZE length;
6516   void *gotbuffer;
6517   int rc;
6518   int groupnumber;
6519   int namelen = strlen((const char *)nptr);
6520 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6521   PCRE2_SIZE cnl = namelen;
6522 #endif
6523   if (namelen == 0) break;
6524 
6525 #ifdef SUPPORT_PCRE2_8
6526   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6527 #endif
6528 #ifdef SUPPORT_PCRE2_16
6529   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6530 #endif
6531 #ifdef SUPPORT_PCRE2_32
6532   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6533 #endif
6534 
6535   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6536   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6537     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6538 
6539   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6540   if (rc < 0)
6541     {
6542     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6543     if (!print_error_message(rc, "", "\n")) return FALSE;
6544     }
6545   else
6546     {
6547     fprintf(outfile, "  G ");
6548     PCHARSV(gotbuffer, 0, length, utf, outfile);
6549     fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6550     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6551       else fprintf(outfile, " (non-unique)\n");
6552     PCRE2_SUBSTRING_FREE(gotbuffer);
6553     }
6554   nptr += namelen + 1;
6555   }
6556 
6557 /* Test getting the complete list of captured strings. */
6558 
6559 if ((dat_datctl.control & CTL_GETALL) != 0)
6560   {
6561   int rc;
6562   void **stringlist;
6563   PCRE2_SIZE *lengths;
6564   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6565   if (rc < 0)
6566     {
6567     fprintf(outfile, "get substring list failed (%d): ", rc);
6568     if (!print_error_message(rc, "", "\n")) return FALSE;
6569     }
6570   else
6571     {
6572     for (i = 0; i < capcount; i++)
6573       {
6574       fprintf(outfile, "%2dL ", i);
6575       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6576       putc('\n', outfile);
6577       }
6578     if (stringlist[i] != NULL)
6579       fprintf(outfile, "string list not terminated by NULL\n");
6580     PCRE2_SUBSTRING_LIST_FREE(stringlist);
6581     }
6582   }
6583 
6584 return TRUE;
6585 }
6586 
6587 
6588 
6589 /*************************************************
6590 *            Show an entire ovector              *
6591 *************************************************/
6592 
6593 /* This function is called after partial matching or match failure, when the
6594 "allvector" modifier is set. It is a means of checking the contents of the
6595 entire ovector, to ensure no modification of fields that should be unchanged.
6596 
6597 Arguments:
6598   ovector      points to the ovector
6599   oveccount    number of pairs
6600 
6601 Returns:       nothing
6602 */
6603 
6604 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6605 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6606 {
6607 uint32_t i;
6608 for (i = 0; i < 2*oveccount; i += 2)
6609   {
6610   PCRE2_SIZE start = ovector[i];
6611   PCRE2_SIZE end = ovector[i+1];
6612 
6613   fprintf(outfile, "%2d: ", i/2);
6614   if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6615     fprintf(outfile, "<unset>\n");
6616   else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6617     fprintf(outfile, "<unchanged>\n");
6618   else
6619     fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6620       (unsigned long int)end);
6621   }
6622 }
6623 
6624 
6625 /*************************************************
6626 *               Process a data line              *
6627 *************************************************/
6628 
6629 /* The line is in buffer; it will not be empty.
6630 
6631 Arguments:  none
6632 
6633 Returns:    PR_OK     continue processing next line
6634             PR_SKIP   skip to a blank line
6635             PR_ABEND  abort the pcre2test run
6636 */
6637 
6638 static int
process_data(void)6639 process_data(void)
6640 {
6641 PCRE2_SIZE len, ulen, arg_ulen;
6642 uint32_t gmatched;
6643 uint32_t c, k;
6644 uint32_t g_notempty = 0;
6645 uint8_t *p, *pp, *start_rep;
6646 size_t needlen;
6647 void *use_dat_context;
6648 BOOL utf;
6649 BOOL subject_literal;
6650 
6651 PCRE2_SIZE *ovector;
6652 PCRE2_SIZE ovecsave[3];
6653 uint32_t oveccount;
6654 
6655 #ifdef SUPPORT_PCRE2_8
6656 uint8_t *q8 = NULL;
6657 #endif
6658 #ifdef SUPPORT_PCRE2_16
6659 uint16_t *q16 = NULL;
6660 #endif
6661 #ifdef SUPPORT_PCRE2_32
6662 uint32_t *q32 = NULL;
6663 #endif
6664 
6665 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6666 
6667 /* Copy the default context and data control blocks to the active ones. Then
6668 copy from the pattern the controls that can be set in either the pattern or the
6669 data. This allows them to be overridden in the data line. We do not do this for
6670 options because those that are common apply separately to compiling and
6671 matching. */
6672 
6673 DATCTXCPY(dat_context, default_dat_context);
6674 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6675 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6676 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6677 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6678 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6679 
6680 if (dat_datctl.substitute_skip == 0)
6681     dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6682 if (dat_datctl.substitute_stop == 0)
6683     dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6684 
6685 /* Initialize for scanning the data line. */
6686 
6687 #ifdef SUPPORT_PCRE2_8
6688 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6689   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6690   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6691 #else
6692 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6693 #endif
6694 
6695 start_rep = NULL;
6696 len = strlen((const char *)buffer);
6697 while (len > 0 && isspace(buffer[len-1])) len--;
6698 buffer[len] = 0;
6699 p = buffer;
6700 while (isspace(*p)) p++;
6701 
6702 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6703 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6704 
6705 if (utf)
6706   {
6707   uint8_t *q;
6708   uint32_t cc;
6709   int n = 1;
6710   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6711   if (n <= 0)
6712     {
6713     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6714       "in UTF mode\n");
6715     return PR_OK;
6716     }
6717   }
6718 
6719 #ifdef SUPPORT_VALGRIND
6720 /* Mark the dbuffer as addressable but undefined again. */
6721 if (dbuffer != NULL)
6722   {
6723   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6724   }
6725 #endif
6726 
6727 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6728 the number of code units that will be needed (though the buffer may have to be
6729 extended if replication is involved). */
6730 
6731 needlen = (size_t)((len+1) * code_unit_size);
6732 if (dbuffer == NULL || needlen >= dbuffer_size)
6733   {
6734   while (needlen >= dbuffer_size) dbuffer_size *= 2;
6735   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6736   if (dbuffer == NULL)
6737     {
6738     fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6739     exit(1);
6740     }
6741   }
6742 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
6743 
6744 /* Scan the data line, interpreting data escapes, and put the result into a
6745 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6746 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6747 */
6748 
6749 while ((c = *p++) != 0)
6750   {
6751   int32_t i = 0;
6752   size_t replen;
6753 
6754   /* ] may mark the end of a replicated sequence */
6755 
6756   if (c == ']' && start_rep != NULL)
6757     {
6758     long li;
6759     char *endptr;
6760     size_t qoffset = CAST8VAR(q) - dbuffer;
6761     size_t rep_offset = start_rep - dbuffer;
6762 
6763     if (*p++ != '{')
6764       {
6765       fprintf(outfile, "** Expected '{' after \\[....]\n");
6766       return PR_OK;
6767       }
6768 
6769     li = strtol((const char *)p, &endptr, 10);
6770     if (S32OVERFLOW(li))
6771       {
6772       fprintf(outfile, "** Repeat count too large\n");
6773       return PR_OK;
6774       }
6775 
6776     p = (uint8_t *)endptr;
6777     if (*p++ != '}')
6778       {
6779       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6780       return PR_OK;
6781       }
6782 
6783     i = (int32_t)li;
6784     if (i-- == 0)
6785       {
6786       fprintf(outfile, "** Zero repeat not allowed\n");
6787       return PR_OK;
6788       }
6789 
6790     replen = CAST8VAR(q) - start_rep;
6791     needlen += replen * i;
6792 
6793     if (needlen >= dbuffer_size)
6794       {
6795       while (needlen >= dbuffer_size) dbuffer_size *= 2;
6796       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6797       if (dbuffer == NULL)
6798         {
6799         fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6800         exit(1);
6801         }
6802       SETCASTPTR(q, dbuffer + qoffset);
6803       start_rep = dbuffer + rep_offset;
6804       }
6805 
6806     while (i-- > 0)
6807       {
6808       memcpy(CAST8VAR(q), start_rep, replen);
6809       SETPLUS(q, replen/code_unit_size);
6810       }
6811 
6812     start_rep = NULL;
6813     continue;
6814     }
6815 
6816   /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6817   set, do the fudge for setting the top bit. */
6818 
6819   if (c != '\\' || subject_literal)
6820     {
6821     uint32_t topbit = 0;
6822     if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6823       {
6824       topbit = 0x80000000;
6825       c = *p++;
6826       }
6827     if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6828       HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6829     c |= topbit;
6830     }
6831 
6832   /* Handle backslash escapes */
6833 
6834   else switch ((c = *p++))
6835     {
6836     case '\\': break;
6837     case 'a': c = CHAR_BEL; break;
6838     case 'b': c = '\b'; break;
6839     case 'e': c = CHAR_ESC; break;
6840     case 'f': c = '\f'; break;
6841     case 'n': c = '\n'; break;
6842     case 'r': c = '\r'; break;
6843     case 't': c = '\t'; break;
6844     case 'v': c = '\v'; break;
6845 
6846     case '0': case '1': case '2': case '3':
6847     case '4': case '5': case '6': case '7':
6848     c -= '0';
6849     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6850       c = c * 8 + *p++ - '0';
6851     break;
6852 
6853     case 'o':
6854     if (*p == '{')
6855       {
6856       uint8_t *pt = p;
6857       c = 0;
6858       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6859         {
6860         if (++i == 12)
6861           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6862                            "using only the first twelve.\n");
6863         else c = c * 8 + *pt - '0';
6864         }
6865       if (*pt == '}') p = pt + 1;
6866         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6867       }
6868     break;
6869 
6870     case 'x':
6871     if (*p == '{')
6872       {
6873       uint8_t *pt = p;
6874       c = 0;
6875 
6876       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6877       when isxdigit() is a macro that refers to its argument more than
6878       once. This is banned by the C Standard, but apparently happens in at
6879       least one MacOS environment. */
6880 
6881       for (pt++; isxdigit(*pt); pt++)
6882         {
6883         if (++i == 9)
6884           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6885                            "using only the first eight.\n");
6886         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6887         }
6888       if (*pt == '}')
6889         {
6890         p = pt + 1;
6891         break;
6892         }
6893       /* Not correct form for \x{...}; fall through */
6894       }
6895 
6896     /* \x without {} always defines just one byte in 8-bit mode. This
6897     allows UTF-8 characters to be constructed byte by byte, and also allows
6898     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6899     Otherwise, pass it down as data. */
6900 
6901     c = 0;
6902     while (i++ < 2 && isxdigit(*p))
6903       {
6904       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6905       p++;
6906       }
6907 #if defined SUPPORT_PCRE2_8
6908     if (utf && (test_mode == PCRE8_MODE))
6909       {
6910       *q8++ = c;
6911       continue;
6912       }
6913 #endif
6914     break;
6915 
6916     case 0:     /* \ followed by EOF allows for an empty line */
6917     p--;
6918     continue;
6919 
6920     case '=':   /* \= terminates the data, starts modifiers */
6921     goto ENDSTRING;
6922 
6923     case '[':   /* \[ introduces a replicated character sequence */
6924     if (start_rep != NULL)
6925       {
6926       fprintf(outfile, "** Nested replication is not supported\n");
6927       return PR_OK;
6928       }
6929     start_rep = CAST8VAR(q);
6930     continue;
6931 
6932     default:
6933     if (isalnum(c))
6934       {
6935       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6936       return PR_OK;
6937       }
6938     }
6939 
6940   /* We now have a character value in c that may be greater than 255.
6941   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6942   than 127 in UTF mode must have come from \x{...} or octal constructs
6943   because values from \x.. get this far only in non-UTF mode. */
6944 
6945 #ifdef SUPPORT_PCRE2_8
6946   if (test_mode == PCRE8_MODE)
6947     {
6948     if (utf)
6949       {
6950       if (c > 0x7fffffff)
6951         {
6952         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6953           "and so cannot be converted to UTF-8\n", c);
6954         return PR_OK;
6955         }
6956       q8 += ord2utf8(c, q8);
6957       }
6958     else
6959       {
6960       if (c > 0xffu)
6961         {
6962         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6963           "and UTF-8 mode is not enabled.\n", c);
6964         fprintf(outfile, "** Truncation will probably give the wrong "
6965           "result.\n");
6966         }
6967       *q8++ = (uint8_t)c;
6968       }
6969     }
6970 #endif
6971 #ifdef SUPPORT_PCRE2_16
6972   if (test_mode == PCRE16_MODE)
6973     {
6974     if (utf)
6975       {
6976       if (c > 0x10ffffu)
6977         {
6978         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6979           "0x10ffff and so cannot be converted to UTF-16\n", c);
6980         return PR_OK;
6981         }
6982       else if (c >= 0x10000u)
6983         {
6984         c-= 0x10000u;
6985         *q16++ = 0xD800 | (c >> 10);
6986         *q16++ = 0xDC00 | (c & 0x3ff);
6987         }
6988       else
6989         *q16++ = c;
6990       }
6991     else
6992       {
6993       if (c > 0xffffu)
6994         {
6995         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6996           "and UTF-16 mode is not enabled.\n", c);
6997         fprintf(outfile, "** Truncation will probably give the wrong "
6998           "result.\n");
6999         }
7000 
7001       *q16++ = (uint16_t)c;
7002       }
7003     }
7004 #endif
7005 #ifdef SUPPORT_PCRE2_32
7006   if (test_mode == PCRE32_MODE)
7007     {
7008     *q32++ = c;
7009     }
7010 #endif
7011   }
7012 
7013 ENDSTRING:
7014 SET(*q, 0);
7015 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
7016 ulen = len/code_unit_size;                /* Length in code units */
7017 arg_ulen = ulen;                          /* Value to use in match arg */
7018 
7019 /* If the string was terminated by \= we must now interpret modifiers. */
7020 
7021 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7022   return PR_OK;
7023 
7024 /* Setting substitute_{skip,stop} implies a substitute callout. */
7025 
7026 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7027   dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7028 
7029 /* Check for mutually exclusive modifiers. At present, these are all in the
7030 first control word. */
7031 
7032 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7033   {
7034   c = dat_datctl.control & exclusive_dat_controls[k];
7035   if (c != 0 && c != (c & (~c+1)))
7036     {
7037     show_controls(c, 0, "** Not allowed together:");
7038     fprintf(outfile, "\n");
7039     return PR_OK;
7040     }
7041   }
7042 
7043 if (pat_patctl.replacement[0] != 0)
7044   {
7045   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7046       (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7047     {
7048     fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7049     return PR_OK;
7050     }
7051 
7052   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7053     fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7054   }
7055 
7056 /* Warn for modifiers that are ignored for DFA. */
7057 
7058 if ((dat_datctl.control & CTL_DFA) != 0)
7059   {
7060   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7061     fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7062   }
7063 
7064 /* We now have the subject in dbuffer, with len containing the byte length, and
7065 ulen containing the code unit length, with a copy in arg_ulen for use in match
7066 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7067 zero_terminate modifier is present).
7068 
7069 Move the data to the end of the buffer so that a read over the end can be
7070 caught by valgrind or other means. If we have explicit valgrind support, mark
7071 the unused start of the buffer unaddressable. If we are using the POSIX
7072 interface, or testing zero-termination, we must include the terminating zero in
7073 the usable data. */
7074 
7075 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7076                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7077 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7078 #ifdef SUPPORT_VALGRIND
7079   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7080 #endif
7081 
7082 /* Now pp points to the subject string, but if null_subject was specified, set
7083 it to NULL to test PCRE2's behaviour. */
7084 
7085 if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
7086 
7087 /* POSIX matching is only possible in 8-bit mode, and it does not support
7088 timing or other fancy features. Some were checked at compile time, but we need
7089 to check the match-time settings here. */
7090 
7091 #ifdef SUPPORT_PCRE2_8
7092 if ((pat_patctl.control & CTL_POSIX) != 0)
7093   {
7094   int rc;
7095   int eflags = 0;
7096   regmatch_t *pmatch = NULL;
7097   const char *msg = "** Ignored with POSIX interface:";
7098 
7099   if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7100     prmsg(&msg, "callout_error");
7101   if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7102     prmsg(&msg, "callout_fail");
7103   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7104     prmsg(&msg, "copy");
7105   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7106     prmsg(&msg, "get");
7107   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7108   if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7109 
7110   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7111     {
7112     fprintf(outfile, "%s", msg);
7113     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7114     msg = "";
7115     }
7116   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7117       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7118     {
7119     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7120                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7121     msg = "";
7122     }
7123 
7124   if (msg[0] == 0) fprintf(outfile, "\n");
7125 
7126   if (dat_datctl.oveccount > 0)
7127     {
7128     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7129     if (pmatch == NULL)
7130       {
7131       fprintf(outfile, "** Failed to get memory for recording matching "
7132         "information (size set = %du)\n", dat_datctl.oveccount);
7133       return PR_OK;
7134       }
7135     }
7136 
7137   if (dat_datctl.startend[0] != CFORE_UNSET)
7138     {
7139     pmatch[0].rm_so = dat_datctl.startend[0];
7140     pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7141       dat_datctl.startend[1] : len;
7142     eflags |= REG_STARTEND;
7143     }
7144 
7145   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7146   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7147   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7148 
7149   rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7150   if (rc != 0)
7151     {
7152     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7153     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7154     }
7155   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7156     fprintf(outfile, "Matched with REG_NOSUB\n");
7157   else if (dat_datctl.oveccount == 0)
7158     fprintf(outfile, "Matched without capture\n");
7159   else
7160     {
7161     size_t i, j;
7162     size_t last_printed = (size_t)dat_datctl.oveccount;
7163     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7164       {
7165       if (pmatch[i].rm_so >= 0)
7166         {
7167         PCRE2_SIZE start = pmatch[i].rm_so;
7168         PCRE2_SIZE end = pmatch[i].rm_eo;
7169         for (j = last_printed + 1; j < i; j++)
7170           fprintf(outfile, "%2d: <unset>\n", (int)j);
7171         last_printed = i;
7172         if (start > end)
7173           {
7174           start = pmatch[i].rm_eo;
7175           end = pmatch[i].rm_so;
7176           fprintf(outfile, "Start of matched string is beyond its end - "
7177             "displaying from end to start.\n");
7178           }
7179         fprintf(outfile, "%2d: ", (int)i);
7180         PCHARSV(pp, start, end - start, utf, outfile);
7181         fprintf(outfile, "\n");
7182 
7183         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7184             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7185           {
7186           fprintf(outfile, "%2d+ ", (int)i);
7187           /* Note: don't use the start/end variables here because we want to
7188           show the text from what is reported as the end. */
7189           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7190           fprintf(outfile, "\n"); }
7191         }
7192       }
7193     }
7194   free(pmatch);
7195   return PR_OK;
7196   }
7197 #endif  /* SUPPORT_PCRE2_8 */
7198 
7199  /* Handle matching via the native interface. Check for consistency of
7200 modifiers. */
7201 
7202 if (dat_datctl.startend[0] != CFORE_UNSET)
7203   fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7204 
7205 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7206 matching, even if the JIT compiler was used. */
7207 
7208 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7209     FLD(compiled_code, executable_jit) != NULL)
7210   {
7211   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7212   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7213   }
7214 
7215 /* Handle passing the subject as zero-terminated. */
7216 
7217 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7218   arg_ulen = PCRE2_ZERO_TERMINATED;
7219 
7220 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7221 NULL context. */
7222 
7223 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7224   NULL : PTR(dat_context);
7225 
7226 /* Enable display of malloc/free if wanted. We can do this only if either the
7227 pattern or the subject is processed with a context. */
7228 
7229 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7230 
7231 if (show_memory &&
7232     (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7233   fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7234     "context: ignored\n");
7235 
7236 /* Create and assign a JIT stack if requested. */
7237 
7238 if (dat_datctl.jitstack != 0)
7239   {
7240   if (dat_datctl.jitstack != jit_stack_size)
7241     {
7242     PCRE2_JIT_STACK_FREE(jit_stack);
7243     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7244     jit_stack_size = dat_datctl.jitstack;
7245     }
7246   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7247   }
7248 
7249 /* Or de-assign */
7250 
7251 else if (jit_stack != NULL)
7252   {
7253   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7254   PCRE2_JIT_STACK_FREE(jit_stack);
7255   jit_stack = NULL;
7256   jit_stack_size = 0;
7257   }
7258 
7259 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7260 if we want to verify that JIT was actually used. */
7261 
7262 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7263    {
7264    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7265    }
7266 
7267 /* Adjust match_data according to size of offsets required. A size of zero
7268 causes a new match data block to be obtained that exactly fits the pattern. */
7269 
7270 if (dat_datctl.oveccount == 0)
7271   {
7272   PCRE2_MATCH_DATA_FREE(match_data);
7273   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7274   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7275   }
7276 else if (dat_datctl.oveccount <= max_oveccount)
7277   {
7278   SETFLD(match_data, oveccount, dat_datctl.oveccount);
7279   }
7280 else
7281   {
7282   max_oveccount = dat_datctl.oveccount;
7283   PCRE2_MATCH_DATA_FREE(match_data);
7284   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7285   }
7286 
7287 if (CASTVAR(void *, match_data) == NULL)
7288   {
7289   fprintf(outfile, "** Failed to get memory for recording matching "
7290     "information (size requested: %d)\n", dat_datctl.oveccount);
7291   max_oveccount = 0;
7292   return PR_OK;
7293   }
7294 
7295 ovector = FLD(match_data, ovector);
7296 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7297 
7298 /* Replacement processing is ignored for DFA matching. */
7299 
7300 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7301   {
7302   fprintf(outfile, "** Ignored for DFA matching: replace\n");
7303   dat_datctl.replacement[0] = 0;
7304   }
7305 
7306 /* If a replacement string is provided, call pcre2_substitute() instead of one
7307 of the matching functions. First we have to convert the replacement string to
7308 the appropriate width. */
7309 
7310 if (dat_datctl.replacement[0] != 0)
7311   {
7312   int rc;
7313   uint8_t *pr;
7314   uint8_t rbuffer[REPLACE_BUFFSIZE];
7315   uint8_t nbuffer[REPLACE_BUFFSIZE];
7316   uint8_t *rbptr;
7317   uint32_t xoptions;
7318   uint32_t emoption;  /* External match option */
7319   PCRE2_SIZE j, rlen, nsize, erroroffset;
7320   BOOL badutf = FALSE;
7321 
7322 #ifdef SUPPORT_PCRE2_8
7323   uint8_t *r8 = NULL;
7324 #endif
7325 #ifdef SUPPORT_PCRE2_16
7326   uint16_t *r16 = NULL;
7327 #endif
7328 #ifdef SUPPORT_PCRE2_32
7329   uint32_t *r32 = NULL;
7330 #endif
7331 
7332   /* Fill the ovector with junk to detect elements that do not get set
7333   when they should be (relevant only when "allvector" is specified). */
7334 
7335   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7336 
7337   if (timeitm)
7338     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7339 
7340   if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7341     fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7342 
7343   /* Check for a test that does substitution after an initial external match.
7344   If this is set, we run the external match, but leave the interpretation of
7345   its output to pcre2_substitute(). */
7346 
7347   emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7348     PCRE2_SUBSTITUTE_MATCHED;
7349 
7350   if (emoption != 0)
7351     {
7352     PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7353       dat_datctl.options, match_data, use_dat_context);
7354     }
7355 
7356   xoptions = emoption |
7357              (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7358                 PCRE2_SUBSTITUTE_GLOBAL) |
7359              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7360                 PCRE2_SUBSTITUTE_EXTENDED) |
7361              (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7362                 PCRE2_SUBSTITUTE_LITERAL) |
7363              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7364                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7365              (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7366                 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7367              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7368                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7369              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7370                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7371 
7372   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
7373   pr = dat_datctl.replacement;
7374 
7375   /* If the replacement starts with '[<number>]' we interpret that as length
7376   value for the replacement buffer. */
7377 
7378   nsize = REPLACE_BUFFSIZE/code_unit_size;
7379   if (*pr == '[')
7380     {
7381     PCRE2_SIZE n = 0;
7382     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7383     if (*pr++ != ']')
7384       {
7385       fprintf(outfile, "Bad buffer size in replacement string\n");
7386       return PR_OK;
7387       }
7388     if (n > nsize)
7389       {
7390       fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7391         "large (max %" SIZ_FORM ")\n", n, nsize);
7392       return PR_OK;
7393       }
7394     nsize = n;
7395     }
7396 
7397   /* Now copy the replacement string to a buffer of the appropriate width. No
7398   escape processing is done for replacements. In UTF mode, check for an invalid
7399   UTF-8 input string, and if it is invalid, just copy its code units without
7400   UTF interpretation. This provides a means of checking that an invalid string
7401   is detected. Otherwise, UTF-8 can be used to include wide characters in a
7402   replacement. */
7403 
7404   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7405 
7406   /* Not UTF or invalid UTF-8: just copy the code units. */
7407 
7408   if (!utf || badutf)
7409     {
7410     while ((c = *pr++) != 0)
7411       {
7412 #ifdef SUPPORT_PCRE2_8
7413       if (test_mode == PCRE8_MODE) *r8++ = c;
7414 #endif
7415 #ifdef SUPPORT_PCRE2_16
7416       if (test_mode == PCRE16_MODE) *r16++ = c;
7417 #endif
7418 #ifdef SUPPORT_PCRE2_32
7419       if (test_mode == PCRE32_MODE) *r32++ = c;
7420 #endif
7421       }
7422     }
7423 
7424   /* Valid UTF-8 replacement string */
7425 
7426   else while ((c = *pr++) != 0)
7427     {
7428     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7429 
7430 #ifdef SUPPORT_PCRE2_8
7431     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7432 #endif
7433 
7434 #ifdef SUPPORT_PCRE2_16
7435     if (test_mode == PCRE16_MODE)
7436       {
7437       if (c >= 0x10000u)
7438         {
7439         c-= 0x10000u;
7440         *r16++ = 0xD800 | (c >> 10);
7441         *r16++ = 0xDC00 | (c & 0x3ff);
7442         }
7443       else *r16++ = c;
7444       }
7445 #endif
7446 
7447 #ifdef SUPPORT_PCRE2_32
7448     if (test_mode == PCRE32_MODE) *r32++ = c;
7449 #endif
7450     }
7451 
7452   SET(*r, 0);
7453   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7454     rlen = PCRE2_ZERO_TERMINATED;
7455   else
7456     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7457 
7458   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7459     {
7460     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7461     }
7462   else
7463     {
7464     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7465     }
7466 
7467   /* There is a special option to set the replacement to NULL in order to test
7468   that case. */
7469 
7470   rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
7471 
7472   PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7473     dat_datctl.options|xoptions, match_data, use_dat_context,
7474     rbptr, rlen, nbuffer, &nsize);
7475 
7476   if (rc < 0)
7477     {
7478     fprintf(outfile, "Failed: error %d", rc);
7479     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7480       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7481     fprintf(outfile, ": ");
7482     if (!print_error_message(rc, "", "")) return PR_ABEND;
7483     if (rc == PCRE2_ERROR_NOMEMORY &&
7484         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7485       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7486     }
7487   else
7488     {
7489     fprintf(outfile, "%2d: ", rc);
7490     PCHARSV(nbuffer, 0, nsize, utf, outfile);
7491     }
7492 
7493   fprintf(outfile, "\n");
7494   show_memory = FALSE;
7495 
7496   /* Show final ovector contents if requested. */
7497 
7498   if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7499     show_ovector(ovector, oveccount);
7500 
7501   return PR_OK;
7502   }   /* End of substitution handling */
7503 
7504 /* When a replacement string is not provided, run a loop for global matching
7505 with one of the basic matching functions. For altglobal (or first time round
7506 the loop), set an "unset" value for the previous match info. */
7507 
7508 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7509 
7510 for (gmatched = 0;; gmatched++)
7511   {
7512   PCRE2_SIZE j;
7513   int capcount;
7514 
7515   /* Fill the ovector with junk to detect elements that do not get set
7516   when they should be. */
7517 
7518   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7519 
7520   /* When matching is via pcre2_match(), we will detect the use of JIT via the
7521   stack callback function. */
7522 
7523   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7524 
7525   /* Do timing if required. */
7526 
7527   if (timeitm > 0)
7528     {
7529     int i;
7530     clock_t start_time, time_taken;
7531 
7532     if ((dat_datctl.control & CTL_DFA) != 0)
7533       {
7534       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7535         {
7536         fprintf(outfile, "Timing DFA restarts is not supported\n");
7537         return PR_OK;
7538         }
7539       if (dfa_workspace == NULL)
7540         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7541       start_time = clock();
7542       for (i = 0; i < timeitm; i++)
7543         {
7544         PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7545           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7546           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7547         }
7548       }
7549 
7550     else if ((pat_patctl.control & CTL_JITFAST) != 0)
7551       {
7552       start_time = clock();
7553       for (i = 0; i < timeitm; i++)
7554         {
7555         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7556           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7557           use_dat_context);
7558         }
7559       }
7560 
7561     else
7562       {
7563       start_time = clock();
7564       for (i = 0; i < timeitm; i++)
7565         {
7566         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7567           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7568           use_dat_context);
7569         }
7570       }
7571     total_match_time += (time_taken = clock() - start_time);
7572     fprintf(outfile, "Match time %.4f milliseconds\n",
7573       (((double)time_taken * 1000.0) / (double)timeitm) /
7574         (double)CLOCKS_PER_SEC);
7575     }
7576 
7577   /* Find the heap, match and depth limits if requested. The depth and heap
7578   limits are not relevant for JIT. The return from check_match_limit() is the
7579   return from the final call to pcre2_match() or pcre2_dfa_match(). */
7580 
7581   if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7582     {
7583     capcount = 0;  /* This stops compiler warnings */
7584 
7585     if (FLD(compiled_code, executable_jit) == NULL ||
7586           (dat_datctl.options & PCRE2_NO_JIT) != 0)
7587       {
7588       (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7589       }
7590 
7591     capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7592       "match");
7593 
7594     if (FLD(compiled_code, executable_jit) == NULL ||
7595         (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7596         (dat_datctl.control & CTL_DFA) != 0)
7597       {
7598       capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7599         "depth");
7600       }
7601 
7602     if (capcount == 0)
7603       {
7604       fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7605       capcount = dat_datctl.oveccount;
7606       }
7607     }
7608 
7609   /* Otherwise just run a single match, setting up a callout if required (the
7610   default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7611 
7612   else
7613     {
7614     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7615       {
7616       PCRE2_SET_CALLOUT(dat_context, callout_function,
7617         (void *)(&dat_datctl.callout_data));
7618       first_callout = TRUE;
7619       last_callout_mark = NULL;
7620       callout_count = 0;
7621       }
7622     else
7623       {
7624       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7625       }
7626 
7627     /* Run a single DFA or NFA match. */
7628 
7629     if ((dat_datctl.control & CTL_DFA) != 0)
7630       {
7631       if (dfa_workspace == NULL)
7632         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7633       if (dfa_matched++ == 0)
7634         dfa_workspace[0] = -1;  /* To catch bad restart */
7635       PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7636         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7637         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7638       if (capcount == 0)
7639         {
7640         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7641         capcount = dat_datctl.oveccount;
7642         }
7643       }
7644     else
7645       {
7646       if ((pat_patctl.control & CTL_JITFAST) != 0)
7647         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7648           dat_datctl.options | g_notempty, match_data, use_dat_context);
7649       else
7650         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7651           dat_datctl.options | g_notempty, match_data, use_dat_context);
7652       if (capcount == 0)
7653         {
7654         fprintf(outfile, "Matched, but too many substrings\n");
7655         capcount = dat_datctl.oveccount;
7656         }
7657       }
7658     }
7659 
7660   /* The result of the match is now in capcount. First handle a successful
7661   match. If pp was forced to be NULL (to test NULL handling) it will have been
7662   treated as an empty string if the length was zero. So re-create that for
7663   outputting. */
7664 
7665   if (capcount >= 0)
7666     {
7667     int i;
7668 
7669     if (pp == NULL) pp = (uint8_t *)"";
7670 
7671     if (capcount > (int)oveccount)   /* Check for lunatic return value */
7672       {
7673       fprintf(outfile,
7674         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7675         capcount, oveccount);
7676       capcount = oveccount;
7677       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7678         {
7679         fprintf(outfile, "** Global loop abandoned\n");
7680         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7681         }
7682       }
7683 
7684     /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7685     should be, but not for fast JIT, where it isn't supported. */
7686 
7687     if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7688         (pat_patctl.control & CTL_JITFAST) == 0)
7689       {
7690       if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7691         fprintf(outfile,
7692           "** PCRE2 error: flag not set after copy_matched_subject\n");
7693 
7694       if (CASTFLD(void *, match_data, subject) == pp)
7695         fprintf(outfile,
7696           "** PCRE2 error: copy_matched_subject has not copied\n");
7697 
7698       if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7699         fprintf(outfile,
7700           "** PCRE2 error: copy_matched_subject mismatch\n");
7701       }
7702 
7703     /* If this is not the first time round a global loop, check that the
7704     returned string has changed. If it has not, check for an empty string match
7705     at different starting offset from the previous match. This is a failed test
7706     retry for null-matching patterns that don't match at their starting offset,
7707     for example /(?<=\G.)/. A repeated match at the same point is not such a
7708     pattern, and must be discarded, and we then proceed to seek a non-null
7709     match at the current point. For any other repeated match, there is a bug
7710     somewhere and we must break the loop because it will go on for ever. We
7711     know that there are always at least two elements in the ovector. */
7712 
7713     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7714       {
7715       if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7716         {
7717         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7718         ovecsave[2] = dat_datctl.offset;
7719         continue;    /* Back to the top of the loop */
7720         }
7721       fprintf(outfile,
7722         "** PCRE2 error: global repeat returned the same string as previous\n");
7723       fprintf(outfile, "** Global loop abandoned\n");
7724       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7725       }
7726 
7727     /* "allcaptures" requests showing of all captures in the pattern, to check
7728     unset ones at the end. It may be set on the pattern or the data. Implement
7729     by setting capcount to the maximum. This is not relevant for DFA matching,
7730     so ignore it (warning given above). */
7731 
7732     if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7733       {
7734       capcount = maxcapcount + 1;   /* Allow for full match */
7735       if (capcount > (int)oveccount) capcount = oveccount;
7736       }
7737 
7738     /* "allvector" request showing the entire ovector. */
7739 
7740     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7741 
7742     /* Output the captured substrings. Note that, for the matched string,
7743     the use of \K in an assertion can make the start later than the end. */
7744 
7745     for (i = 0; i < 2*capcount; i += 2)
7746       {
7747       PCRE2_SIZE lleft, lmiddle, lright;
7748       PCRE2_SIZE start = ovector[i];
7749       PCRE2_SIZE end = ovector[i+1];
7750 
7751       if (start > end)
7752         {
7753         start = ovector[i+1];
7754         end = ovector[i];
7755         fprintf(outfile, "Start of matched string is beyond its end - "
7756           "displaying from end to start.\n");
7757         }
7758 
7759       fprintf(outfile, "%2d: ", i/2);
7760 
7761       /* Check for an unset group */
7762 
7763       if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7764         {
7765         fprintf(outfile, "<unset>\n");
7766         continue;
7767         }
7768 
7769       /* Check for silly offsets, in particular, values that have not been
7770       set when they should have been. However, if we are past the end of the
7771       captures for this pattern ("allvector" causes this), or if we are DFA
7772       matching, it isn't an error if the entry is unchanged. */
7773 
7774       if (start > ulen || end > ulen)
7775         {
7776         if (((dat_datctl.control & CTL_DFA) != 0 ||
7777               i >= (int)(2*maxcapcount + 2)) &&
7778             start == JUNK_OFFSET && end == JUNK_OFFSET)
7779           fprintf(outfile, "<unchanged>\n");
7780         else
7781           fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7782             (unsigned long int)start, (unsigned long int)end);
7783         continue;
7784         }
7785 
7786       /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with
7787       JIT, it is disabled above, with a comment.) When the match is done by the
7788       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7789       set, and if the leftmost consulted character is before the start of the
7790       match or the rightmost consulted character is past the end of the match,
7791       we want to show all consulted characters for the main matched string, and
7792       indicate which were lookarounds. */
7793 
7794       if (i == 0)
7795         {
7796         BOOL showallused;
7797         PCRE2_SIZE leftchar, rightchar;
7798 
7799         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7800           {
7801           leftchar = FLD(match_data, leftchar);
7802           rightchar = FLD(match_data, rightchar);
7803           showallused = i == 0 && (leftchar < start || rightchar > end);
7804           }
7805         else showallused = FALSE;
7806 
7807         if (showallused)
7808           {
7809           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7810           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7811           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7812           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7813             fprintf(outfile, " (JIT)");
7814           fprintf(outfile, "\n    ");
7815           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7816           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7817           for (j = 0; j < lright; j++) fprintf(outfile, ">");
7818           }
7819 
7820         /* When a pattern contains \K, the start of match position may be
7821         different to the start of the matched string. When this is the case,
7822         show it when requested. */
7823 
7824         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7825           {
7826           PCRE2_SIZE startchar;
7827           PCRE2_GET_STARTCHAR(startchar, match_data);
7828           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7829           PCHARSV(pp, start, end - start, utf, outfile);
7830           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7831             fprintf(outfile, " (JIT)");
7832           if (startchar != start)
7833             {
7834             fprintf(outfile, "\n    ");
7835             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7836             }
7837           }
7838 
7839         /* Otherwise, just show the matched string. */
7840 
7841         else
7842           {
7843           PCHARSV(pp, start, end - start, utf, outfile);
7844           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7845             fprintf(outfile, " (JIT)");
7846           }
7847         }
7848 
7849       /* Not the main matched string. Just show it unadorned. */
7850 
7851       else
7852         {
7853         PCHARSV(pp, start, end - start, utf, outfile);
7854         }
7855 
7856       fprintf(outfile, "\n");
7857 
7858       /* Note: don't use the start/end variables here because we want to
7859       show the text from what is reported as the end. */
7860 
7861       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7862           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7863         {
7864         fprintf(outfile, "%2d+ ", i/2);
7865         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7866         fprintf(outfile, "\n");
7867         }
7868       }
7869 
7870     /* Output (*MARK) data if requested */
7871 
7872     if ((dat_datctl.control & CTL_MARK) != 0 &&
7873          TESTFLD(match_data, mark, !=, NULL))
7874       {
7875       fprintf(outfile, "MK: ");
7876       PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7877       fprintf(outfile, "\n");
7878       }
7879 
7880     /* Process copy/get strings */
7881 
7882     if (!copy_and_get(utf, capcount)) return PR_ABEND;
7883 
7884     }    /* End of handling a successful match */
7885 
7886   /* There was a partial match. The value of ovector[0] is the bumpalong point,
7887   that is, startchar, not any \K point that might have been passed. When JIT is
7888   not in use, "allusedtext" may be set, in which case we indicate the leftmost
7889   consulted character. */
7890 
7891   else if (capcount == PCRE2_ERROR_PARTIAL)
7892     {
7893     PCRE2_SIZE leftchar;
7894     int backlength;
7895     int rubriclength = 0;
7896 
7897     if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7898       {
7899       leftchar = FLD(match_data, leftchar);
7900       }
7901     else leftchar = ovector[0];
7902 
7903     fprintf(outfile, "Partial match");
7904     if ((dat_datctl.control & CTL_MARK) != 0 &&
7905          TESTFLD(match_data, mark, !=, NULL))
7906       {
7907       fprintf(outfile, ", mark=");
7908       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7909         outfile);
7910       rubriclength += 7;
7911       }
7912     fprintf(outfile, ": ");
7913     rubriclength += 15;
7914 
7915     PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7916     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7917 
7918     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7919       fprintf(outfile, " (JIT)");
7920     fprintf(outfile, "\n");
7921 
7922     if (backlength != 0)
7923       {
7924       int i;
7925       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7926       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7927       fprintf(outfile, "\n");
7928       }
7929 
7930     if (ulen != ovector[1])
7931       fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7932         "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7933 
7934     /* Process copy/get strings */
7935 
7936     if (!copy_and_get(utf, 1)) return PR_ABEND;
7937 
7938     /* "allvector" outputs the entire vector */
7939 
7940     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7941       show_ovector(ovector, oveccount);
7942 
7943     break;  /* Out of the /g loop */
7944     }       /* End of handling partial match */
7945 
7946   /* Failed to match. If this is a /g or /G loop, we might previously have
7947   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7948   If that is the case, this is not necessarily the end. We want to advance the
7949   start offset, and continue. We won't be at the end of the string - that was
7950   checked before setting g_notempty. We achieve the effect by pretending that a
7951   single character was matched.
7952 
7953   Complication arises in the case when the newline convention is "any", "crlf",
7954   or "anycrlf". If the previous match was at the end of a line terminated by
7955   CRLF, an advance of one character just passes the CR, whereas we should
7956   prefer the longer newline sequence, as does the code in pcre2_match().
7957 
7958   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7959   character, not one byte. */
7960 
7961   else if (g_notempty != 0)   /* There was a previous null match */
7962     {
7963     uint16_t nl = FLD(compiled_code, newline_convention);
7964     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
7965     PCRE2_SIZE end_offset = start_offset + 1;
7966 
7967     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7968          nl == PCRE2_NEWLINE_ANYCRLF) &&
7969         start_offset < ulen - 1 &&
7970         CODE_UNIT(pp, start_offset) == '\r' &&
7971         CODE_UNIT(pp, end_offset) == '\n')
7972       end_offset++;
7973 
7974     else if (utf && test_mode != PCRE32_MODE)
7975       {
7976       if (test_mode == PCRE8_MODE)
7977         {
7978         for (; end_offset < ulen; end_offset++)
7979           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7980         }
7981       else  /* 16-bit mode */
7982         {
7983         for (; end_offset < ulen; end_offset++)
7984           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7985         }
7986       }
7987 
7988     SETFLDVEC(match_data, ovector, 0, start_offset);
7989     SETFLDVEC(match_data, ovector, 1, end_offset);
7990     }  /* End of handling null match in a global loop */
7991 
7992   /* A "normal" match failure. There will be a negative error number in
7993   capcount. */
7994 
7995   else
7996     {
7997     switch(capcount)
7998       {
7999       case PCRE2_ERROR_NOMATCH:
8000       if (gmatched == 0)
8001         {
8002         fprintf(outfile, "No match");
8003         if ((dat_datctl.control & CTL_MARK) != 0 &&
8004              TESTFLD(match_data, mark, !=, NULL))
8005           {
8006           fprintf(outfile, ", mark = ");
8007           PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8008           }
8009         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8010           fprintf(outfile, " (JIT)");
8011         fprintf(outfile, "\n");
8012 
8013         /* "allvector" outputs the entire vector */
8014 
8015         if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8016           show_ovector(ovector, oveccount);
8017         }
8018       break;
8019 
8020       case PCRE2_ERROR_BADUTFOFFSET:
8021       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
8022       break;
8023 
8024       default:
8025       fprintf(outfile, "Failed: error %d: ", capcount);
8026       if (!print_error_message(capcount, "", "")) return PR_ABEND;
8027       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
8028           capcount >= PCRE2_ERROR_UTF32_ERR2)
8029         {
8030         PCRE2_SIZE startchar;
8031         PCRE2_GET_STARTCHAR(startchar, match_data);
8032         fprintf(outfile, " at offset %" SIZ_FORM, startchar);
8033         }
8034       fprintf(outfile, "\n");
8035       break;
8036       }
8037 
8038     break;  /* Out of the /g loop */
8039     }       /* End of failed match handling */
8040 
8041   /* Control reaches here in two circumstances: (a) after a match, and (b)
8042   after a non-match that immediately followed a match on an empty string when
8043   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8044   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8045   of one character. So effectively we get here only after a match. If we
8046   are not doing a global search, we are done. */
8047 
8048   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8049     {
8050     PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8051     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8052 
8053     /* We must now set up for the next iteration of a global search. If we have
8054     matched an empty string, first check to see if we are at the end of the
8055     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8056     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8057     at the same point. If this fails it will be picked up above, where a fake
8058     match is set up so that at this point we advance to the next character.
8059 
8060     However, in order to cope with patterns that never match at their starting
8061     offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8062     than the starting offset. This means there will be a retry with the
8063     starting offset at the match offset. If this returns the same match again,
8064     it is picked up above and ignored, and the special action is then taken. */
8065 
8066     if (match_offset == end_offset)
8067       {
8068       if (end_offset == ulen) break;           /* End of subject */
8069       if (match_offset <= dat_datctl.offset)
8070         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8071       }
8072 
8073     /* However, even after matching a non-empty string, there is still one
8074     tricky case. If a pattern contains \K within a lookbehind assertion at the
8075     start, the end of the matched string can be at the offset where the match
8076     started. In the case of a normal /g iteration without special action, this
8077     leads to a loop that keeps on returning the same substring. The loop would
8078     be caught above, but we really want to move on to the next match. */
8079 
8080     else
8081       {
8082       g_notempty = 0;   /* Set for a "normal" repeat */
8083       if ((dat_datctl.control & CTL_GLOBAL) != 0)
8084         {
8085         PCRE2_SIZE startchar;
8086         PCRE2_GET_STARTCHAR(startchar, match_data);
8087         if (end_offset <= startchar)
8088           {
8089           if (startchar >= ulen) break;       /* End of subject */
8090           end_offset = startchar + 1;
8091           if (utf && test_mode != PCRE32_MODE)
8092             {
8093             if (test_mode == PCRE8_MODE)
8094               {
8095               for (; end_offset < ulen; end_offset++)
8096                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8097               }
8098             else  /* 16-bit mode */
8099               {
8100               for (; end_offset < ulen; end_offset++)
8101                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8102               }
8103             }
8104           }
8105         }
8106       }
8107 
8108     /* For a normal global (/g) iteration, save the current ovector[0,1] and
8109     the starting offset so that we can check that they do change each time.
8110     Otherwise a matching bug that returns the same string causes an infinite
8111     loop. It has happened! Then update the start offset, leaving other
8112     parameters alone. */
8113 
8114     if ((dat_datctl.control & CTL_GLOBAL) != 0)
8115       {
8116       ovecsave[0] = ovector[0];
8117       ovecsave[1] = ovector[1];
8118       ovecsave[2] = dat_datctl.offset;
8119       dat_datctl.offset = end_offset;
8120       }
8121 
8122     /* For altglobal, just update the pointer and length. */
8123 
8124     else
8125       {
8126       pp += end_offset * code_unit_size;
8127       len -= end_offset * code_unit_size;
8128       ulen -= end_offset;
8129       if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8130       }
8131     }
8132   }  /* End of global loop */
8133 
8134 show_memory = FALSE;
8135 return PR_OK;
8136 }
8137 
8138 
8139 
8140 
8141 /*************************************************
8142 *               Print PCRE2 version              *
8143 *************************************************/
8144 
8145 static void
print_version(FILE * f)8146 print_version(FILE *f)
8147 {
8148 VERSION_TYPE *vp;
8149 fprintf(f, "PCRE2 version ");
8150 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8151 fprintf(f, "\n");
8152 }
8153 
8154 
8155 
8156 /*************************************************
8157 *               Print Unicode version            *
8158 *************************************************/
8159 
8160 static void
print_unicode_version(FILE * f)8161 print_unicode_version(FILE *f)
8162 {
8163 VERSION_TYPE *vp;
8164 fprintf(f, "Unicode version ");
8165 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8166 }
8167 
8168 
8169 
8170 /*************************************************
8171 *               Print JIT target                 *
8172 *************************************************/
8173 
8174 static void
print_jit_target(FILE * f)8175 print_jit_target(FILE *f)
8176 {
8177 VERSION_TYPE *vp;
8178 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8179 }
8180 
8181 
8182 
8183 /*************************************************
8184 *       Print newline configuration              *
8185 *************************************************/
8186 
8187 /* Output is always to stdout.
8188 
8189 Arguments:
8190   rc         the return code from PCRE2_CONFIG_NEWLINE
8191   isc        TRUE if called from "-C newline"
8192 Returns:     nothing
8193 */
8194 
8195 static void
print_newline_config(uint32_t optval,BOOL isc)8196 print_newline_config(uint32_t optval, BOOL isc)
8197 {
8198 if (!isc) printf("  Default newline sequence is ");
8199 if (optval < sizeof(newlines)/sizeof(char *))
8200   printf("%s\n", newlines[optval]);
8201 else
8202   printf("a non-standard value: %d\n", optval);
8203 }
8204 
8205 
8206 
8207 /*************************************************
8208 *             Usage function                     *
8209 *************************************************/
8210 
/* Print the command-line help text to stdout. The text is held in a table of
strings that is assembled at compile time: the readline remark and the -8, -16
and -32 entries depend on which SUPPORT_xxx macros are defined. The strings are
written out verbatim, in order.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
static const char *const help_text[] = {
  "Usage:     pcre2test [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcre2test is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE2_8
  "  -8            use the 8-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_16
  "  -16           use the 16-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_32
  "  -32           use the 32-bit library\n",
#endif
  "  -ac           set default pattern modifier PCRE2_AUTO_CALLOUT\n",
  "  -AC           as -ac, but also set subject 'callout_extra' modifier\n",
  "  -b            set default pattern modifier 'fullbincode'\n",
  "  -C            show PCRE2 compile-time options and exit\n",
  "  -C arg        show a specific compile-time option and exit with its\n",
  "                  value if numeric (else 0). The arg can be:\n",
  "     backslash-C    use of \\C is enabled [0, 1]\n",
  "     bsr            \\R type [ANYCRLF, ANY]\n",
  "     ebcdic         compiled for EBCDIC character code [0,1]\n",
  "     ebcdic-nl      NL code if compiled for EBCDIC\n",
  "     jit            just-in-time compiler supported [0, 1]\n",
  "     linksize       internal link size [2, 3, 4]\n",
  "     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n",
  "     pcre2-8        8 bit library support enabled [0, 1]\n",
  "     pcre2-16       16 bit library support enabled [0, 1]\n",
  "     pcre2-32       32 bit library support enabled [0, 1]\n",
  "     unicode        Unicode and UTF support enabled [0, 1]\n",
  "  -d            set default pattern modifier 'debug'\n",
  "  -dfa          set default subject modifier 'dfa'\n",
  "  -error <n,m,..>  show messages for error numbers, then exit\n",
  "  -help         show usage information\n",
  "  -i            set default pattern modifier 'info'\n",
  "  -jit          set default pattern modifier 'jit'\n",
  "  -jitfast      set default pattern modifier 'jitfast'\n",
  "  -jitverify    set default pattern modifier 'jitverify'\n",
  "  -LM           list pattern and subject modifiers, then exit\n",
  "  -LP           list non-script properties, then exit\n",
  "  -LS           list supported scripts, then exit\n",
  "  -q            quiet: do not output PCRE2 version number at start\n",
  "  -pattern <s>  set default pattern modifier fields\n",
  "  -subject <s>  set default subject modifier fields\n",
  "  -S <n>        set stack size to <n> mebibytes\n",
  "  -t [<n>]      time compilation and execution, repeating <n> times\n",
  "  -tm [<n>]     time execution (matching) only, repeating <n> times\n",
  "  -T            same as -t, but show total times at the end\n",
  "  -TM           same as -tm, but show total time at the end\n",
  "  -version      show PCRE2 version and exit\n"
};
unsigned int k;

/* None of the strings contains a conversion specification, so writing them
unformatted produces exactly what printf() would. */

for (k = 0; k < sizeof(help_text)/sizeof(help_text[0]); k++)
  fputs(help_text[k], stdout);
}
8269 
8270 
8271 
8272 /*************************************************
8273 *             Handle -C option                   *
8274 *************************************************/
8275 
8276 /* This option outputs configuration options and sets an appropriate return
8277 code when asked for a single option. The code is abstracted into a separate
8278 function because of its size. Use whichever pcre2_config() function is
8279 available.
8280 
8281 Argument:   an option name or NULL
8282 Returns:    the return code
8283 */
8284 
8285 static int
c_option(const char * arg)8286 c_option(const char *arg)
8287 {
8288 uint32_t optval;
8289 unsigned int i = COPTLISTCOUNT;
8290 int yield = 0;
8291 
8292 if (arg != NULL && arg[0] != CHAR_MINUS)
8293   {
8294   for (i = 0; i < COPTLISTCOUNT; i++)
8295     if (strcmp(arg, coptlist[i].name) == 0) break;
8296 
8297   if (i >= COPTLISTCOUNT)
8298     {
8299     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8300     return 0;
8301     }
8302 
8303   switch (coptlist[i].type)
8304     {
8305     case CONF_BSR:
8306     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8307     printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8308     break;
8309 
8310     case CONF_FIX:
8311     yield = coptlist[i].value;
8312     printf("%d\n", yield);
8313     break;
8314 
8315     case CONF_FIZ:
8316     optval = coptlist[i].value;
8317     printf("%d\n", optval);
8318     break;
8319 
8320     case CONF_INT:
8321     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8322     printf("%d\n", yield);
8323     break;
8324 
8325     case CONF_NL:
8326     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8327     print_newline_config(optval, TRUE);
8328     break;
8329     }
8330 
8331 /* For VMS, return the value by setting a symbol, for certain values only. This
8332 is contributed code which the PCRE2 developers have no means of testing. */
8333 
8334 #ifdef __VMS
8335 
8336 /* This is the original code provided by the first VMS contributor. */
8337 #ifdef NEVER
8338   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8339     {
8340     char ucname[16];
8341     strcpy(ucname, coptlist[i].name);
8342     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8343     vms_setsymbol(ucname, 0, optval);
8344     }
8345 #endif
8346 
8347 /* This is the new code, provided by a second VMS contributor. */
8348 
8349   if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8350     {
8351     char nam_buf[22], val_buf[4];
8352     $DESCRIPTOR(nam, nam_buf);
8353     $DESCRIPTOR(val, val_buf);
8354 
8355     strcpy(nam_buf, coptlist[i].name);
8356     nam.dsc$w_length = strlen(nam_buf);
8357     sprintf(val_buf, "%d", yield);
8358     val.dsc$w_length = strlen(val_buf);
8359     lib$set_symbol(&nam, &val);
8360     }
8361 #endif  /* __VMS */
8362 
8363   return yield;
8364   }
8365 
8366 /* No argument for -C: output all configuration information. */
8367 
8368 print_version(stdout);
8369 printf("Compiled with\n");
8370 
8371 #ifdef EBCDIC
8372 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8373 #if defined NATIVE_ZOS
8374 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
8375 #endif
8376 #endif
8377 
8378 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8379 if (optval & 1) printf("  8-bit support\n");
8380 if (optval & 2) printf("  16-bit support\n");
8381 if (optval & 4) printf("  32-bit support\n");
8382 
8383 #ifdef SUPPORT_VALGRIND
8384 printf("  Valgrind support\n");
8385 #endif
8386 
8387 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8388 if (optval != 0)
8389   {
8390   printf("  UTF and UCP support (");
8391   print_unicode_version(stdout);
8392   printf(")\n");
8393   }
8394 else printf("  No Unicode support\n");
8395 
8396 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8397 if (optval != 0)
8398   {
8399   printf("  Just-in-time compiler support: ");
8400   print_jit_target(stdout);
8401   printf("\n");
8402   }
8403 else
8404   {
8405   printf("  No just-in-time compiler support\n");
8406   }
8407 
8408 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8409 print_newline_config(optval, FALSE);
8410 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8411 printf("  \\R matches %s\n",
8412   (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8413                                  "all Unicode newlines");
8414 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8415 printf("  \\C is %ssupported\n", optval? "not ":"");
8416 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8417 printf("  Internal link size = %d\n", optval);
8418 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8419 printf("  Parentheses nest limit = %d\n", optval);
8420 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8421 printf("  Default heap limit = %d kibibytes\n", optval);
8422 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8423 printf("  Default match limit = %d\n", optval);
8424 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8425 printf("  Default depth limit = %d\n", optval);
8426 
8427 #if defined SUPPORT_LIBREADLINE
8428 printf("  pcre2test has libreadline support\n");
8429 #elif defined SUPPORT_LIBEDIT
8430 printf("  pcre2test has libedit support\n");
8431 #else
8432 printf("  pcre2test has neither libreadline nor libedit support\n");
8433 #endif
8434 
8435 return 0;
8436 }
8437 
8438 
/*************************************************
*      Format one property/script list item      *
*************************************************/

/* Build the display text for one property or script. The text is a leading
name which, when the list holds more than one name, is followed by all the
other names in parentheses, separated by commas.

Arguments:
  ff         list of offsets into the utt_names string, ended by a negative
               value
  buff       where to put the text; its size is not checked here
  isscript   TRUE if the names are script names

Returns:     nothing
*/

#ifdef SUPPORT_UNICODE
static void
format_list_item(int16_t *ff, char *buff, BOOL isscript)
{
const char *names = PRIV(utt_names);
const char *lead = "";
const char *sep;
size_t leadlen = 0;
int leadidx = 0;
int total;
BOOL settled = FALSE;

/* A single pass counts the names and picks the one to put first. For scripts,
the first 3-character name is chosen and then the choice is final. For
non-scripts, or while no 3-character name has been seen, the longest name so
far is taken (the earliest one when lengths are equal). */

for (total = 0; ff[total] >= 0; total++)
  {
  const char *name;
  size_t namelen;

  if (settled) continue;      /* Only counting from here on */

  name = names + ff[total];
  namelen = strlen(name);

  if (isscript && namelen == 3) settled = TRUE;
    else if (namelen <= leadlen) continue;

  lead = name;
  leadlen = namelen;
  leadidx = total;
  }

strcpy(buff, lead);
buff += leadlen;

/* A solitary name needs no parenthesized list. */

if (total <= 1) return;

sep = " (";
for (int i = 0; i < total; i++)
  {
  if (i != leadidx)
    {
    buff += sprintf(buff, "%s%s", sep, names + ff[i]);
    sep = ", ";
    }
  }

strcpy(buff, ")");
}
#endif  /* SUPPORT_UNICODE */
8492 
8493 
8494 
8495 /*************************************************
8496 *        Display scripts or properties           *
8497 *************************************************/
8498 
8499 #define MAX_SYNONYMS 5
8500 
8501 static void
display_properties(BOOL wantscripts)8502 display_properties(BOOL wantscripts)
8503 {
8504 #ifndef SUPPORT_UNICODE
8505 (void)wantscripts;
8506 printf("** This version of PCRE2 was compiled without Unicode support.\n");
8507 #else
8508 
8509 const char *typename;
8510 uint16_t seentypes[1024];
8511 uint16_t seenvalues[1024];
8512 int seencount = 0;
8513 int16_t found[256][MAX_SYNONYMS + 1];
8514 int fc = 0;
8515 int colwidth = 40;
8516 int n;
8517 
8518 if (wantscripts)
8519   {
8520   n = ucp_Script_Count;
8521   typename = "SCRIPTS";
8522   }
8523 else
8524   {
8525   n = ucp_Bprop_Count;
8526   typename = "PROPERTIES";
8527   }
8528 
8529 for (size_t i = 0; i < PRIV(utt_size); i++)
8530   {
8531   int k;
8532   int m = 0;
8533   int16_t *fv;
8534   const ucp_type_table *t = PRIV(utt) + i;
8535   unsigned int value = t->value;
8536 
8537   if (wantscripts)
8538     {
8539     if (t->type != PT_SC && t->type != PT_SCX) continue;
8540     }
8541   else
8542     {
8543     if (t->type != PT_BOOL) continue;
8544     }
8545 
8546   for (k = 0; k < seencount; k++)
8547     {
8548     if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
8549     }
8550   if (k < seencount) continue;
8551 
8552   seentypes[seencount] = t->type;
8553   seenvalues[seencount++] = t->value;
8554 
8555   fv = found[fc++];
8556   fv[m++] = t->name_offset;
8557 
8558   for (size_t j = i + 1; j < PRIV(utt_size); j++)
8559     {
8560     const ucp_type_table *tt = PRIV(utt) + j;
8561     if (tt->type != t->type || tt->value != value) continue;
8562     if (m >= MAX_SYNONYMS)
8563       printf("** Too many synonyms: %s ignored\n",
8564         PRIV(utt_names) + tt->name_offset);
8565     else fv[m++] = tt->name_offset;
8566     }
8567 
8568   fv[m] = -1;
8569   }
8570 
8571 printf("-------------------------- SUPPORTED %s --------------------------\n\n",
8572   typename);
8573 
8574 if (!wantscripts) printf(
8575 "This release of PCRE2 supports Unicode's general category properties such\n"
8576 "as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
8577 "and the following binary (yes/no) properties:\n\n");
8578 
8579 
8580 for (int k = 0; k < (n+1)/2; k++)
8581   {
8582   int x;
8583   char buff1[128];
8584   char buff2[128];
8585 
8586   format_list_item(found[k], buff1, wantscripts);
8587   x = k + (n+1)/2;
8588   if (x < n) format_list_item(found[x], buff2, wantscripts);
8589     else buff2[0] = 0;
8590 
8591   x = printf("%s", buff1);
8592   while (x++ < colwidth) printf(" ");
8593   printf("%s\n", buff2);
8594   }
8595 
8596 #endif  /* SUPPORT_UNICODE */
8597 }
8598 
8599 
8600 
8601 /*************************************************
8602 *              Display one modifier              *
8603 *************************************************/
8604 
8605 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8606 display_one_modifier(modstruct *m, BOOL for_pattern)
8607 {
8608 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8609   '*' : ' ';
8610 printf("%c%s", c, m->name);
8611 for (size_t i = 0; i < C1MODLISTCOUNT; i++)
8612   {
8613   if (strcmp(m->name, c1modlist[i].fullname) == 0)
8614     printf(" (%c)", c1modlist[i].onechar);
8615   }
8616 }
8617 
8618 
8619 
8620 /*************************************************
8621 *       Display pattern or subject modifiers     *
8622 *************************************************/
8623 
8624 /* In order to print in two columns, first scan without printing to get a list
8625 of the modifiers that are required.
8626 
8627 Arguments:
8628   for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
8629   title         string to be used in title
8630 
8631 Returns:        nothing
8632 */
8633 
8634 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8635 display_selected_modifiers(BOOL for_pattern, const char *title)
8636 {
8637 uint32_t i, j;
8638 uint32_t n = 0;
8639 uint32_t list[MODLISTCOUNT];
8640 uint32_t extra[MODLISTCOUNT];
8641 
8642 for (i = 0; i < MODLISTCOUNT; i++)
8643   {
8644   BOOL is_pattern = TRUE;
8645   modstruct *m = modlist + i;
8646 
8647   switch (m->which)
8648     {
8649     case MOD_CTC:       /* Compile context */
8650     case MOD_PAT:       /* Pattern */
8651     case MOD_PATP:      /* Pattern, OK for Perl-compatible test */
8652     break;
8653 
8654     /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8655     subjects, but can be given with a pattern. We list them as subject
8656     modifiers, but marked with an asterisk.*/
8657 
8658     case MOD_CTM:       /* Match context */
8659     case MOD_DAT:       /* Subject line */
8660     case MOD_DATP:      /* Subject line, OK for Perl-compatible test */
8661     case MOD_PND:       /* As PD, but not default pattern */
8662     case MOD_PNDP:      /* As PND, OK for Perl-compatible test */
8663     is_pattern = FALSE;
8664     break;
8665 
8666     default: printf("** Unknown type for modifier '%s'\n", m->name);
8667     /* Fall through */
8668     case MOD_PD:        /* Pattern or subject */
8669     case MOD_PDP:       /* As PD, OK for Perl-compatible test */
8670     is_pattern = for_pattern;
8671     break;
8672     }
8673 
8674   if (for_pattern == is_pattern)
8675     {
8676     extra[n] = 0;
8677     for (size_t k = 0; k < C1MODLISTCOUNT; k++)
8678       {
8679       if (strcmp(m->name, c1modlist[k].fullname) == 0)
8680         {
8681         extra[n] += 4;
8682         break;
8683         }
8684       }
8685     list[n++] = i;
8686     }
8687   }
8688 
8689 /* Now print from the list in two columns. */
8690 
8691 printf("-------------- %s MODIFIERS --------------\n", title);
8692 
8693 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8694   {
8695   modstruct *m = modlist + list[i];
8696   display_one_modifier(m, for_pattern);
8697   if (j < n)
8698     {
8699     uint32_t k = 27 - strlen(m->name) - extra[i];
8700     while (k-- > 0) printf(" ");
8701     display_one_modifier(modlist + list[j], for_pattern);
8702     }
8703   printf("\n");
8704   }
8705 }
8706 
8707 
8708 
8709 /*************************************************
8710 *          Display the list of modifiers         *
8711 *************************************************/
8712 
8713 static void
display_modifiers(void)8714 display_modifiers(void)
8715 {
8716 printf(
8717   "An asterisk on a subject modifier means that it may be given on a pattern\n"
8718   "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8719   "that are listed for both patterns and subjects have different effects in\n"
8720   "each case.\n\n");
8721 display_selected_modifiers(TRUE, "PATTERN");
8722 printf("\n");
8723 display_selected_modifiers(FALSE, "SUBJECT");
8724 }
8725 
8726 
8727 
8728 /*************************************************
8729 *                Main Program                    *
8730 *************************************************/
8731 
8732 int
main(int argc,char ** argv)8733 main(int argc, char **argv)
8734 {
8735 uint32_t temp;
8736 uint32_t yield = 0;
8737 uint32_t op = 1;
8738 BOOL notdone = TRUE;
8739 BOOL quiet = FALSE;
8740 BOOL showtotaltimes = FALSE;
8741 BOOL skipping = FALSE;
8742 char *arg_subject = NULL;
8743 char *arg_pattern = NULL;
8744 char *arg_error = NULL;
8745 
8746 /* The offsets to the options and control bits fields of the pattern and data
8747 control blocks must be the same so that common options and controls such as
8748 "anchored" or "memory" can work for either of them from a single table entry.
8749 We cannot test this till runtime because "offsetof" does not work in the
8750 preprocessor. */
8751 
8752 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8753     PO(control2) != DO(control2))
8754   {
8755   fprintf(stderr, "** Coding error: "
8756     "options and control offsets for pattern and data must be the same.\n");
8757   return 1;
8758   }
8759 
8760 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8761 same time checking that a request for the length gives the same answer. Also
8762 check lengths for non-string items. */
8763 
8764 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8765     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8766 
8767     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8768     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8769 
8770     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8771     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8772 
8773     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8774     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8775   {
8776   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8777   return 1;
8778   }
8779 
8780 /* Check that bad options are diagnosed. */
8781 
8782 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8783     PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8784   {
8785   fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8786   return 1;
8787   }
8788 
8789 /* This configuration option is now obsolete, but running a quick check ensures
8790 that its code is covered. */
8791 
8792 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8793 
8794 /* Get buffers from malloc() so that valgrind will check their misuse when
8795 debugging. They grow automatically when very long lines are read. The 16-
8796 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8797 
8798 buffer = (uint8_t *)malloc(pbuffer8_size);
8799 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8800 
8801 /* The following  _setmode() stuff is some Windows magic that tells its runtime
8802 library to translate CRLF into a single LF character. At least, that's what
8803 I've been told: never having used Windows I take this all on trust. Originally
8804 it set 0x8000, but then I was advised that _O_BINARY was better. */
8805 
8806 #if defined(_WIN32) || defined(WIN32)
8807 _setmode( _fileno( stdout ), _O_BINARY );
8808 #endif
8809 
8810 /* Initialization that does not depend on the running mode. */
8811 
8812 locale_name[0] = 0;
8813 
8814 memset(&def_patctl, 0, sizeof(patctl));
8815 def_patctl.convert_type = CONVERT_UNSET;
8816 
8817 memset(&def_datctl, 0, sizeof(datctl));
8818 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8819 def_datctl.copy_numbers[0] = -1;
8820 def_datctl.get_numbers[0] = -1;
8821 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8822 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8823 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8824 
8825 /* Scan command line options. */
8826 
8827 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8828   {
8829   char *endptr;
8830   char *arg = argv[op];
8831   unsigned long uli;
8832 
8833   /* List modifiers and exit. */
8834 
8835   if (strcmp(arg, "-LM") == 0)
8836     {
8837     display_modifiers();
8838     goto EXIT;
8839     }
8840 
8841   /* List properties and exit */
8842 
8843   if (strcmp(arg, "-LP") == 0)
8844     {
8845     display_properties(FALSE);
8846     goto EXIT;
8847     }
8848 
8849   /* List scripts and exit */
8850 
8851   if (strcmp(arg, "-LS") == 0)
8852     {
8853     display_properties(TRUE);
8854     goto EXIT;
8855     }
8856 
8857   /* Display and/or set return code for configuration options. */
8858 
8859   if (strcmp(arg, "-C") == 0)
8860     {
8861     yield = c_option(argv[op + 1]);
8862     goto EXIT;
8863     }
8864 
8865   /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8866   and 32-bit modes because that won't happen naturally when 8-bit is also
8867   configured. Also call some other functions that are not otherwise used. This
8868   means that a coverage report won't claim there are uncalled functions. */
8869 
8870   if (strcmp(arg, "-8") == 0)
8871     {
8872 #ifdef SUPPORT_PCRE2_8
8873     test_mode = PCRE8_MODE;
8874     (void)pcre2_set_bsr_8(pat_context8, 999);
8875     (void)pcre2_set_newline_8(pat_context8, 999);
8876 #else
8877     fprintf(stderr,
8878       "** This version of PCRE2 was built without 8-bit support\n");
8879     exit(1);
8880 #endif
8881     }
8882 
8883   else if (strcmp(arg, "-16") == 0)
8884     {
8885 #ifdef SUPPORT_PCRE2_16
8886     test_mode = PCRE16_MODE;
8887     (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8888     (void)pcre2_set_bsr_16(pat_context16, 999);
8889     (void)pcre2_set_newline_16(pat_context16, 999);
8890 #else
8891     fprintf(stderr,
8892       "** This version of PCRE2 was built without 16-bit support\n");
8893     exit(1);
8894 #endif
8895     }
8896 
8897   else if (strcmp(arg, "-32") == 0)
8898     {
8899 #ifdef SUPPORT_PCRE2_32
8900     test_mode = PCRE32_MODE;
8901     (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8902     (void)pcre2_set_bsr_32(pat_context32, 999);
8903     (void)pcre2_set_newline_32(pat_context32, 999);
8904 #else
8905     fprintf(stderr,
8906       "** This version of PCRE2 was built without 32-bit support\n");
8907     exit(1);
8908 #endif
8909     }
8910 
8911   /* Set quiet (no version verification) */
8912 
8913   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8914 
8915   /* Set system stack size */
8916 
8917   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8918       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8919     {
8920 #if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS)
8921     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8922     exit(1);
8923 #else
8924     int rc;
8925     uint32_t stack_size;
8926     struct rlimit rlim;
8927     if (U32OVERFLOW(uli))
8928       {
8929       fprintf(stderr, "** Argument for -S is too big\n");
8930       exit(1);
8931       }
8932     stack_size = (uint32_t)uli;
8933     getrlimit(RLIMIT_STACK, &rlim);
8934     rlim.rlim_cur = stack_size * 1024 * 1024;
8935     if (rlim.rlim_cur > rlim.rlim_max)
8936       {
8937       fprintf(stderr,
8938         "pcre2test: requested stack size %luMiB is greater than hard limit "
8939           "%luMiB\n", (unsigned long int)stack_size,
8940           (unsigned long int)(rlim.rlim_max));
8941       exit(1);
8942       }
8943     rc = setrlimit(RLIMIT_STACK, &rlim);
8944     if (rc != 0)
8945       {
8946       fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8947         (unsigned long int)stack_size, strerror(errno));
8948       exit(1);
8949       }
8950     op++;
8951     argc--;
8952 #endif
8953     }
8954 
8955   /* Set some common pattern and subject controls */
8956 
8957   else if (strcmp(arg, "-AC") == 0)
8958     {
8959     def_patctl.options |= PCRE2_AUTO_CALLOUT;
8960     def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8961     }
8962   else if (strcmp(arg, "-ac") == 0)  def_patctl.options |= PCRE2_AUTO_CALLOUT;
8963   else if (strcmp(arg, "-b") == 0)   def_patctl.control |= CTL_FULLBINCODE;
8964   else if (strcmp(arg, "-d") == 0)   def_patctl.control |= CTL_DEBUG;
8965   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8966   else if (strcmp(arg, "-i") == 0)   def_patctl.control |= CTL_INFO;
8967   else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
8968            strcmp(arg, "-jitfast") == 0)
8969     {
8970     if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
8971       else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
8972     def_patctl.jit = JIT_DEFAULT;  /* full & partial */
8973 #ifndef SUPPORT_JIT
8974     fprintf(stderr, "** Warning: JIT support is not available: "
8975                     "-jit[fast|verify] calls functions that do nothing.\n");
8976 #endif
8977     }
8978 
8979   /* Set timing parameters */
8980 
8981   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8982            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8983     {
8984     int both = arg[2] == 0;
8985     showtotaltimes = arg[1] == 'T';
8986     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8987       {
8988       if (uli == 0)
8989         {
8990         fprintf(stderr, "** Argument for %s must not be zero\n", arg);
8991         exit(1);
8992         }
8993       if (U32OVERFLOW(uli))
8994         {
8995         fprintf(stderr, "** Argument for %s is too big\n", arg);
8996         exit(1);
8997         }
8998       timeitm = (int)uli;
8999       op++;
9000       argc--;
9001       }
9002     else timeitm = LOOPREPEAT;
9003     if (both) timeit = timeitm;
9004     }
9005 
9006   /* Give help */
9007 
9008   else if (strcmp(arg, "-help") == 0 ||
9009            strcmp(arg, "--help") == 0)
9010     {
9011     usage();
9012     goto EXIT;
9013     }
9014 
9015   /* Show version */
9016 
9017   else if (strcmp(arg, "-version") == 0 ||
9018            strcmp(arg, "--version") == 0)
9019     {
9020     print_version(stdout);
9021     goto EXIT;
9022     }
9023 
9024   /* The following options save their data for processing once we know what
9025   the running mode is. */
9026 
9027   else if (strcmp(arg, "-error") == 0)
9028     {
9029     arg_error = argv[op+1];
9030     goto CHECK_VALUE_EXISTS;
9031     }
9032 
9033   else if (strcmp(arg, "-subject") == 0)
9034     {
9035     arg_subject = argv[op+1];
9036     goto CHECK_VALUE_EXISTS;
9037     }
9038 
9039   else if (strcmp(arg, "-pattern") == 0)
9040     {
9041     arg_pattern = argv[op+1];
9042     CHECK_VALUE_EXISTS:
9043     if (argc <= 2)
9044       {
9045       fprintf(stderr, "** Missing value for %s\n", arg);
9046       yield = 1;
9047       goto EXIT;
9048       }
9049     op++;
9050     argc--;
9051     }
9052 
9053   /* Unrecognized option */
9054 
9055   else
9056     {
9057     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
9058     usage();
9059     yield = 1;
9060     goto EXIT;
9061     }
9062   op++;
9063   argc--;
9064   }
9065 
9066 /* If -error was present, get the error numbers, show the messages, and exit.
9067 We wait to do this until we know which mode we are in. */
9068 
9069 if (arg_error != NULL)
9070   {
9071   int len;
9072   int errcode;
9073   char *endptr;
9074 
9075 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
9076 least 128 code units, because it is used for retrieving error messages. */
9077 
9078 #ifdef SUPPORT_PCRE2_16
9079   if (test_mode == PCRE16_MODE)
9080     {
9081     pbuffer16_size = 256;
9082     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
9083     if (pbuffer16 == NULL)
9084       {
9085       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
9086         pbuffer16_size);
9087       yield = 1;
9088       goto EXIT;
9089       }
9090     }
9091 #endif
9092 
9093 #ifdef SUPPORT_PCRE2_32
9094   if (test_mode == PCRE32_MODE)
9095     {
9096     pbuffer32_size = 512;
9097     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
9098     if (pbuffer32 == NULL)
9099       {
9100       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
9101         pbuffer32_size);
9102       yield = 1;
9103       goto EXIT;
9104       }
9105     }
9106 #endif
9107 
9108   /* Loop along a list of error numbers. */
9109 
9110   for (;;)
9111     {
9112     errcode = strtol(arg_error, &endptr, 10);
9113     if (*endptr != 0 && *endptr != CHAR_COMMA)
9114       {
9115       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
9116       yield = 1;
9117       goto EXIT;
9118       }
9119     printf("Error %d: ", errcode);
9120     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
9121     if (len < 0)
9122       {
9123       switch (len)
9124         {
9125         case PCRE2_ERROR_BADDATA:
9126         printf("PCRE2_ERROR_BADDATA (unknown error number)");
9127         break;
9128 
9129         case PCRE2_ERROR_NOMEMORY:
9130         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
9131         break;
9132 
9133         default:
9134         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
9135         break;
9136         }
9137       }
9138     else
9139       {
9140       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
9141       }
9142     printf("\n");
9143     if (*endptr == 0) goto EXIT;
9144     arg_error = endptr + 1;
9145     }
9146   /* Control never reaches here */
9147   }  /* End of -error handling */
9148 
9149 /* Initialize things that cannot be done until we know which test mode we are
9150 running in. Exercise the general context copying and match data size functions,
9151 which are not otherwise used. */
9152 
9153 code_unit_size = test_mode/8;
9154 max_oveccount = DEFAULT_OVECCOUNT;
9155 
9156 /* Use macros to save a lot of duplication. */
9157 
9158 #define CREATECONTEXTS \
9159   G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
9160   G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
9161   G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
9162   G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
9163   G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
9164   G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
9165   G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
9166   G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
9167   G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
9168 
9169 #define CONTEXTTESTS \
9170   (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
9171   (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
9172   (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
9173   (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL); \
9174   (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
9175 
9176 
9177 /* Call the appropriate functions for the current mode, and exercise some
9178 functions that are not otherwise called. */
9179 
9180 #ifdef SUPPORT_PCRE2_8
9181 #undef BITS
9182 #define BITS 8
9183 if (test_mode == PCRE8_MODE)
9184   {
9185   CREATECONTEXTS;
9186   CONTEXTTESTS;
9187   }
9188 #endif
9189 
9190 #ifdef SUPPORT_PCRE2_16
9191 #undef BITS
9192 #define BITS 16
9193 if (test_mode == PCRE16_MODE)
9194   {
9195   CREATECONTEXTS;
9196   CONTEXTTESTS;
9197   }
9198 #endif
9199 
9200 #ifdef SUPPORT_PCRE2_32
9201 #undef BITS
9202 #define BITS 32
9203 if (test_mode == PCRE32_MODE)
9204   {
9205   CREATECONTEXTS;
9206   CONTEXTTESTS;
9207   }
9208 #endif
9209 
9210 /* Set a default parentheses nest limit that is large enough to run the
9211 standard tests (this also exercises the function). */
9212 
9213 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
9214 
9215 /* Handle command line modifier settings, sending any error messages to
9216 stderr. We need to know the mode before modifying the context, and it is tidier
9217 to do them all in the same way. */
9218 
9219 outfile = stderr;
9220 if ((arg_pattern != NULL &&
9221     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
9222     (arg_subject != NULL &&
9223     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
9224   {
9225   yield = 1;
9226   goto EXIT;
9227   }
9228 
9229 /* Sort out the input and output files, defaulting to stdin/stdout. */
9230 
9231 infile = stdin;
9232 outfile = stdout;
9233 
9234 if (argc > 1 && strcmp(argv[op], "-") != 0)
9235   {
9236   infile = fopen(argv[op], INPUT_MODE);
9237   if (infile == NULL)
9238     {
9239     printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9240     yield = 1;
9241     goto EXIT;
9242     }
9243   }
9244 
9245 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9246 if (INTERACTIVE(infile)) using_history();
9247 #endif
9248 
9249 if (argc > 2)
9250   {
9251   outfile = fopen(argv[op+1], OUTPUT_MODE);
9252   if (outfile == NULL)
9253     {
9254     printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9255     yield = 1;
9256     goto EXIT;
9257     }
9258   }
9259 
9260 /* Output a heading line unless quiet, then process input lines. */
9261 
9262 if (!quiet) print_version(outfile);
9263 
9264 SET(compiled_code, NULL);
9265 
9266 #ifdef SUPPORT_PCRE2_8
9267 preg.re_pcre2_code = NULL;
9268 preg.re_match_data = NULL;
9269 #endif
9270 
9271 while (notdone)
9272   {
9273   uint8_t *p;
9274   int rc = PR_OK;
9275   BOOL expectdata = TEST(compiled_code, !=, NULL);
9276 #ifdef SUPPORT_PCRE2_8
9277   expectdata |= preg.re_pcre2_code != NULL;
9278 #endif
9279 
9280   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
9281     break;
9282   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9283   fflush(outfile);
9284   p = buffer;
9285 
9286   /* If we have a pattern set up for testing, or we are skipping after a
9287   compile failure, a blank line terminates this test. */
9288 
9289   if (expectdata || skipping)
9290     {
9291     while (isspace(*p)) p++;
9292     if (*p == 0)
9293       {
9294 #ifdef SUPPORT_PCRE2_8
9295       if (preg.re_pcre2_code != NULL)
9296         {
9297         regfree(&preg);
9298         preg.re_pcre2_code = NULL;
9299         preg.re_match_data = NULL;
9300         }
9301 #endif  /* SUPPORT_PCRE2_8 */
9302       if (TEST(compiled_code, !=, NULL))
9303         {
9304         SUB1(pcre2_code_free, compiled_code);
9305         SET(compiled_code, NULL);
9306         }
9307       skipping = FALSE;
9308       setlocale(LC_CTYPE, "C");
9309       }
9310 
9311     /* Otherwise, if we are not skipping, and the line is not a data comment
9312     line ("\=" followed by white space), process it as a data line. */
9313 
9314     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9315       {
9316       rc = process_data();
9317       }
9318     }
9319 
9320   /* We do not have a pattern set up for testing. Lines starting with # are
9321   either comments or special commands. Blank lines are ignored. Otherwise, the
9322   line must start with a valid delimiter. It is then processed as a pattern
9323   line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9324   valgrind, make the unused part of the buffer undefined, to catch overruns. */
9325 
9326   else if (*p == '#')
9327     {
9328     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9329     rc = process_command();
9330     }
9331 
9332   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9333     {
9334     rc = process_pattern();
9335     dfa_matched = 0;
9336     }
9337 
9338   else
9339     {
9340     while (isspace(*p)) p++;
9341     if (*p != 0)
9342       {
9343       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9344         *buffer);
9345       rc = PR_SKIP;
9346       }
9347     }
9348 
9349   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9350   else if (rc == PR_ABEND)
9351     {
9352     fprintf(outfile, "** pcre2test run abandoned\n");
9353     yield = 1;
9354     goto EXIT;
9355     }
9356   }
9357 
9358 /* Finish off a normal run. */
9359 
9360 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9361 
9362 if (showtotaltimes)
9363   {
9364   const char *pad = "";
9365   fprintf(outfile, "--------------------------------------\n");
9366   if (timeit > 0)
9367     {
9368     fprintf(outfile, "Total compile time %.4f milliseconds\n",
9369       (((double)total_compile_time * 1000.0) / (double)timeit) /
9370         (double)CLOCKS_PER_SEC);
9371     if (total_jit_compile_time > 0)
9372       fprintf(outfile, "Total JIT compile  %.4f milliseconds\n",
9373         (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9374           (double)CLOCKS_PER_SEC);
9375     pad = "  ";
9376     }
9377   fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9378     (((double)total_match_time * 1000.0) / (double)timeitm) /
9379       (double)CLOCKS_PER_SEC);
9380   }
9381 
9382 
9383 EXIT:
9384 
9385 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9386 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9387 #endif
9388 
9389 if (infile != NULL && infile != stdin) fclose(infile);
9390 if (outfile != NULL && outfile != stdout) fclose(outfile);
9391 
9392 free(buffer);
9393 free(dbuffer);
9394 free(pbuffer8);
9395 free(dfa_workspace);
9396 free((void *)locale_tables);
9397 free(tables3);
9398 PCRE2_MATCH_DATA_FREE(match_data);
9399 SUB1(pcre2_code_free, compiled_code);
9400 
9401 while(patstacknext-- > 0)
9402   {
9403   SET(compiled_code, patstack[patstacknext]);
9404   SUB1(pcre2_code_free, compiled_code);
9405   }
9406 
9407 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9408 if (jit_stack != NULL)
9409   {
9410   PCRE2_JIT_STACK_FREE(jit_stack);
9411   }
9412 
/* Release all the context blocks belonging to one code unit width. Each line
applies the width-specific free function to the matching context variable: the
general context and its copy, the two compile contexts, the two match contexts,
and the two convert contexts. The names are built from BITS by G(), which is
defined elsewhere in this file (presumably by token pasting - confirm), so BITS
must be set to 8, 16, or 32 before the macro is used. The expansion is a plain
sequence of statements that already ends with a semicolon: use it only as a
statement of its own, never as the unbraced body of an if or a loop. */

9413 #define FREECONTEXTS \
9414   G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9415   G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9416   G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9417   G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9418   G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9419   G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9420   G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9421   G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9422 
9423 #ifdef SUPPORT_PCRE2_8
9424 #undef BITS
9425 #define BITS 8
9426 if (preg.re_pcre2_code != NULL) regfree(&preg);
9427 FREECONTEXTS;
9428 #endif
9429 
9430 #ifdef SUPPORT_PCRE2_16
9431 #undef BITS
9432 #define BITS 16
9433 free(pbuffer16);
9434 FREECONTEXTS;
9435 #endif
9436 
9437 #ifdef SUPPORT_PCRE2_32
9438 #undef BITS
9439 #define BITS 32
9440 free(pbuffer32);
9441 FREECONTEXTS;
9442 #endif
9443 
9444 #if defined(__VMS)
9445   yield = SS$_NORMAL;  /* Return values via DCL symbols */
9446 #endif
9447 
9448 return yield;
9449 }
9450 
9451 /* End of pcre2test.c */
9452