• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14     Rewritten code Copyright (c) 2016-2019 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Debugging code enabler */
82 
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84 
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89 
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102 
103 /* Put the test for interactive input into a macro so that it can be changed if
104 required for different environments. */
105 
106 #define INTERACTIVE(f) isatty(fileno(f))
107 
108 
109 /* ---------------------- System-specific definitions ---------------------- */
110 
111 /* A number of things vary for Windows builds. Originally, pcretest opened its
112 input and output without "b"; then I was told that "b" was needed in some
113 environments, so it was added for release 5.0 to both the input and output. (It
114 makes no difference on Unix-like systems.) Later I was told that it is wrong
115 for the input on Windows. I've now abstracted the modes into macros that are
116 set here, to make it easier to fiddle with them, and removed "b" from the input
117 mode under Windows. The BINARY versions are used when saving/restoring compiled
118 patterns. */
119 
120 #if defined(_WIN32) || defined(WIN32)
121 #include <io.h>                /* For _setmode() */
122 #include <fcntl.h>             /* For _O_BINARY */
123 #define INPUT_MODE          "r"
124 #define OUTPUT_MODE         "wb"
125 #define BINARY_INPUT_MODE   "rb"
126 #define BINARY_OUTPUT_MODE  "wb"
127 
128 #ifndef isatty
129 #define isatty _isatty         /* This is what Windows calls them, I'm told, */
130 #endif                         /* though in some environments they seem to   */
131                                /* be already defined, hence the #ifndefs.    */
132 #ifndef fileno
133 #define fileno _fileno
134 #endif
135 
136 /* A user sent this fix for Borland Builder 5 under Windows. */
137 
138 #ifdef __BORLANDC__
139 #define _setmode(handle, mode) setmode(handle, mode)
140 #endif
141 
142 /* Not Windows */
143 
144 #else
145 #include <sys/time.h>          /* These two includes are needed */
146 #include <sys/resource.h>      /* for setrlimit(). */
147 #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
148 #define INPUT_MODE   "r"
149 #define OUTPUT_MODE  "w"
150 #define BINARY_INPUT_MODE   "rb"
151 #define BINARY_OUTPUT_MODE  "wb"
152 #else
153 #define INPUT_MODE          "rb"
154 #define OUTPUT_MODE         "wb"
155 #define BINARY_INPUT_MODE   "rb"
156 #define BINARY_OUTPUT_MODE  "wb"
157 #endif
158 #endif
159 
160 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
161 user [2] provided alternative code which worked better for him. I have
162 commented out the original, but kept it around just in case. */
163 
164 #ifdef __VMS
165 #include <ssdef.h>
166 /* These two includes came from [2]. */
167 #include descrip
168 #include lib$routines
169 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
170 #endif
171 
172 /* VC and older compilers don't support %td or %zu, and even some that claim to
173 be C99 don't support them (hence DISABLE_PERCENT_ZT). */
174 
175 #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L || defined(DISABLE_PERCENT_ZT)
176 #define PTR_FORM "lu"
177 #define SIZ_FORM "lu"
178 #define SIZ_CAST (unsigned long int)
179 #else
180 #define PTR_FORM "td"
181 #define SIZ_FORM "zu"
182 #define SIZ_CAST
183 #endif
184 
185 /* ------------------End of system-specific definitions -------------------- */
186 
187 /* Glueing macros that are used in several places below. */
188 
189 #define glue(a,b) a##b
190 #define G(a,b) glue(a,b)
191 
192 /* Miscellaneous parameters and manifests */
193 
194 #ifndef CLOCKS_PER_SEC
195 #ifdef CLK_TCK
196 #define CLOCKS_PER_SEC CLK_TCK
197 #else
198 #define CLOCKS_PER_SEC 100
199 #endif
200 #endif
201 
202 #define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
203 #define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
204 #define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
205 #define DEFAULT_OVECCOUNT 15      /* Default ovector count */
206 #define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
207 #define LOCALESIZE 32             /* Size of locale name */
208 #define LOOPREPEAT 500000         /* Default loop count for timing */
209 #define MALLOCLISTSIZE 20         /* For remembering mallocs */
210 #define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
211 #define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
212 #define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
213 #define VERSION_SIZE 64           /* Size of buffer for the version strings */
214 
215 /* Make sure the buffer into which replacement strings are copied is big enough
216 to hold them as 32-bit code units. */
217 
218 #define REPLACE_BUFFSIZE 1024   /* This is a byte value */
219 
220 /* Execution modes */
221 
222 #define PCRE8_MODE   8
223 #define PCRE16_MODE 16
224 #define PCRE32_MODE 32
225 
226 /* Processing returns */
227 
228 enum { PR_OK, PR_SKIP, PR_ABEND };
229 
230 /* The macro PRINTABLE determines whether to print an output character as-is or
231 as a hex value when showing compiled patterns. We use it in cases when the
232 locale has not been explicitly changed, so as to get consistent output from
233 systems that differ in their output from isprint() even in the "C" locale. */
234 
235 #ifdef EBCDIC
236 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
237 #else
238 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
239 #endif
240 
241 #define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))  /* isprint() only when use_tables is non-NULL and c < 256, else PRINTABLE(); c is evaluated more than once */
242 
243 /* We have to include some of the library source files because we need
244 to use some of the macros, internal structure definitions, and other internal
245 values - pcre2test has "inside information" compared to an application program
246 that strictly follows the PCRE2 API.
247 
248 Before including pcre2_internal.h we define PRIV so that it does not get
249 defined therein. This ensures that PRIV names in the included files do not
250 clash with those in the libraries. Also, although pcre2_internal.h does itself
251 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
252 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
253 for building the library. */
254 
255 #define PRIV(name) name
256 #define PCRE2_CODE_UNIT_WIDTH 0
257 #include "pcre2.h"
258 #include "pcre2posix.h"
259 #include "pcre2_internal.h"
260 
261 /* We need access to some of the data tables that PCRE2 uses. Defining
262 PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
263 of PRIV avoids name clashes. */
264 
265 #define PCRE2_PCRE2TEST
266 #include "pcre2_tables.c"
267 #include "pcre2_ucd.c"
268 
269 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
270 check needed for overflow depends on whether long ints are in fact longer than
271 ints. They are defined not to be shorter. */
272 
273 #if ULONG_MAX > UINT32_MAX                   /* unsigned long is wider than 32 bits: range-check the value */
274 #define U32OVERFLOW(x) ((x) > UINT32_MAX)
275 #else                                        /* same width: the value UINT32_MAX itself is treated as overflow */
276 #define U32OVERFLOW(x) ((x) == UINT32_MAX)
277 #endif
278 
279 #if LONG_MAX > INT32_MAX                     /* long is wider than 32 bits: range-check the value */
280 #define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
281 #else                                        /* same width: the extreme values themselves are treated as overflow */
282 #define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
283 #endif
284 
285 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
286 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
287 defined. We can now include it for each supported code unit width. Because
288 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
289 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
290 while including these files, and then restore it to a no-op. Because LINK_SIZE
291 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
292 these inclusions should not be changed. */
293 
294 #undef PCRE2_SUFFIX
295 #undef PCRE2_CODE_UNIT_WIDTH
296 
297 #ifdef   SUPPORT_PCRE2_8
298 #define  PCRE2_CODE_UNIT_WIDTH 8
299 #define  PCRE2_SUFFIX(a) G(a,8)
300 #include "pcre2_intmodedep.h"
301 #include "pcre2_printint.c"
302 #undef   PCRE2_CODE_UNIT_WIDTH
303 #undef   PCRE2_SUFFIX
304 #endif   /* SUPPORT_PCRE2_8 */
305 
306 #ifdef   SUPPORT_PCRE2_16
307 #define  PCRE2_CODE_UNIT_WIDTH 16
308 #define  PCRE2_SUFFIX(a) G(a,16)
309 #include "pcre2_intmodedep.h"
310 #include "pcre2_printint.c"
311 #undef   PCRE2_CODE_UNIT_WIDTH
312 #undef   PCRE2_SUFFIX
313 #endif   /* SUPPORT_PCRE2_16 */
314 
315 #ifdef   SUPPORT_PCRE2_32
316 #define  PCRE2_CODE_UNIT_WIDTH 32
317 #define  PCRE2_SUFFIX(a) G(a,32)
318 #include "pcre2_intmodedep.h"
319 #include "pcre2_printint.c"
320 #undef   PCRE2_CODE_UNIT_WIDTH
321 #undef   PCRE2_SUFFIX
322 #endif   /* SUPPORT_PCRE2_32 */
323 
324 #define PCRE2_SUFFIX(a) a
325 
326 /* We need to be able to check input text for UTF-8 validity, whatever code
327 widths are actually available, because the input to pcre2test is always in
328 8-bit code units. So we include the UTF validity checking function for 8-bit
329 code units. */
330 
331 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
332 
333 #define  PCRE2_CODE_UNIT_WIDTH 8
334 #undef   PCRE2_SPTR
335 #define  PCRE2_SPTR PCRE2_SPTR8
336 #include "pcre2_valid_utf.c"
337 #undef   PCRE2_CODE_UNIT_WIDTH
338 #undef   PCRE2_SPTR
339 
340 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
341 support, it can be selected by a command-line option. If there is no 8-bit
342 support, there must be 16-bit or 32-bit support, so default to one of them. The
343 config function, JIT stack, contexts, and version string are the same in all
344 modes, so use the form of the first that is available. */
345 
346 #if defined SUPPORT_PCRE2_8
347 #define DEFAULT_TEST_MODE PCRE8_MODE
348 #define VERSION_TYPE PCRE2_UCHAR8
349 #define PCRE2_CONFIG pcre2_config_8
350 #define PCRE2_JIT_STACK pcre2_jit_stack_8
351 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
352 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
353 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
354 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
355 
356 #elif defined SUPPORT_PCRE2_16
357 #define DEFAULT_TEST_MODE PCRE16_MODE
358 #define VERSION_TYPE PCRE2_UCHAR16
359 #define PCRE2_CONFIG pcre2_config_16
360 #define PCRE2_JIT_STACK pcre2_jit_stack_16
361 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
362 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
363 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
364 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
365 
366 #elif defined SUPPORT_PCRE2_32
367 #define DEFAULT_TEST_MODE PCRE32_MODE
368 #define VERSION_TYPE PCRE2_UCHAR32
369 #define PCRE2_CONFIG pcre2_config_32
370 #define PCRE2_JIT_STACK pcre2_jit_stack_32
371 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
372 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
373 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
374 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
375 #endif
376 
377 /* ------------- Structure and table for handling #-commands ------------- */
378 
379 typedef struct cmdstruct {   /* One entry in the table of #-commands */
380   const char *name;          /* Command name as listed in cmdlist, e.g. "load" */
381   int  value;                /* The CMD_xxx code paired with that name */
382 } cmdstruct;
383 
384 enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,   /* Codes for the #-commands */
385   CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };   /* CMD_UNKNOWN has no entry in cmdlist */
386 
387 static cmdstruct cmdlist[] = {            /* Name-to-code table for the #-commands */
388   { "forbid_utf",      CMD_FORBID_UTF },
389   { "load",            CMD_LOAD },
390   { "newline_default", CMD_NEWLINE_DEFAULT },
391   { "pattern",         CMD_PATTERN },
392   { "perltest",        CMD_PERLTEST },
393   { "pop",             CMD_POP },
394   { "popcopy",         CMD_POPCOPY },
395   { "save",            CMD_SAVE },
396   { "subject",         CMD_SUBJECT }};    /* Every CMD_xxx code except CMD_UNKNOWN appears above */
397 
398 #define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
399 
400 /* ------------- Structures and tables for handling modifiers -------------- */
401 
402 /* Table of names for newline types. Must be kept in step with the definitions
403 of PCRE2_NEWLINE_xx in pcre2.h. */
404 
405 static const char *newlines[] = {   /* Names of the newline types; presumably indexed by PCRE2_NEWLINE_xx value - confirm at point of use */
406   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
407 
408 /* Structure and table for handling pattern conversion types. */
409 
410 typedef struct convertstruct {   /* One entry in the table of pattern conversion types */
411   const char *name;              /* Conversion type name as listed in convertlist */
412   uint32_t option;               /* A PCRE2_CONVERT_xxx value, or CONVERT_UNSET */
413 } convertstruct;
414 
415 static convertstruct convertlist[] = {    /* Name-to-option table for pattern conversion types */
416   { "glob",                   PCRE2_CONVERT_GLOB },
417   { "glob_no_starstar",       PCRE2_CONVERT_GLOB_NO_STARSTAR },
418   { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
419   { "posix_basic",            PCRE2_CONVERT_POSIX_BASIC },
420   { "posix_extended",         PCRE2_CONVERT_POSIX_EXTENDED },
421   { "unset",                  CONVERT_UNSET }};   /* Maps to the "unset" marker, not a PCRE2_CONVERT_xxx value */
422 
423 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
424 
425 /* Modifier types and applicability */
426 
427 enum { MOD_CTC,    /* Applies to a compile context */
428        MOD_CTM,    /* Applies to a match context */
429        MOD_PAT,    /* Applies to a pattern */
430        MOD_PATP,   /* Ditto, OK for Perl test */
431        MOD_DAT,    /* Applies to a data line */
432        MOD_PD,     /* Applies to a pattern or a data line */
433        MOD_PDP,    /* As MOD_PD, OK for Perl test */
434        MOD_PND,    /* As MOD_PD, but not for a default pattern */
435        MOD_PNDP,   /* As MOD_PND, OK for Perl test; last of the applicability codes (modstruct "which") */
436        MOD_CHR,    /* Is a single character; first of the value-type codes (modstruct "type") */
437        MOD_CON,    /* Is a "convert" type/options list */
438        MOD_CTL,    /* Is a control bit */
439        MOD_BSR,    /* Is a BSR value */
440        MOD_IN2,    /* Is one or two unsigned integers */
441        MOD_INS,    /* Is a signed integer */
442        MOD_INT,    /* Is an unsigned integer */
443        MOD_IND,    /* Is an unsigned integer, but no value => default */
444        MOD_NL,     /* Is a newline value */
445        MOD_NN,     /* Is a number or a name; more than one may occur */
446        MOD_OPT,    /* Is an option bit */
447        MOD_SIZ,    /* Is a PCRE2_SIZE value */
448        MOD_STR };  /* Is a string */
449 
450 /* Control bits. Some apply to compiling, some to matching, but some can be set
451 either on a pattern or a data line, so they must all be distinct. There are now
452 so many of them that they are split into two fields. */
453 
454 #define CTL_AFTERTEXT                    0x00000001u
455 #define CTL_ALLAFTERTEXT                 0x00000002u
456 #define CTL_ALLCAPTURES                  0x00000004u
457 #define CTL_ALLUSEDTEXT                  0x00000008u
458 #define CTL_ALTGLOBAL                    0x00000010u
459 #define CTL_BINCODE                      0x00000020u
460 #define CTL_CALLOUT_CAPTURE              0x00000040u
461 #define CTL_CALLOUT_INFO                 0x00000080u
462 #define CTL_CALLOUT_NONE                 0x00000100u
463 #define CTL_DFA                          0x00000200u
464 #define CTL_EXPAND                       0x00000400u
465 #define CTL_FINDLIMITS                   0x00000800u
466 #define CTL_FRAMESIZE                    0x00001000u
467 #define CTL_FULLBINCODE                  0x00002000u
468 #define CTL_GETALL                       0x00004000u
469 #define CTL_GLOBAL                       0x00008000u
470 #define CTL_HEXPAT                       0x00010000u  /* Same word as USE_LENGTH */
471 #define CTL_INFO                         0x00020000u
472 #define CTL_JITFAST                      0x00040000u
473 #define CTL_JITVERIFY                    0x00080000u
474 #define CTL_MARK                         0x00100000u
475 #define CTL_MEMORY                       0x00200000u
476 #define CTL_NULLCONTEXT                  0x00400000u
477 #define CTL_POSIX                        0x00800000u
478 #define CTL_POSIX_NOSUB                  0x01000000u
479 #define CTL_PUSH                         0x02000000u  /* These three must be */
480 #define CTL_PUSHCOPY                     0x04000000u  /*   all in the same */
481 #define CTL_PUSHTABLESCOPY               0x08000000u  /*     word. */
482 #define CTL_STARTCHAR                    0x10000000u
483 #define CTL_USE_LENGTH                   0x20000000u  /* Same word as HEXPAT */
484 #define CTL_UTF8_INPUT                   0x40000000u
485 #define CTL_ZERO_TERMINATE               0x80000000u
486 
487 /* Combinations */
488 
489 #define CTL_DEBUG            (CTL_FULLBINCODE|CTL_INFO)  /* For setting */
490 #define CTL_ANYINFO          (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
491 #define CTL_ANYGLOB          (CTL_ALTGLOBAL|CTL_GLOBAL)
492 
493 /* Second control word */
494 
495 #define CTL2_SUBSTITUTE_CALLOUT          0x00000001u
496 #define CTL2_SUBSTITUTE_EXTENDED         0x00000002u
497 #define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  0x00000004u
498 #define CTL2_SUBSTITUTE_UNKNOWN_UNSET    0x00000008u
499 #define CTL2_SUBSTITUTE_UNSET_EMPTY      0x00000010u
500 #define CTL2_SUBJECT_LITERAL             0x00000020u
501 #define CTL2_CALLOUT_NO_WHERE            0x00000040u
502 #define CTL2_CALLOUT_EXTRA               0x00000080u
503 #define CTL2_ALLVECTOR                   0x00000100u
504 
505 #define CTL2_NL_SET                      0x40000000u  /* Informational */
506 #define CTL2_BSR_SET                     0x80000000u  /* Informational */
507 
508 /* These are the matching controls that may be set either on a pattern or on a
509 data line. They are copied from the pattern controls as initial settings for
510 data line controls. Note that CTL_MEMORY is not included here, because it does
511 different things in the two cases. */
512 
513 #define CTL_ALLPD  (CTL_AFTERTEXT|\
514                     CTL_ALLAFTERTEXT|\
515                     CTL_ALLCAPTURES|\
516                     CTL_ALLUSEDTEXT|\
517                     CTL_ALTGLOBAL|\
518                     CTL_GLOBAL|\
519                     CTL_MARK|\
520                     CTL_STARTCHAR|\
521                     CTL_UTF8_INPUT)
522 
523 #define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
524                     CTL2_SUBSTITUTE_EXTENDED|\
525                     CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
526                     CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
527                     CTL2_SUBSTITUTE_UNSET_EMPTY|\
528                     CTL2_ALLVECTOR)
529 
530 /* Structures for holding modifier information for patterns and subject strings
531 (data). Fields containing modifiers that can be set either for a pattern or a
532 subject must be at the start and in the same order in both cases so that the
533 same offset in the big table below works for both. */
534 
535 typedef struct patctl {       /* Structure for pattern modifiers; leading fields mirror datctl. */
536   uint32_t  options;          /* Must be in same position as datctl */
537   uint32_t  control;          /* Must be in same position as datctl */
538   uint32_t  control2;         /* Must be in same position as datctl */
539   uint32_t  jitstack;         /* Must be in same position as datctl */
540    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
541   uint32_t  substitute_skip;  /* Must be in same position as datctl */
542   uint32_t  substitute_stop;  /* Must be in same position as datctl */
543   uint32_t  jit;              /* Target of the "jit" modifier */
544   uint32_t  stackguard_test;
545   uint32_t  tables_id;
546   uint32_t  convert_type;     /* Target of "convert"; CONVERT_UNSET is its unset value */
547   uint32_t  convert_length;   /* Target of "convert_length" */
548   uint32_t  convert_glob_escape;     /* Target of "convert_glob_escape" (a single character) */
549   uint32_t  convert_glob_separator;  /* Target of "convert_glob_separator" (a single character) */
550   uint32_t  regerror_buffsize;
551    uint8_t  locale[LOCALESIZE];      /* Target of "locale" (a string of at most LOCALESIZE) */
552 } patctl;
553 
554 #define MAXCPYGET 10
555 #define LENCPYGET 64
556 
557 typedef struct datctl {       /* Structure for data line modifiers; leading fields mirror patctl. */
558   uint32_t  options;          /* Must be in same position as patctl */
559   uint32_t  control;          /* Must be in same position as patctl */
560   uint32_t  control2;         /* Must be in same position as patctl */
561   uint32_t  jitstack;         /* Must be in same position as patctl */
562    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
563   uint32_t  substitute_skip;  /* Must be in same position as patctl */
564   uint32_t  substitute_stop;  /* Must be in same position as patctl */
565   uint32_t  startend[2];      /* CFORE_UNSET is the unset value */
566   uint32_t  cerror[2];        /* Target of "callout_error"; CFORE_UNSET is the unset value */
567   uint32_t  cfail[2];         /* Target of "callout_fail"; CFORE_UNSET is the unset value */
568    int32_t  callout_data;     /* Target of "callout_data" (a signed integer) */
569    int32_t  copy_numbers[MAXCPYGET];  /* Numeric arguments of "copy" */
570    int32_t  get_numbers[MAXCPYGET];   /* Numeric arguments of "get" */
571   uint32_t  oveccount;
572   uint32_t  offset;
573   uint8_t   copy_names[LENCPYGET];    /* Name arguments of "copy" */
574   uint8_t   get_names[LENCPYGET];     /* Name arguments of "get" */
575 } datctl;
576 
577 /* Ids for which context to modify. */
578 
579 enum { CTX_PAT,            /* Active pattern context */
580        CTX_POPPAT,         /* Ditto, for a popped pattern */
581        CTX_DEFPAT,         /* Default pattern context */
582        CTX_DAT,            /* Active data (match) context */
583        CTX_DEFDAT };       /* Default data (match) context */
584 
585 /* Macros to simplify the big table below. */
586 
587 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)   /* Offset of a compile context field */
588 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)     /* Offset of a match context field */
589 #define PO(name) offsetof(patctl, name)                       /* Offset of a pattern-modifier field */
590 #define PD(name) PO(name)                                     /* Same as PO; used in the table for pattern-or-data (MOD_PD) entries */
591 #define DO(name) offsetof(datctl, name)                       /* Offset of a data-line-modifier field */
592 
593 /* Table of all long-form modifiers. Must be in collating sequence of modifier
594 name because it is searched by binary chop. */
595 
596 typedef struct modstruct {     /* One entry in the table of long-form modifiers */
597   const char   *name;          /* Modifier name; the table is kept sorted on this for the binary chop */
598   uint16_t      which;         /* MOD_xxx code saying what the modifier applies to */
599   uint16_t      type;          /* MOD_xxx code saying what kind of value it is */
600   uint32_t      value;         /* Depends on type, e.g. the bit to set for MOD_OPT and MOD_CTL entries */
601   PCRE2_SIZE    offset;        /* Offset of the target field, built with CO/MO/PO/PD/DO */
602 } modstruct;
603 
604 static modstruct modlist[] = {
605   { "aftertext",                  MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
606   { "allaftertext",               MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
607   { "allcaptures",                MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
608   { "allow_empty_class",          MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
609   { "allow_surrogate_escapes",    MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
610   { "allusedtext",                MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
611   { "allvector",                  MOD_PND,  MOD_CTL, CTL2_ALLVECTOR,             PO(control2) },
612   { "alt_bsux",                   MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
613   { "alt_circumflex",             MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
614   { "alt_verbnames",              MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
615   { "altglobal",                  MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
616   { "anchored",                   MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
617   { "auto_callout",               MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
618   { "bad_escape_is_literal",      MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
619   { "bincode",                    MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
620   { "bsr",                        MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
621   { "callout_capture",            MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
622   { "callout_data",               MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
623   { "callout_error",              MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
624   { "callout_extra",              MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
625   { "callout_fail",               MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
626   { "callout_info",               MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
627   { "callout_no_where",           MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
628   { "callout_none",               MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
629   { "caseless",                   MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
630   { "convert",                    MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
631   { "convert_glob_escape",        MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
632   { "convert_glob_separator",     MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
633   { "convert_length",             MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
634   { "copy",                       MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
635   { "copy_matched_subject",       MOD_DAT,  MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
636   { "debug",                      MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
637   { "depth_limit",                MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
638   { "dfa",                        MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
639   { "dfa_restart",                MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
640   { "dfa_shortest",               MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
641   { "dollar_endonly",             MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
642   { "dotall",                     MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
643   { "dupnames",                   MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
644   { "endanchored",                MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
645   { "escaped_cr_is_lf",           MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
646   { "expand",                     MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
647   { "extended",                   MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
648   { "extended_more",              MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
649   { "extra_alt_bsux",             MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALT_BSUX,       CO(extra_options) },
650   { "find_limits",                MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
651   { "firstline",                  MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
652   { "framesize",                  MOD_PAT,  MOD_CTL, CTL_FRAMESIZE,              PO(control) },
653   { "fullbincode",                MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
654   { "get",                        MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
655   { "getall",                     MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
656   { "global",                     MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
657   { "heap_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
658   { "hex",                        MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
659   { "info",                       MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
660   { "jit",                        MOD_PAT,  MOD_IND, 7,                          PO(jit) },
661   { "jitfast",                    MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
662   { "jitstack",                   MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
663   { "jitverify",                  MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
664   { "literal",                    MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
665   { "locale",                     MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
666   { "mark",                       MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
667   { "match_limit",                MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
668   { "match_line",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
669   { "match_unset_backref",        MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
670   { "match_word",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
671   { "max_pattern_length",         MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
672   { "memory",                     MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
673   { "multiline",                  MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
674   { "never_backslash_c",          MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
675   { "never_ucp",                  MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
676   { "never_utf",                  MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
677   { "newline",                    MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
678   { "no_auto_capture",            MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
679   { "no_auto_possess",            MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
680   { "no_dotstar_anchor",          MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
681   { "no_jit",                     MOD_DAT,  MOD_OPT, PCRE2_NO_JIT,               DO(options) },
682   { "no_start_optimize",          MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
683   { "no_utf_check",               MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
684   { "notbol",                     MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
685   { "notempty",                   MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
686   { "notempty_atstart",           MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
687   { "noteol",                     MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
688   { "null_context",               MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
689   { "offset",                     MOD_DAT,  MOD_INT, 0,                          DO(offset) },
690   { "offset_limit",               MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
691   { "ovector",                    MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
692   { "parens_nest_limit",          MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
693   { "partial_hard",               MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
694   { "partial_soft",               MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
695   { "ph",                         MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
696   { "posix",                      MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
697   { "posix_nosub",                MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
698   { "posix_startend",             MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
699   { "ps",                         MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
700   { "push",                       MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
701   { "pushcopy",                   MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
702   { "pushtablescopy",             MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
703   { "recursion_limit",            MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
704   { "regerror_buffsize",          MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
705   { "replace",                    MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
706   { "stackguard",                 MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
707   { "startchar",                  MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
708   { "startoffset",                MOD_DAT,  MOD_INT, 0,                          DO(offset) },
709   { "subject_literal",            MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
710   { "substitute_callout",         MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_CALLOUT,    PO(control2) },
711   { "substitute_extended",        MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
712   { "substitute_overflow_length", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
713   { "substitute_skip",            MOD_PND,  MOD_INT, 0,                          PO(substitute_skip) },
714   { "substitute_stop",            MOD_PND,  MOD_INT, 0,                          PO(substitute_stop) },
715   { "substitute_unknown_unset",   MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
716   { "substitute_unset_empty",     MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
717   { "tables",                     MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
718   { "ucp",                        MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
719   { "ungreedy",                   MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
720   { "use_length",                 MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
721   { "use_offset_limit",           MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
722   { "utf",                        MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
723   { "utf8_input",                 MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
724   { "zero_terminate",             MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
725 };
726 
/* Number of entries in modlist. The expansion is fully parenthesized so that
it behaves as one operand when used inside a larger expression, for example as
the right-hand side of a division. */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
728 
/* Controls and options that are supported for use with the POSIX interface.
Each macro is a bit mask built by ORing the permitted bits together; (0) is an
empty mask. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS  | \
  PCRE2_DOTALL    | \
  PCRE2_LITERAL   | \
  PCRE2_MULTILINE | \
  PCRE2_UCP       | \
  PCRE2_UTF       | \
  PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT    | \
  CTL_ALLAFTERTEXT | \
  CTL_EXPAND       | \
  CTL_HEXPAT       | \
  CTL_POSIX        | \
  CTL_POSIX_NOSUB  | \
  CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL   | \
  PCRE2_NOTEMPTY | \
  PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS  ( \
  CTL_AFTERTEXT | \
  CTL_ALLAFTERTEXT)

#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
748 
/* Control bits that are not ignored with 'push'. The first macro is a mask
over the first control word, the second a mask over the second control word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE        | \
  CTL_CALLOUT_INFO   | \
  CTL_FULLBINCODE    | \
  CTL_HEXPAT         | \
  CTL_INFO           | \
  CTL_JITVERIFY      | \
  CTL_MEMORY         | \
  CTL_FRAMESIZE      | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 ( \
  CTL2_BSR_SET | \
  CTL2_NL_SET)

/* Controls that apply only at compile time with 'push'. */

#define PUSH_COMPILE_ONLY_CONTROLS   (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)
762 
/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT         | \
  CTL_POSIX          | \
  CTL_POSIX_NOSUB    | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)
767 
768 /* Pattern controls that are mutually exclusive. At present these are all in
769 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
770 CTL_POSIX, so it doesn't need its own entries. */
771 
772 static uint32_t exclusive_pat_controls[] = {
773   CTL_POSIX    | CTL_PUSH,
774   CTL_POSIX    | CTL_PUSHCOPY,
775   CTL_POSIX    | CTL_PUSHTABLESCOPY,
776   CTL_PUSH     | CTL_PUSHCOPY,
777   CTL_PUSH     | CTL_PUSHTABLESCOPY,
778   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
779   CTL_EXPAND   | CTL_HEXPAT };
780 
781 /* Data controls that are mutually exclusive. At present these are all in the
782 first control word. */
783 
784 static uint32_t exclusive_dat_controls[] = {
785   CTL_ALLUSEDTEXT | CTL_STARTCHAR,
786   CTL_FINDLIMITS  | CTL_NULLCONTEXT };
787 
/* Table of single-character abbreviated modifiers. Every entry starts with an
index of -1; the first time the modifier is encountered, the index is replaced
by the position of the full entry in modlist, which saves repeated searching
when processing multiple test items. This short list is searched serially, so
its order does not matter. */

typedef struct c1modstruct {
  const char *fullname;    /* Full modifier name */
  uint32_t    onechar;     /* Its one-character abbreviation */
  int         index;       /* Cached modlist index, or -1 if not yet found */
} c1modstruct;

/* Local helper that builds one row with the index not yet resolved. */

#define C1MOD(name,ch) { name, ch, -1 }

static c1modstruct c1modlist[] = {
  C1MOD("bincode",         'B'),
  C1MOD("info",            'I'),
  C1MOD("global",          'g'),
  C1MOD("caseless",        'i'),
  C1MOD("multiline",       'm'),
  C1MOD("no_auto_capture", 'n'),
  C1MOD("dotall",          's'),
  C1MOD("extended",        'x')
};

#undef C1MOD
810 
/* Number of entries in c1modlist. The expansion is fully parenthesized so
that it behaves as one operand when used inside a larger expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
812 
813 /* Table of arguments for the -C command line option. Use macros to make the
814 table itself easier to read. */
815 
/* SUPPORT_8/16/32 become 1 when the matching SUPPORT_PCRE2_xx macro is
defined; otherwise they become 0 unless something has already defined them. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined SUPPORT_8
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined SUPPORT_16
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined SUPPORT_32
#define SUPPORT_32 0
#endif

/* EBCDIC support flag and the newline value used in an EBCDIC build (zero
otherwise). */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif

/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
849 
850 typedef struct coptstruct {
851   const char *name;
852   uint32_t    type;
853   uint32_t    value;
854 } coptstruct;
855 
856 enum { CONF_BSR,
857        CONF_FIX,
858        CONF_FIZ,
859        CONF_INT,
860        CONF_NL
861 };
862 
863 static coptstruct coptlist[] = {
864   { "backslash-C", CONF_FIX, BACKSLASH_C },
865   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
866   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
867   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
868   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
869   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
870   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
871   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
872   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
873   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
874   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
875 };
876 
/* Number of entries in coptlist. The expansion is fully parenthesized so that
it behaves as one operand when used inside a larger expression. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
878 
879 #undef SUPPORT_8
880 #undef SUPPORT_16
881 #undef SUPPORT_32
882 #undef SUPPORT_EBCDIC
883 
884 
885 /* ----------------------- Static variables ------------------------ */
886 
887 static FILE *infile;
888 static FILE *outfile;
889 
890 static const void *last_callout_mark;
891 static PCRE2_JIT_STACK *jit_stack = NULL;
892 static size_t jit_stack_size = 0;
893 
894 static BOOL first_callout;
895 static BOOL jit_was_used;
896 static BOOL restrict_for_perl_test = FALSE;
897 static BOOL show_memory = FALSE;
898 
899 static int code_unit_size;                    /* Bytes */
900 static int jitrc;                             /* Return from JIT compile */
901 static int test_mode = DEFAULT_TEST_MODE;
902 static int timeit = 0;
903 static int timeitm = 0;
904 
905 clock_t total_compile_time = 0;
906 clock_t total_jit_compile_time = 0;
907 clock_t total_match_time = 0;
908 
909 static uint32_t dfa_matched;
910 static uint32_t forbid_utf = 0;
911 static uint32_t maxlookbehind;
912 static uint32_t max_oveccount;
913 static uint32_t callout_count;
914 static uint32_t maxcapcount;
915 
916 static uint16_t local_newline_default = 0;
917 
918 static VERSION_TYPE jittarget[VERSION_SIZE];
919 static VERSION_TYPE version[VERSION_SIZE];
920 static VERSION_TYPE uversion[VERSION_SIZE];
921 
922 static patctl def_patctl;
923 static patctl pat_patctl;
924 static datctl def_datctl;
925 static datctl dat_datctl;
926 
927 static void *patstack[PATSTACKSIZE];
928 static int patstacknext = 0;
929 
930 static void *malloclist[MALLOCLISTSIZE];
931 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
932 static uint32_t malloclistptr = 0;
933 
934 #ifdef SUPPORT_PCRE2_8
935 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
936 #endif
937 
938 static int *dfa_workspace = NULL;
939 static const uint8_t *locale_tables = NULL;
940 static const uint8_t *use_tables = NULL;
941 static uint8_t locale_name[32];
942 
943 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
944 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
945 buffer is where all input lines are read. Its size is the same as pbuffer8.
946 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
947 are actually compiled from pbuffer16 or pbuffer32. */
948 
949 static size_t    pbuffer8_size  = 50000;        /* Initial size, bytes */
950 static uint8_t  *pbuffer8 = NULL;
951 static uint8_t  *buffer = NULL;
952 
953 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
954 is cast as needed. For long data lines it grows as necessary. */
955 
956 static size_t dbuffer_size = 1u << 14;    /* Initial size, bytes */
957 static uint8_t *dbuffer = NULL;
958 
959 
960 /* ---------------- Mode-dependent variables -------------------*/
961 
#ifdef SUPPORT_PCRE2_8
/* Objects used when testing the 8-bit library: compiled pattern, contexts
(each with a default_ or _copy companion), and match data. */
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif
970 
#ifdef SUPPORT_PCRE2_16
/* Objects used when testing the 16-bit library: compiled pattern, contexts
(each with a default_ or _copy companion), match data, and the buffer for
16-bit strings together with its size. */
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static uint16_t                 *pbuffer16 = NULL;
static PCRE2_SIZE                pbuffer16_size = 0;   /* Set only when needed */
#endif
981 
#ifdef SUPPORT_PCRE2_32
/* Objects used when testing the 32-bit library: compiled pattern, contexts
(each with a default_ or _copy companion), match data, and the buffer for
32-bit strings together with its size. */
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static uint32_t                 *pbuffer32 = NULL;
static PCRE2_SIZE                pbuffer32_size = 0;   /* Set only when needed */
#endif
992 
993 
994 /* ---------------- Macros that work in all modes ----------------- */
995 
996 #define CAST8VAR(x) CASTVAR(uint8_t *, x)
997 #define SET(x,y) SETOP(x,y,=)
998 #define SETPLUS(x,y) SETOP(x,y,+=)
999 #define strlen8(x) strlen((char *)x)
1000 
1001 
1002 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1003 
1004 /* Define macros for variables and functions that must be selected dynamically
1005 depending on the mode setting (8, 16, 32). These are dependent on which modes
1006 are supported. */
1007 
1008 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1009      defined (SUPPORT_PCRE2_32)) >= 2
1010 
1011 /* ----- All three modes supported ----- */
1012 
1013 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1014 
/* Read field b through the 8-, 16- or 32-bit variant of pointer a, selected
by test_mode, and cast the value to type t. Any mode other than 8 or 16 uses
the 32-bit variant. */

#define CASTFLD(t,a,b) ( \
  (test_mode == PCRE8_MODE)?  (t)(G(a,8)->b)  : \
  (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
                              (t)(G(a,32)->b))
1017 
/* Yield the 8-, 16- or 32-bit variant of variable x, selected by test_mode,
cast to type t. Any mode other than 8 or 16 uses the 32-bit variant. */

#define CASTVAR(t,x) ( \
  (test_mode == PCRE8_MODE)?  (t)G(x,8)  : \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : \
                              (t)G(x,32))
1021 
/* Fetch code unit number b from string a as a uint32_t, treating a as a
string of 8-, 16- or 32-bit code units according to test_mode. Any mode other
than 8 or 16 is treated as 32-bit. */

#define CODE_UNIT(a,b) ( \
  (test_mode == PCRE8_MODE)?  (uint32_t)(((PCRE2_SPTR8)(a))[b])  : \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
                              (uint32_t)(((PCRE2_SPTR32)(a))[b]))
1026 
1027 #define CONCTXCPY(a,b) \
1028   if (test_mode == PCRE8_MODE) \
1029     memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
1030   else if (test_mode == PCRE16_MODE) \
1031     memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
1032   else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
1033 
/* Copy c code units from b into the 8-, 16- or 32-bit variant of buffer a,
selected by test_mode; the byte count passed to memcpy is c, 2*c or 4*c
respectively. Nothing is copied if test_mode is none of the three modes.

The body is wrapped in do { } while (0) because the chain has no final "else":
without the wrapper, an "else" written by the caller after the macro would
attach to the last "if" inside it. Arguments b and c are parenthesized so that
expression arguments are cast and scaled as a whole. */

#define CONVERT_COPY(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),(char *)(b),(c)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),(char *)(b),(c)*2); \
    else if (test_mode == PCRE32_MODE) \
      memcpy(G(a,32),(char *)(b),(c)*4); \
  } while (0)
1041 
1042 #define DATCTXCPY(a,b) \
1043   if (test_mode == PCRE8_MODE) \
1044     memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
1045   else if (test_mode == PCRE16_MODE) \
1046     memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
1047   else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
1048 
/* Access field b through the 8-, 16- or 32-bit variant of pointer a, selected
by test_mode. Any mode other than 8 or 16 uses the 32-bit variant. */

#define FLD(a,b) ( \
  (test_mode == PCRE8_MODE)?  G(a,8)->b  : \
  (test_mode == PCRE16_MODE)? G(a,16)->b : \
                              G(a,32)->b)
1051 
1052 #define PATCTXCPY(a,b) \
1053   if (test_mode == PCRE8_MODE) \
1054     memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
1055   else if (test_mode == PCRE16_MODE) \
1056     memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
1057   else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
1058 
1059 #define PCHARS(lv, p, offset, len, utf, f) \
1060   if (test_mode == PCRE32_MODE) \
1061     lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
1062   else if (test_mode == PCRE16_MODE) \
1063     lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
1064   else \
1065     lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1066 
1067 #define PCHARSV(p, offset, len, utf, f) \
1068   if (test_mode == PCRE32_MODE) \
1069     (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
1070   else if (test_mode == PCRE16_MODE) \
1071     (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
1072   else \
1073     (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1074 
1075 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
1076   if (test_mode == PCRE8_MODE) \
1077      a = pcre2_callout_enumerate_8(compiled_code8, \
1078        (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
1079   else if (test_mode == PCRE16_MODE) \
1080      a = pcre2_callout_enumerate_16(compiled_code16, \
1081        (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
1082   else \
1083      a = pcre2_callout_enumerate_32(compiled_code32, \
1084        (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1085 
1086 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \
1087   if (test_mode == PCRE8_MODE) \
1088     G(a,8) = pcre2_code_copy_8(b); \
1089   else if (test_mode == PCRE16_MODE) \
1090     G(a,16) = pcre2_code_copy_16(b); \
1091   else \
1092     G(a,32) = pcre2_code_copy_32(b)
1093 
1094 #define PCRE2_CODE_COPY_TO_VOID(a,b) \
1095   if (test_mode == PCRE8_MODE) \
1096     a = (void *)pcre2_code_copy_8(G(b,8)); \
1097   else if (test_mode == PCRE16_MODE) \
1098     a = (void *)pcre2_code_copy_16(G(b,16)); \
1099   else \
1100     a = (void *)pcre2_code_copy_32(G(b,32))
1101 
1102 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
1103   if (test_mode == PCRE8_MODE) \
1104     a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
1105   else if (test_mode == PCRE16_MODE) \
1106     a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
1107   else \
1108     a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
1109 
1110 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
1111   if (test_mode == PCRE8_MODE) \
1112     G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
1113   else if (test_mode == PCRE16_MODE) \
1114     G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
1115   else \
1116     G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
1117 
1118 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
1119   if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
1120   else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
1121   else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
1122 
1123 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
1124   if (test_mode == PCRE8_MODE) \
1125     a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
1126   else if (test_mode == PCRE16_MODE) \
1127     a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
1128   else \
1129     a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1130 
/* Call the get_error_message function for the current mode with error code a
and the matching variant of buffer b, storing the return value in r. The length
argument is <b>8_size, <b>16_size/2 or <b>32_size/4; the divisors are
presumably there because the _size variables count bytes while the function
takes a length in code units (confirm against the buffer allocation code). */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)/2); \
  else \
    r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size)/4)
1138 
1139 #define PCRE2_GET_OVECTOR_COUNT(a,b) \
1140   if (test_mode == PCRE8_MODE) \
1141     a = pcre2_get_ovector_count_8(G(b,8)); \
1142   else if (test_mode == PCRE16_MODE) \
1143     a = pcre2_get_ovector_count_16(G(b,16)); \
1144   else \
1145     a = pcre2_get_ovector_count_32(G(b,32))
1146 
1147 #define PCRE2_GET_STARTCHAR(a,b) \
1148   if (test_mode == PCRE8_MODE) \
1149     a = pcre2_get_startchar_8(G(b,8)); \
1150   else if (test_mode == PCRE16_MODE) \
1151     a = pcre2_get_startchar_16(G(b,16)); \
1152   else \
1153     a = pcre2_get_startchar_32(G(b,32))
1154 
1155 #define PCRE2_JIT_COMPILE(r,a,b) \
1156   if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \
1157   else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \
1158   else r = pcre2_jit_compile_32(G(a,32),b)
1159 
1160 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
1161   if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \
1162   else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \
1163   else pcre2_jit_free_unused_memory_32(G(a,32))
1164 
1165 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
1166   if (test_mode == PCRE8_MODE) \
1167     a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
1168   else if (test_mode == PCRE16_MODE) \
1169     a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
1170   else \
1171     a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1172 
/* Create a JIT stack with the jit_stack_create function for the current mode,
passing b, c and d through, and store the result in a cast to
PCRE2_JIT_STACK *. The expansion deliberately has no trailing semicolon: the
caller supplies it, so the macro can be used as the body of an "if" that has
an "else". */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1180 
/* Call the jit_stack_assign function for the current mode on the matching
variant of context a, with callback b (cast to that mode's callback type) and
data c. The expansion deliberately has no trailing semicolon: the caller
supplies it, so the macro can be used as the body of an "if" that has an
"else". */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1188 
/* Free JIT stack a with the jit_stack_free function for the current mode,
casting a to that mode's stack pointer type. The expansion deliberately has no
trailing semicolon: the caller supplies it, so the macro can be used as the
body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1196 
1197 #define PCRE2_MAKETABLES(a) \
1198   if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(NULL); \
1199   else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(NULL); \
1200   else a = pcre2_maketables_32(NULL)
1201 
1202 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
1203   if (test_mode == PCRE8_MODE) \
1204     a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
1205   else if (test_mode == PCRE16_MODE) \
1206     a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
1207   else \
1208     a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1209 
1210 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \
1211   if (test_mode == PCRE8_MODE) \
1212     G(a,8) = pcre2_match_data_create_8(b,c); \
1213   else if (test_mode == PCRE16_MODE) \
1214     G(a,16) = pcre2_match_data_create_16(b,c); \
1215   else \
1216     G(a,32) = pcre2_match_data_create_32(b,c)
1217 
1218 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
1219   if (test_mode == PCRE8_MODE) \
1220     G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
1221   else if (test_mode == PCRE16_MODE) \
1222     G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
1223   else \
1224     G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
1225 
1226 #define PCRE2_MATCH_DATA_FREE(a) \
1227   if (test_mode == PCRE8_MODE) \
1228     pcre2_match_data_free_8(G(a,8)); \
1229   else if (test_mode == PCRE16_MODE) \
1230     pcre2_match_data_free_16(G(a,16)); \
1231   else \
1232     pcre2_match_data_free_32(G(a,32))
1233 
1234 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
1235   if (test_mode == PCRE8_MODE) \
1236     a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
1237   else if (test_mode == PCRE16_MODE) \
1238     a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
1239   else \
1240     a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
1241 
1242 #define PCRE2_PATTERN_INFO(a,b,c,d) \
1243   if (test_mode == PCRE8_MODE) \
1244     a = pcre2_pattern_info_8(G(b,8),c,d); \
1245   else if (test_mode == PCRE16_MODE) \
1246     a = pcre2_pattern_info_16(G(b,16),c,d); \
1247   else \
1248     a = pcre2_pattern_info_32(G(b,32),c,d)
1249 
1250 #define PCRE2_PRINTINT(a) \
1251   if (test_mode == PCRE8_MODE) \
1252     pcre2_printint_8(compiled_code8,outfile,a); \
1253   else if (test_mode == PCRE16_MODE) \
1254     pcre2_printint_16(compiled_code16,outfile,a); \
1255   else \
1256     pcre2_printint_32(compiled_code32,outfile,a)
1257 
1258 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
1259   if (test_mode == PCRE8_MODE) \
1260     r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
1261   else if (test_mode == PCRE16_MODE) \
1262     r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
1263   else \
1264     r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
1265 
1266 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
1267   if (test_mode == PCRE8_MODE) \
1268     r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
1269   else if (test_mode == PCRE16_MODE) \
1270     r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
1271   else \
1272     r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
1273 
1274 #define PCRE2_SERIALIZE_FREE(a) \
1275   if (test_mode == PCRE8_MODE) \
1276     pcre2_serialize_free_8(a); \
1277   else if (test_mode == PCRE16_MODE) \
1278     pcre2_serialize_free_16(a); \
1279   else \
1280     pcre2_serialize_free_32(a)
1281 
/* Call the serialize_get_number_of_codes function for the current mode on a
and store its return value in r. The expansion ends without a semicolon and
without a line continuation: the caller supplies the semicolon, so the macro
can be used as the body of an "if" that has an "else", and the definition does
not swallow the line that follows it. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1289 
/* Call the set_callout function for the current mode on the matching variant
of context a, with function b (cast to that mode's callout function type) and
data c. The expansion deliberately has no trailing semicolon: the caller
supplies it, so the macro can be used as the body of an "if" that has an
"else". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1297 
1298 #define PCRE2_SET_CHARACTER_TABLES(a,b) \
1299   if (test_mode == PCRE8_MODE) \
1300     pcre2_set_character_tables_8(G(a,8),b); \
1301   else if (test_mode == PCRE16_MODE) \
1302     pcre2_set_character_tables_16(G(a,16),b); \
1303   else \
1304     pcre2_set_character_tables_32(G(a,32),b)
1305 
1306 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
1307   if (test_mode == PCRE8_MODE) \
1308     pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
1309   else if (test_mode == PCRE16_MODE) \
1310     pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
1311   else \
1312     pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
1313 
1314 #define PCRE2_SET_DEPTH_LIMIT(a,b) \
1315   if (test_mode == PCRE8_MODE) \
1316     pcre2_set_depth_limit_8(G(a,8),b); \
1317   else if (test_mode == PCRE16_MODE) \
1318     pcre2_set_depth_limit_16(G(a,16),b); \
1319   else \
1320     pcre2_set_depth_limit_32(G(a,32),b)
1321 
1322 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
1323   if (test_mode == PCRE8_MODE) \
1324     r = pcre2_set_glob_separator_8(G(a,8),b); \
1325   else if (test_mode == PCRE16_MODE) \
1326     r = pcre2_set_glob_separator_16(G(a,16),b); \
1327   else \
1328     r = pcre2_set_glob_separator_32(G(a,32),b)
1329 
1330 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
1331   if (test_mode == PCRE8_MODE) \
1332     r = pcre2_set_glob_escape_8(G(a,8),b); \
1333   else if (test_mode == PCRE16_MODE) \
1334     r = pcre2_set_glob_escape_16(G(a,16),b); \
1335   else \
1336     r = pcre2_set_glob_escape_32(G(a,32),b)
1337 
1338 #define PCRE2_SET_HEAP_LIMIT(a,b) \
1339   if (test_mode == PCRE8_MODE) \
1340     pcre2_set_heap_limit_8(G(a,8),b); \
1341   else if (test_mode == PCRE16_MODE) \
1342     pcre2_set_heap_limit_16(G(a,16),b); \
1343   else \
1344     pcre2_set_heap_limit_32(G(a,32),b)
1345 
1346 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1347   if (test_mode == PCRE8_MODE) \
1348     pcre2_set_match_limit_8(G(a,8),b); \
1349   else if (test_mode == PCRE16_MODE) \
1350     pcre2_set_match_limit_16(G(a,16),b); \
1351   else \
1352     pcre2_set_match_limit_32(G(a,32),b)
1353 
1354 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1355   if (test_mode == PCRE8_MODE) \
1356     pcre2_set_max_pattern_length_8(G(a,8),b); \
1357   else if (test_mode == PCRE16_MODE) \
1358     pcre2_set_max_pattern_length_16(G(a,16),b); \
1359   else \
1360     pcre2_set_max_pattern_length_32(G(a,32),b)
1361 
1362 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1363   if (test_mode == PCRE8_MODE) \
1364     pcre2_set_offset_limit_8(G(a,8),b); \
1365   else if (test_mode == PCRE16_MODE) \
1366     pcre2_set_offset_limit_16(G(a,16),b); \
1367   else \
1368     pcre2_set_offset_limit_32(G(a,32),b)
1369 
1370 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1371   if (test_mode == PCRE8_MODE) \
1372     pcre2_set_parens_nest_limit_8(G(a,8),b); \
1373   else if (test_mode == PCRE16_MODE) \
1374     pcre2_set_parens_nest_limit_16(G(a,16),b); \
1375   else \
1376     pcre2_set_parens_nest_limit_32(G(a,32),b)
1377 
/* Register a substitute callout. fn is cast to a pointer to a function taking
(pcre2_substitute_callout_block_N *, void *) and returning int, and is passed
together with data to pcre2_set_substitute_callout_N for G(ctx,N). */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(ctx,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))fn,data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(ctx,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))fn,data); \
  else \
    pcre2_set_substitute_callout_32(G(ctx,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))fn,data)

/* Call pcre2_substitute_N and store its result in rc. code and mdata are
passed as G(code,N) and G(mdata,N); subj and repl are cast to PCRE2_SPTRN and
out to PCRE2_UCHARN *; the remaining arguments are passed through unchanged. */

#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen, \
  out,outlen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),mctx,(PCRE2_SPTR8)repl,rlen,(PCRE2_UCHAR8 *)out,outlen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),mctx,(PCRE2_SPTR16)repl,rlen,(PCRE2_UCHAR16 *)out,outlen); \
  else \
    rc = pcre2_substitute_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),mctx,(PCRE2_SPTR32)repl,rlen,(PCRE2_UCHAR32 *)out,outlen)
1399 
/* Three-mode substring copy/get/free wrappers. rc receives the library
function's result. mdata (and name in the _BYNAME forms) is passed as
G(mdata,N) / G(name,N); the buffer argument is cast to the width-specific
PCRE2_UCHARN pointer type; num and lenptr are passed through unchanged. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 *)buf,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 *)buf,lenptr); \
  else \
    rc = pcre2_substring_copy_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 *)buf,lenptr)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 *)buf,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 *)buf,lenptr); \
  else \
    rc = pcre2_substring_copy_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 *)buf,lenptr)

#define PCRE2_SUBSTRING_FREE(str) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)str); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)str); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)str)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 **)bufptr,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 **)bufptr,lenptr); \
  else \
    rc = pcre2_substring_get_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 **)bufptr,lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 **)bufptr,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 **)bufptr,lenptr); \
  else \
    rc = pcre2_substring_get_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 **)bufptr,lenptr)
1437 
/* Three-mode substring length/list/number wrappers. The first argument of
every macro except PCRE2_SUBSTRING_LIST_FREE receives the library function's
result. Arguments wrapped in G(...,N) name width-specific variables; the list
arguments are cast to the width-specific pointer type. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenptr); \
  else \
    rc = pcre2_substring_length_byname_32(G(mdata,32),G(name,32),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenptr); \
  else \
    rc = pcre2_substring_length_bynumber_32(G(mdata,32),num,lenptr)

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_list_get_8(G(mdata,8), \
      (PCRE2_UCHAR8 ***)listptr,lensptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_list_get_16(G(mdata,16), \
      (PCRE2_UCHAR16 ***)listptr,lensptr); \
  else \
    rc = pcre2_substring_list_get_32(G(mdata,32), \
      (PCRE2_UCHAR32 ***)listptr,lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == PCRE8_MODE) pcre2_substring_list_free_8((PCRE2_SPTR8 *)list); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)list); \
  else pcre2_substring_list_free_32((PCRE2_SPTR32 *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(num,code,name) \
  if (test_mode == PCRE8_MODE) \
    num = pcre2_substring_number_from_name_8(G(code,8),G(name,8)); \
  else if (test_mode == PCRE16_MODE) \
    num = pcre2_substring_number_from_name_16(G(code,16),G(name,16)); \
  else \
    num = pcre2_substring_number_from_name_32(G(code,32),G(name,32))
1477 
/* Width-generic access helpers, three-mode. In each one G(stem,N) names the
8-, 16- or 32-bit variant of a variable and test_mode decides which variant is
touched. PTR is an expression; the others are unbraced if/else statements. */

/* The selected variant of stem, converted to void *. */

#define PTR(stem) ( \
  (test_mode == PCRE8_MODE)?  (void *)G(stem,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(stem,16) : \
                              (void *)G(stem,32))

/* Assign value to member field of the selected variant. */

#define SETFLD(stem,field,value) \
  if (test_mode == PCRE8_MODE) \
    G(stem,8)->field = value; \
  else if (test_mode == PCRE16_MODE) \
    G(stem,16)->field = value; \
  else \
    G(stem,32)->field = value

/* Assign value to element idx of the array member field. */

#define SETFLDVEC(stem,field,idx,value) \
  if (test_mode == PCRE8_MODE) \
    G(stem,8)->field[idx] = value; \
  else if (test_mode == PCRE16_MODE) \
    G(stem,16)->field[idx] = value; \
  else \
    G(stem,32)->field[idx] = value

/* Apply "op operand" to the selected variant, e.g. SETOP(x, 1, +=). */

#define SETOP(stem,operand,op) \
  if (test_mode == PCRE8_MODE) \
    G(stem,8) op operand; \
  else if (test_mode == PCRE16_MODE) \
    G(stem,16) op operand; \
  else \
    G(stem,32) op operand

/* Store ptr in the selected variant after casting it to uintN_t *. */

#define SETCASTPTR(stem,ptr) \
  if (test_mode == PCRE8_MODE) G(stem,8) = (uint8_t *)(ptr); \
  else if (test_mode == PCRE16_MODE) G(stem,16) = (uint16_t *)(ptr); \
  else G(stem,32) = (uint32_t *)(ptr)
1505 
/* Length of string p as an int, obtained from strlen(), strlen16() or
strlen32() according to test_mode; p is cast to the type each one expects. */

#define STRLEN(p) ( \
  (test_mode == PCRE8_MODE)? (int)strlen((char *)p) : \
  (test_mode == PCRE16_MODE)? (int)strlen16((PCRE2_SPTR16)p) : \
  (int)strlen32((PCRE2_SPTR32)p))

/* Call the width-specific function G(fn,N) with one (SUB1) or two (SUB2)
width-specific arguments. Both expand to unbraced if/else statements. */

#define SUB1(fn,arg) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg,16)); \
  else \
    G(fn,32)(G(arg,32))

#define SUB2(fn,arg1,arg2) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg1,8),G(arg2,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg1,16),G(arg2,16)); \
  else \
    G(fn,32)(G(arg1,32),G(arg2,32))
1519 
/* Comparison helpers, three-mode. Each yields 1 when test_mode equals one of
the three mode constants and "variant op (rhs)" is true for that mode's
variant of stem, and 0 otherwise (including when test_mode matches no mode).
rhs is evaluated only for the matching mode. */

#define TEST(stem,op,rhs) ( \
  (test_mode == PCRE8_MODE  && G(stem,8)  op (rhs)) || \
  (test_mode == PCRE16_MODE && G(stem,16) op (rhs)) || \
  (test_mode == PCRE32_MODE && G(stem,32) op (rhs)))

/* As TEST, but compares member field of the selected variant. */

#define TESTFLD(stem,field,op,rhs) ( \
  (test_mode == PCRE8_MODE  && G(stem,8)->field  op (rhs)) || \
  (test_mode == PCRE16_MODE && G(stem,16)->field op (rhs)) || \
  (test_mode == PCRE32_MODE && G(stem,32)->field op (rhs)))
1529 
1530 
1531 /* ----- Two out of three modes are supported ----- */
1532 
1533 #else
1534 
1535 /* We can use some macro trickery to make a single set of definitions work in
1536 the three different cases. */
1537 
/* Two of the three code unit widths are supported here. BITONE is the wider
width of the pair and BITTWO the narrower one:

  32-bit and 16-bit but not 8-bit supported:   BITONE 32, BITTWO 16
  32-bit and 8-bit but not 16-bit supported:   BITONE 32, BITTWO 8
  16-bit and 8-bit but not 32-bit supported:   BITONE 16, BITTWO 8 */

#if defined(SUPPORT_PCRE2_32) && \
    (defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_8))
#define BITONE 32
#else
#define BITONE 16
#endif

#if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
#define BITTWO 16
#else
#define BITTWO 8
#endif
1556 
1557 
1558 /* ----- Common macros for two-mode cases ----- */
1559 
/* Size in bytes of one code unit for each of the two supported widths. */

#define BYTEONE (BITONE/8)
#define BYTETWO (BITTWO/8)

/* Member field of the BITONE or BITTWO variant of stem (chosen by test_mode),
cast to type. */

#define CASTFLD(type,stem,field) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(stem,BITONE)->field) : \
    (type)(G(stem,BITTWO)->field))

/* The BITONE or BITTWO variant of variable stem, cast to type. */

#define CASTVAR(type,stem) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(stem,BITONE) : \
    (type)G(stem,BITTWO))

/* Code unit number idx of str, read through the PCRE2_SPTR type of the
current mode and widened to uint32_t. */

#define CODE_UNIT(str,idx) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(str))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(str))[idx]))
1575 
/* memcpy() one pcre2_convert_context_N object from G(src,N) to G(dst,N), where
N is BITONE or BITTWO according to test_mode. Expands to an unbraced if/else
statement. */

#define CONCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))
1581 
/* Copy c code units from b into the buffer G(a,BITONE) or G(a,BITTWO),
whichever matches test_mode; the byte count is c multiplied by that mode's
code unit size. The whole conditional is parenthesized so that it expands to a
single expression yielding memcpy()'s result: without the outer parentheses an
adjacent operator (for example a leading '!' or a trailing comparison) would
bind to the mode test or to one branch only. */

#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
  memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO))
1586 
/* memcpy() one pcre2_match_context_N object from G(src,N) to G(dst,N), with N
chosen by test_mode. Expands to an unbraced if/else statement. */

#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

/* Value of member field in the BITONE or BITTWO variant of stem. Because this
is a conditional expression it cannot be assigned to. */

#define FLD(stem,field) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(stem,BITONE)->field : G(stem,BITTWO)->field)

/* As DATCTXCPY, but for one pcre2_compile_context_N object. */

#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE),G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO),G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1601 
/* Call pcharsN (N chosen by test_mode) on str, viewed through that mode's
PCRE2_SPTR type and advanced by start code units; count, utf and file are
passed through. PCHARS stores the function's result in result, PCHARSV
discards it. Both expand to unbraced if/else statements. */

#define PCHARS(result, str, start, count, utf, file) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(str)+start, \
      count, utf, file); \
  else \
    result = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(str)+start, \
      count, utf, file)

#define PCHARSV(str, start, count, utf, file) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(str)+start, \
      count, utf, file); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(str)+start, \
      count, utf, file)
1613 
/* Enumerate the callouts of the current compiled pattern: call
pcre2_callout_enumerate_N on G(compiled_code,N), with b cast to a pointer to a
function taking (struct pcre2_callout_enumerate_block_N *, void *) and
returning int, and c passed through; the result is stored in a. N is BITONE or
BITTWO according to test_mode.

The function name stem carries its trailing underscore, like every other
wrapper in this table, so the pasted name is pcre2_callout_enumerate_N (the
name the single-mode definitions call) and does not depend on the stem itself
being expanded before pasting. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1621 
/* Two-mode wrappers around pcre2_code_copy_N, pcre2_code_copy_with_tables_N,
pcre2_compile_N and pcre2_converted_pattern_free_N. test_mode selects BITONE
or BITTWO for both the function and every G()-named variable. All of them
expand to unbraced if/else statements. */

/* G(dst,N) = pcre2_code_copy_N(src). */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(dst,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

/* dst = pcre2_code_copy_N(G(src,N)), converted to void *. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(src,BITTWO))

/* dst = pcre2_code_copy_with_tables_N(G(src,N)), converted to void *. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(src,BITTWO))

/* G(code,N) = pcre2_compile_N(G(pat,N), ...); the last five arguments are
passed through unchanged. */

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pat,BITONE), \
      len,opts,errcode,erroff,ctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pat,BITTWO), \
      len,opts,errcode,erroff,ctx)

/* Pass pat, cast to PCRE2_UCHARN *, to pcre2_converted_pattern_free_N. */

#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) *)pat); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) *)pat)
1651 
/* Two-mode wrappers for matching, error text, match-data queries and JIT
compilation. In each macro test_mode selects BITONE or BITTWO for the library
function and for every G()-named variable; the first argument of the macros
that have an rc/count/start parameter receives the function's result. All of
them expand to unbraced if/else statements. */

#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,ws,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts,G(mdata,BITONE),mctx,ws, \
      wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts,G(mdata,BITTWO),mctx,ws, \
      wscount)

/* The buffer is G(buf,N); the size passed to the library is the variable
G(buf,N) with "_size" appended, divided by the code unit size in bytes. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errcode,G(buf,BITONE), \
      G(G(buf,BITONE),_size)/BYTEONE); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errcode,G(buf,BITTWO), \
      G(G(buf,BITTWO),_size)/BYTETWO)

#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    count = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    count = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_GET_STARTCHAR(start,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    start = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    start = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),opts); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),opts)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(ctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(ctx,BITTWO))

#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts,G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts,G(mdata,BITTWO),mctx)
1697 
/* JIT stack wrappers, two-mode. test_mode selects the BITONE or BITTWO
library function.

  PCRE2_JIT_STACK_CREATE  a = pcre2_jit_stack_create_N(b,c,d), cast to
                          PCRE2_JIT_STACK *
  PCRE2_JIT_STACK_ASSIGN  pcre2_jit_stack_assign_N(G(a,N), b, c) with b cast
                          to pcre2_jit_callback_N
  PCRE2_JIT_STACK_FREE    pcre2_jit_stack_free_N(a) with a cast to
                          pcre2_jit_stack_N *

Like the other wrappers in this table, the expansions do not end in a
semicolon (the caller's own ';' terminates the statement), and the last line
of PCRE2_JIT_STACK_CREATE no longer ends in a line continuation that ran the
definition on into the following line. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c)

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1715 
/* Two-mode wrappers for table creation, matching, match-data management and
pattern conversion/information. test_mode selects BITONE or BITTWO for the
library function and for every G()-named variable. All of them expand to
unbraced if/else statements. */

/* tables = pcre2_maketables_N(NULL). */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts,G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts,G(mdata,BITTWO),mctx)

#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(ovecsize,gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(ovecsize,gctx)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)( \
      G(code,BITONE),gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)( \
      G(code,BITTWO),gctx)

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))

/* bufptr is cast to PCRE2_UCHARN **; pat and ctx are G()-named variables. */

#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,bufptr,lenptr,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_convert_,BITONE)(G(pat,BITONE),len,opts, \
      (G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr,G(ctx,BITONE)); \
  else \
    rc = G(pcre2_pattern_convert_,BITTWO)(G(pat,BITTWO),len,opts, \
      (G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr,G(ctx,BITTWO))

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)
1759 
/* Two-mode wrappers for printing compiled code and for the serialization
functions. test_mode selects BITONE or BITTWO for the library function and for
every G()-named variable. All of them expand to unbraced if/else statements. */

/* Call pcre2_printint_N on G(compiled_code,N) with outfile and arg. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)

/* codes is cast to pcre2_code_N **; gctx is a G()-named variable. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

/* codes is cast to const pcre2_code_N **; gctx is a G()-named variable. */

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITTWO))

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1789 
/* Register a callout: call pcre2_set_callout_N for G(a,N), with b cast to a
pointer to a function taking (pcre2_callout_block_N *, void *) and returning
int, and c passed through. N is BITONE or BITTWO according to test_mode.

The expansion does not end in a semicolon, matching the other wrappers in this
table; the caller's own ';' terminates the statement, so no stray empty
statement follows the if/else. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1797 
/* Two-mode context setters (first part). Each macro calls the BITONE or
BITTWO library function, as selected by test_mode, on G(ctx,N). The two GLOB
macros also store the function's result in rc. All of them expand to unbraced
if/else statements. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO),guard,data)

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,esc) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_escape_,BITONE)(G(ctx,BITONE),esc); \
  else \
    rc = G(pcre2_set_glob_escape_,BITTWO)(G(ctx,BITTWO),esc)

#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,sep) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_separator_,BITONE)(G(ctx,BITONE),sep); \
  else \
    rc = G(pcre2_set_glob_separator_,BITTWO)(G(ctx,BITTWO),sep)
1827 
/* Two-mode context setters (second part). Each macro calls the BITONE or
BITTWO library function, as selected by test_mode, on G(ctx,N) and passes its
remaining argument(s) through. None of them stores a result. All of them
expand to unbraced if/else statements. */

#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,maxlen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE),maxlen); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO),maxlen)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO),limit)

/* fn is cast to a pointer to a function taking
(pcre2_substitute_callout_block_N *, void *) and returning int. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)(G(ctx,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))fn,data); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)(G(ctx,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))fn,data)
1865 
/* Two-mode substitute and substring copy/free/get wrappers. test_mode selects
BITONE or BITTWO for the library function, for every G()-named variable and
for the PCRE2_SPTR / PCRE2_UCHAR pointer casts. rc receives the function's
result. All of them expand to unbraced if/else statements. */

#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen, \
  out,outlen) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts,G(mdata,BITONE),mctx, \
      (G(PCRE2_SPTR,BITONE))repl,rlen,(G(PCRE2_UCHAR,BITONE) *)out,outlen); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts,G(mdata,BITTWO),mctx, \
      (G(PCRE2_SPTR,BITTWO))repl,rlen,(G(PCRE2_UCHAR,BITTWO) *)out,outlen)

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) *)buf,lenptr); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) *)buf,lenptr)

#define PCRE2_SUBSTRING_FREE(str) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)str); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)str)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) **)bufptr,lenptr); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) **)bufptr,lenptr)
1912 
/* Two-mode substring length/list/number wrappers. test_mode selects BITONE or
BITTWO for the library function, for every G()-named variable and for the
pointer casts. The first argument of every macro except
PCRE2_SUBSTRING_LIST_FREE receives the function's result. All of them expand
to unbraced if/else statements. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),lenptr); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(mdata,BITONE), \
      num,lenptr); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,lenptr)

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)(G(mdata,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)listptr,lensptr); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)(G(mdata,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)listptr,lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(num,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    num = G(pcre2_substring_number_from_name_,BITONE)(G(code,BITONE), \
      G(name,BITONE)); \
  else \
    num = G(pcre2_substring_number_from_name_,BITTWO)(G(code,BITTWO), \
      G(name,BITTWO))
1944 
/* Width-generic access helpers, two-mode. In each one G(stem,N) names the
BITONE or BITTWO variant of a variable and test_mode decides which variant is
touched. PTR and STRLEN are expressions; the others are unbraced if/else
statements. */

/* The selected variant of stem, converted to void *. */

#define PTR(stem) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(stem,BITONE) : (void *)G(stem,BITTWO))

/* Assign value to member field of the selected variant. */

#define SETFLD(stem,field,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(stem,BITONE)->field = value; \
  else \
    G(stem,BITTWO)->field = value

/* Assign value to element idx of the array member field. */

#define SETFLDVEC(stem,field,idx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(stem,BITONE)->field[idx] = value; \
  else \
    G(stem,BITTWO)->field[idx] = value

/* Apply "op operand" to the selected variant, e.g. SETOP(x, 1, +=). */

#define SETOP(stem,operand,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(stem,BITONE) op operand; \
  else \
    G(stem,BITTWO) op operand

/* Store ptr in the selected variant after casting it to uintN_t *. */

#define SETCASTPTR(stem,ptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(stem,BITONE) = (G(G(uint,BITONE),_t) *)(ptr); \
  else \
    G(stem,BITTWO) = (G(G(uint,BITTWO),_t) *)(ptr)

/* Result of strlenN(p) for the current mode, with p cast to PCRE2_SPTRN.
NOTE(review): unlike the three-mode and single-mode STRLEN definitions in this
file, this form has no (int) cast and, when BITTWO is 8, calls strlen8 rather
than strlen - confirm that strlen8 exists and that callers accept its type. */

#define STRLEN(p) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \
    G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p))

/* Call the width-specific function G(fn,N) on the width-specific argument
G(arg,N). */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(fn,BITONE)(G(arg,BITONE)); \
  else G(fn,BITTWO)(G(arg,BITTWO))
1976 
/* Call the width-specific function G(a,N) with the two width-specific
arguments G(b,N) and G(c,N), where N is BITONE or BITTWO according to
test_mode. Expands to an unbraced if/else statement.

The function name is written G(a,N)(...) with balanced parentheses; a stray
')' after the G() call previously made every use of this macro a syntax
error. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1982 
/* Comparison helpers, two-mode. Each yields 1 when test_mode equals the
BITONE or BITTWO mode constant and "variant op (rhs)" is true for that mode's
variant of stem, and 0 otherwise (including when test_mode matches neither
mode). rhs is evaluated only for the matching mode. */

#define TEST(stem,op,rhs) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(stem,BITONE) op (rhs)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(stem,BITTWO) op (rhs)))

/* As TEST, but compares member field of the selected variant. */

#define TESTFLD(stem,field,op,rhs) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(stem,BITONE)->field op (rhs)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(stem,BITTWO)->field op (rhs)))
1990 
1991 
1992 #endif  /* Two out of three modes */
1993 
1994 /* ----- End of cases where more than one mode is supported ----- */
1995 
1996 
1997 /* ----- Only 8-bit mode is supported ----- */
1998 
1999 #elif defined SUPPORT_PCRE2_8
/* 8-bit-only forms of the generic helpers: every G(stem,8) names the 8-bit
variant of a variable and there is no test_mode dispatch. */

#define CASTFLD(type,stem,field) (type)(G(stem,8)->field)
#define CASTVAR(type,stem) (type)G(stem,8)
#define CODE_UNIT(str,idx) (uint32_t)(((PCRE2_SPTR8)(str))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(dst,src,count) memcpy(G(dst,8),(char *)src, count)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
#define FLD(stem,field) G(stem,8)->field
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))
#define PCHARS(result, str, start, count, utf, file) \
  result = pchars8((PCRE2_SPTR8)(str)+start, count, utf, file)
#define PCHARSV(str, start, count, utf, file) \
  (void)pchars8((PCRE2_SPTR8)(str)+start, count, utf, file)
/* 8-bit-only library wrappers (callout enumeration, code copy, compile,
matching, error text, JIT compile). Arguments written G(x,8) name the 8-bit
variant of a variable; subject strings are cast to PCRE2_SPTR8. The first
argument of the macros that have an rc/dst/count/start parameter receives the
library function's result. */

#define PCRE2_CALLOUT_ENUMERATE(rc,fn,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))fn,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_8(G(src,8))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errcode,erroff,ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,ws,wscount) \
  rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx,ws,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(count,mdata) \
  count = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(start,mdata) \
  start = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_8(G(code,8),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_8(G(ctx,8))
#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx)
/* JIT stack wrappers, 8-bit only.

  PCRE2_JIT_STACK_CREATE  a = pcre2_jit_stack_create_8(b,c,d), cast to
                          PCRE2_JIT_STACK *
  PCRE2_JIT_STACK_ASSIGN  pcre2_jit_stack_assign_8(G(a,8), b, c) with b cast
                          to pcre2_jit_callback_8
  PCRE2_JIT_STACK_FREE    pcre2_jit_stack_free_8(a) with a cast to
                          pcre2_jit_stack_8 *

The expansions do not end in a semicolon, matching the other 8-bit wrappers:
with a semicolon baked in, "if (x) PCRE2_JIT_STACK_FREE(s); else ..." expands
to two statements and fails to compile. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a)
/* 8-bit-only library wrappers (tables, matching, match data, pattern
conversion/information, code printing, serialization). Arguments written
G(x,8) name the 8-bit variant of a variable. The first argument of the macros
that have an rc/tables parameter receives the library function's result. */

#define PCRE2_MAKETABLES(tables) tables = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,ovecsize,gctx) \
  G(mdata,8) = pcre2_match_data_create_8(ovecsize,gctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx)
#define PCRE2_MATCH_DATA_FREE(mdata) pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,bufptr,lenptr,ctx) \
  rc = pcre2_pattern_convert_8(G(pat,8),len,opts,(PCRE2_UCHAR8 **)bufptr, \
    lenptr,G(ctx,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8),what,where)
#define PCRE2_PRINTINT(arg) pcre2_printint_8(compiled_code8,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes, \
    G(gctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count, \
    bytesptr,sizeptr,G(gctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
/* 8-bit-only context setters and substitute wrapper. Each setter calls the
_8 library function on G(ctx,8); callback arguments are cast to the matching
function pointer type. The two GLOB setters and PCRE2_SUBSTITUTE store the
function's result in rc. */

#define PCRE2_SET_CALLOUT(ctx,fn,data) \
  pcre2_set_callout_8(G(ctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))fn,data)
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_8(G(ctx,8),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_8(G(ctx,8),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_8(G(ctx,8),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,esc) \
  rc = pcre2_set_glob_escape_8(G(ctx,8),esc)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,sep) \
  rc = pcre2_set_glob_separator_8(G(ctx,8),sep)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,maxlen) \
  pcre2_set_max_pattern_length_8(G(ctx,8),maxlen)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_8(G(ctx,8),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_8(G(ctx,8),limit)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(ctx,fn,data) \
  pcre2_set_substitute_callout_8(G(ctx,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))fn,data)
#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen, \
  out,outlen) \
  rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
    G(mdata,8),mctx,(PCRE2_SPTR8)repl,rlen,(PCRE2_UCHAR8 *)out,outlen)
/* 8-bit-only substring wrappers. mdata (and name in the _BYNAME forms) is
passed as G(mdata,8) / G(name,8); buffer and list arguments are cast to the
8-bit pointer types. rc receives the library function's result. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 *)buf,lenptr)
#define PCRE2_SUBSTRING_FREE(str) pcre2_substring_free_8((PCRE2_UCHAR8 *)str)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 **)bufptr,lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenptr)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  rc = pcre2_substring_list_get_8(G(mdata,8), \
    (PCRE2_UCHAR8 ***)listptr,lensptr)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)list)
/* a = pcre2_substring_number_from_name_8(G(b,8), G(c,8)). The expansion does
not end in a semicolon, matching the other 8-bit wrappers: with a semicolon
baked in, using the macro as the body of an if that has an else expands to two
statements and fails to compile. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* 8-bit-only forms of the access and comparison helpers: G(stem,8) names the
8-bit variant of a variable and there is no test_mode dispatch. */

#define PTR(stem) (void *)G(stem,8)
#define SETFLD(stem,field,value) G(stem,8)->field = value
#define SETFLDVEC(stem,field,idx,value) G(stem,8)->field[idx] = value
#define SETOP(stem,operand,op) G(stem,8) op operand
#define SETCASTPTR(stem,ptr) G(stem,8) = (uint8_t *)(ptr)
#define STRLEN(p) (int)strlen((char *)p)
#define SUB1(fn,arg) G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(stem,op,rhs) (G(stem,8) op (rhs))
#define TESTFLD(stem,field,op,rhs) (G(stem,8)->field op (rhs))
2102 
2103 
2104 /* ----- Only 16-bit mode is supported ----- */
2105 
2106 #elif defined SUPPORT_PCRE2_16
2107 #define CASTFLD(t,a,b) (t)(G(a,16)->b)
2108 #define CASTVAR(t,x) (t)G(x,16)
2109 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
2110 #define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
2111 #define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
2112 #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
2113 #define FLD(a,b) G(a,16)->b
2114 #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
2115 #define PCHARS(lv, p, offset, len, utf, f) \
2116   lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
2117 #define PCHARSV(p, offset, len, utf, f) \
2118   (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
2119 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2120    a = pcre2_callout_enumerate_16(compiled_code16, \
2121      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
2122 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
2123 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
2124 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
2125 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2126   G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
2127 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2128   pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
2129 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2130   a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
2131 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2132   r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
2133 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
2134 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
2135 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
2136 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
2137 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2138   a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
2139 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2140   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
2141 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2142   pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
2143 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
2144 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
2145 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2146   a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
2147 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
2148 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2149   G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
2150 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
2151 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
2152 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
2153 #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
2154 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2155   r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
2156 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2157   r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
2158 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
2159 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2160   r = pcre2_serialize_get_number_of_codes_16(a)
/* 16-bit-only form: calls pcre2_set_callout_16() on the 16-bit variant of a,
casting b to the callout function pointer type and passing c through. The
expansion deliberately has no trailing semicolon (matching the 32-bit-only
definition) so that the ';' written at the call site does not create an extra
empty statement, which would break "if (x) PCRE2_SET_CALLOUT(...); else ...". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
2163 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
2164 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2165   pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
2166 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
2167 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
2168 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
2169 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
2170 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
2171 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
2172 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
2173 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
2174 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2175   pcre2_set_substitute_callout_16(G(a,16), \
2176     (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
2177 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2178   a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
2179     (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
2180 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2181   a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
2182 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2183   a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
2184 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
2185 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2186   a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
2187 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2188   a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
2189 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2190     a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
2191 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2192     a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
2193 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2194   a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
2195 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2196   pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
2197 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2198   a = pcre2_substring_number_from_name_16(G(b,16),G(c,16));
2199 #define PTR(x) (void *)G(x,16)
2200 #define SETFLD(x,y,z) G(x,16)->y = z
2201 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
2202 #define SETOP(x,y,z) G(x,16) z y
2203 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
2204 #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
2205 #define SUB1(a,b) G(a,16)(G(b,16))
2206 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
2207 #define TEST(x,r,y) (G(x,16) r (y))
2208 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2209 
2210 
2211 /* ----- Only 32-bit mode is supported ----- */
2212 
2213 #elif defined SUPPORT_PCRE2_32
2214 #define CASTFLD(t,a,b) (t)(G(a,32)->b)
2215 #define CASTVAR(t,x) (t)G(x,32)
2216 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
2217 #define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
2218 #define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
2219 #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
2220 #define FLD(a,b) G(a,32)->b
2221 #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
2222 #define PCHARS(lv, p, offset, len, utf, f) \
2223   lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2224 #define PCHARSV(p, offset, len, utf, f) \
2225   (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2226 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2227    a = pcre2_callout_enumerate_32(compiled_code32, \
2228      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
2229 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
2230 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
2231 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
2232 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2233   G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
2234 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2235   pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
2236 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2237   a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
2238 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2239   r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
2240 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
2241 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
2242 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
2243 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
2244 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2245   a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2246 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2247   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
2248 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2249   pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
2250 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
2251 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
2252 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2253   a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2254 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
2255 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2256   G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
2257 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
2258 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
2259 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
2260 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
2261 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2262   r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
2263 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2264   r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
2265 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
2266 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2267   r = pcre2_serialize_get_number_of_codes_32(a)
2268 #define PCRE2_SET_CALLOUT(a,b,c) \
2269   pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
2270 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
2271 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2272   pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
2273 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
2274 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
2275 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
2276 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
2277 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
2278 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
2279 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
2280 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
2281 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2282   pcre2_set_substitute_callout_32(G(a,32), \
2283     (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
2284 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2285   a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
2286     (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
2287 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2288   a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* 32-bit-only form: assigns to a the result of
pcre2_substring_copy_bynumber_32() for the 32-bit variant of b, group number c,
output buffer d (cast to PCRE2_UCHAR32 *) and length pointer e. The expansion
has no trailing semicolon, in line with the 8-bit-only and 16-bit-only
definitions, so the call site's own ';' terminates the statement and the macro
remains usable in an if/else without braces. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
2291 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2292 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2293   a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2294 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2295   a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2296 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2297     a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2298 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2299     a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2300 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2301   a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2302 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2303   pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
2304 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2305   a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
2306 #define PTR(x) (void *)G(x,32)
2307 #define SETFLD(x,y,z) G(x,32)->y = z
2308 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2309 #define SETOP(x,y,z) G(x,32) z y
2310 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
2311 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
2312 #define SUB1(a,b) G(a,32)(G(b,32))
2313 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2314 #define TEST(x,r,y) (G(x,32) r (y))
2315 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2316 
2317 #endif
2318 
2319 /* ----- End of mode-specific function call macros ----- */
2320 
2321 
2322 
2323 
2324 /*************************************************
2325 *         Alternate character tables             *
2326 *************************************************/
2327 
2328 /* By default, the "tables" pointer in the compile context when calling
2329 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2330 library. However, the tables modifier can be used to select alternate sets of
2331 tables, for different kinds of testing. Note that the locale modifier also
2332 adjusts the tables. */
2333 
2334 /* This is the set of tables distributed as default with PCRE2. It recognizes
2335 only ASCII characters. */
2336 
2337 static const uint8_t tables1[] = {
2338 
2339 /* This table is a lower casing table. */
2340 
2341     0,  1,  2,  3,  4,  5,  6,  7,
2342     8,  9, 10, 11, 12, 13, 14, 15,
2343    16, 17, 18, 19, 20, 21, 22, 23,
2344    24, 25, 26, 27, 28, 29, 30, 31,
2345    32, 33, 34, 35, 36, 37, 38, 39,
2346    40, 41, 42, 43, 44, 45, 46, 47,
2347    48, 49, 50, 51, 52, 53, 54, 55,
2348    56, 57, 58, 59, 60, 61, 62, 63,
2349    64, 97, 98, 99,100,101,102,103,
2350   104,105,106,107,108,109,110,111,
2351   112,113,114,115,116,117,118,119,
2352   120,121,122, 91, 92, 93, 94, 95,
2353    96, 97, 98, 99,100,101,102,103,
2354   104,105,106,107,108,109,110,111,
2355   112,113,114,115,116,117,118,119,
2356   120,121,122,123,124,125,126,127,
2357   128,129,130,131,132,133,134,135,
2358   136,137,138,139,140,141,142,143,
2359   144,145,146,147,148,149,150,151,
2360   152,153,154,155,156,157,158,159,
2361   160,161,162,163,164,165,166,167,
2362   168,169,170,171,172,173,174,175,
2363   176,177,178,179,180,181,182,183,
2364   184,185,186,187,188,189,190,191,
2365   192,193,194,195,196,197,198,199,
2366   200,201,202,203,204,205,206,207,
2367   208,209,210,211,212,213,214,215,
2368   216,217,218,219,220,221,222,223,
2369   224,225,226,227,228,229,230,231,
2370   232,233,234,235,236,237,238,239,
2371   240,241,242,243,244,245,246,247,
2372   248,249,250,251,252,253,254,255,
2373 
2374 /* This table is a case flipping table. */
2375 
2376     0,  1,  2,  3,  4,  5,  6,  7,
2377     8,  9, 10, 11, 12, 13, 14, 15,
2378    16, 17, 18, 19, 20, 21, 22, 23,
2379    24, 25, 26, 27, 28, 29, 30, 31,
2380    32, 33, 34, 35, 36, 37, 38, 39,
2381    40, 41, 42, 43, 44, 45, 46, 47,
2382    48, 49, 50, 51, 52, 53, 54, 55,
2383    56, 57, 58, 59, 60, 61, 62, 63,
2384    64, 97, 98, 99,100,101,102,103,
2385   104,105,106,107,108,109,110,111,
2386   112,113,114,115,116,117,118,119,
2387   120,121,122, 91, 92, 93, 94, 95,
2388    96, 65, 66, 67, 68, 69, 70, 71,
2389    72, 73, 74, 75, 76, 77, 78, 79,
2390    80, 81, 82, 83, 84, 85, 86, 87,
2391    88, 89, 90,123,124,125,126,127,
2392   128,129,130,131,132,133,134,135,
2393   136,137,138,139,140,141,142,143,
2394   144,145,146,147,148,149,150,151,
2395   152,153,154,155,156,157,158,159,
2396   160,161,162,163,164,165,166,167,
2397   168,169,170,171,172,173,174,175,
2398   176,177,178,179,180,181,182,183,
2399   184,185,186,187,188,189,190,191,
2400   192,193,194,195,196,197,198,199,
2401   200,201,202,203,204,205,206,207,
2402   208,209,210,211,212,213,214,215,
2403   216,217,218,219,220,221,222,223,
2404   224,225,226,227,228,229,230,231,
2405   232,233,234,235,236,237,238,239,
2406   240,241,242,243,244,245,246,247,
2407   248,249,250,251,252,253,254,255,
2408 
2409 /* This table contains bit maps for various character classes. Each map is 32
2410 bytes long and the bits run from the least significant end of each byte. The
2411 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
2412 graph, print, punct, and cntrl. Other classes are built from combinations. */
2413 
2414   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
2415   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2416   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2417   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2418 
2419   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2420   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
2421   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2422   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2423 
2424   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2425   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2426   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2427   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2428 
2429   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2430   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
2431   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2432   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2433 
2434   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2435   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
2436   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2437   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2438 
2439   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2440   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
2441   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2442   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2443 
2444   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
2445   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2446   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2447   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2448 
2449   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
2450   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2451   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2452   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2453 
2454   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
2455   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
2456   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2457   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2458 
2459   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
2460   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
2461   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2462   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2463 
2464 /* This table identifies various classes of character by individual bits:
2465   0x01   white space character
2466   0x02   letter
2467   0x04   decimal digit
2468   0x08   hexadecimal digit
2469   0x10   alphanumeric or '_'
2470   0x80   regular expression metacharacter or binary zero
2471 */
2472 
2473   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
2474   0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
2475   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
2476   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
2477   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
2478   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
2479   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
2480   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
2481   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
2482   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
2483   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
2484   0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
2485   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
2486   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
2487   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
2488   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
2489   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
2490   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
2491   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
2492   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
2493   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
2494   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
2495   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
2496   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
2497   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
2498   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
2499   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
2500   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
2501   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
2502   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
2503   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
2504   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2505 
2506 /* This is a set of tables that came originally from a Windows user. It seems
2507 to be at least an approximation of ISO 8859. In particular, there are
2508 characters greater than 128 that are marked as spaces, letters, etc. */
2509 
/* The vector holds four consecutive sub-tables with the same layout as
tables1: a 256-byte lower casing table, a 256-byte case flipping table, ten
32-byte class bit maps (320 bytes), and a 256-byte character type table. */

static const uint8_t tables2[] = {

/* Lower casing table (characters 0-255). */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* Case flipping table (characters 0-255). */

  0,1,2,3,4,5,6,7,
  8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,
  24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,
  40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,
  56,57,58,59,60,61,62,63,
  64,97,98,99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,91,92,93,94,95,
  96,65,66,67,68,69,70,71,
  72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,
  88,89,90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,215,
  248,249,250,251,252,253,254,223,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,247,
  216,217,218,219,220,221,222,255,

/* Class bit maps, 32 bytes each, in the same order as in tables1. */

  /* space */
  0,62,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,
  32,0,0,0,1,0,0,0,
  0,0,0,0,0,0,0,0,

  /* xdigit */
  0,0,0,0,0,0,255,3,
  126,0,0,0,126,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,

  /* digit */
  0,0,0,0,0,0,255,3,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,12,2,
  0,0,0,0,0,0,0,0,

  /* upper */
  0,0,0,0,0,0,0,0,
  254,255,255,7,0,0,0,0,
  0,0,0,0,0,0,0,0,
  255,255,127,127,0,0,0,0,

  /* lower */
  0,0,0,0,0,0,0,0,
  0,0,0,0,254,255,255,7,
  0,0,0,0,0,4,32,4,
  0,0,0,128,255,255,127,255,

  /* word */
  0,0,0,0,0,0,255,3,
  254,255,255,135,254,255,255,7,
  0,0,0,0,0,4,44,6,
  255,255,127,255,255,255,127,255,

  /* graph */
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,254,255,255,255,
  255,255,255,255,255,255,255,255,

  /* print */
  0,2,0,0,255,255,255,255,
  255,255,255,255,255,255,255,127,
  0,0,0,0,255,255,255,255,
  255,255,255,255,255,255,255,255,

  /* punct */
  0,0,0,0,254,255,0,252,
  1,0,0,248,1,0,0,120,
  0,0,0,0,254,255,255,255,
  0,0,128,0,0,0,128,0,

  /* cntrl */
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,128,
  255,255,255,255,0,0,0,0,
  0,0,0,0,0,0,0,0,

/* Character type table (characters 0-255); the bit meanings are those listed
for the corresponding table in tables1. */

  128,0,0,0,0,0,0,0,
  0,1,1,0,1,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,128,0,0,0,
  128,128,128,128,0,0,128,0,
  28,28,28,28,28,28,28,28,
  28,28,0,0,0,0,0,128,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,128,16,
  0,26,26,26,26,26,26,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,128,128,0,0,0,
  0,0,0,0,0,1,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,
  1,0,0,0,0,0,0,0,
  0,0,18,0,0,0,0,0,
  0,0,20,20,0,18,0,0,
  0,20,18,0,0,0,0,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,18,
  18,18,18,18,18,18,18,0,
  18,18,18,18,18,18,18,18
};
2648 
2649 
2650 
2651 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2652 /*************************************************
2653 *    Emulated memmove() for systems without it   *
2654 *************************************************/
2655 
2656 /* This function can make use of bcopy() if it is available. Otherwise do it by
2657 steam, as there are some non-Unix environments that lack both memmove() and
2658 bcopy(). */
2659 
/* Move n bytes from s to d; the two regions may overlap.

Arguments:
  d           destination
  s           source
  n           number of bytes to move

Returns:      d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t left = n;

if (to > from)
  {
  /* Destination lies above the source: copy from the top downwards so that
  an overlapping source is not overwritten before it has been read. */
  to += n;
  from += n;
  while (left-- > 0) *(--to) = *(--from);
  }
else
  {
  while (left-- > 0) *to++ = *from++;
  to -= n;        /* Step back to the start of the destination */
  }

return (void *)to;
#endif   /* not HAVE_BCOPY */
}
2684 #undef memmove
2685 #define memmove(d,s,n) emulated_memmove(d,s,n)
2686 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2687 
2688 
2689 
2690 #ifndef HAVE_STRERROR
2691 /*************************************************
2692 *     Provide strerror() for non-ANSI libraries  *
2693 *************************************************/
2694 
2695 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2696 libraries. They may no longer be around, but just in case, we can try to
2697 provide the same facility by this simple alternative function. */
2698 
2699 extern int   sys_nerr;
2700 extern char *sys_errlist[];
2701 
2702 char *
strerror(int n)2703 strerror(int n)
2704 {
2705 if (n < 0 || n >= sys_nerr) return "unknown error number";
2706 return sys_errlist[n];
2707 }
2708 #endif /* HAVE_STRERROR */
2709 
2710 
2711 
2712 /*************************************************
2713 *            Local memory functions              *
2714 *************************************************/
2715 
2716 /* Alternative memory functions, to test functionality. */
2717 
/* Obtain a block with malloc(). When show_memory is set, the request is also
reported on outfile, and the block and its size are recorded in malloclist[]
and malloclistlength[] while fewer than MALLOCLISTSIZE entries are in use.

Arguments:
  size        number of bytes wanted
  data        not used

Returns:      the result of malloc()
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
  return block;
  }

fprintf(outfile, "malloc  %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2746 
my_free(void * block,void * data)2747 static void my_free(void *block, void *data)
2748 {
2749 (void)data;
2750 if (show_memory)
2751   {
2752   uint32_t i, j;
2753   BOOL found = FALSE;
2754 
2755   fprintf(outfile, "free");
2756   for (i = 0; i < malloclistptr; i++)
2757     {
2758     if (block == malloclist[i])
2759       {
2760       fprintf(outfile, "    %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
2761       malloclistptr--;
2762       for (j = i; j < malloclistptr; j++)
2763         {
2764         malloclist[j] = malloclist[j+1];
2765         malloclistlength[j] = malloclistlength[j+1];
2766         }
2767       found = TRUE;
2768       break;
2769       }
2770     }
2771   if (!found) fprintf(outfile, " unremembered block");
2772 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2773   fprintf(outfile, " %p", block);  /* Not portable */
2774 #endif
2775   fprintf(outfile, "\n");
2776   }
2777 free(block);
2778 }
2779 
2780 
2781 
2782 /*************************************************
2783 *       Callback function for stack guard        *
2784 *************************************************/
2785 
2786 /* This is set up to be called from pcre2_compile() when the stackguard=n
2787 modifier sets a value greater than zero. The test we do is whether the
2788 parenthesis nesting depth is greater than the value set by the modifier.
2789 
2790 Argument:  the current parenthesis nesting depth
2791 Returns:   non-zero to kill the compilation
2792 */
2793 
2794 static int
stack_guard(uint32_t depth,void * user_data)2795 stack_guard(uint32_t depth, void *user_data)
2796 {
2797 (void)user_data;
2798 return depth > pat_patctl.stackguard_test;
2799 }
2800 
2801 
2802 /*************************************************
2803 *         JIT memory callback                    *
2804 *************************************************/
2805 
2806 static PCRE2_JIT_STACK*
jit_callback(void * arg)2807 jit_callback(void *arg)
2808 {
2809 jit_was_used = TRUE;
2810 return (PCRE2_JIT_STACK *)arg;
2811 }
2812 
2813 
2814 /*************************************************
2815 *      Convert UTF-8 character to code point     *
2816 *************************************************/
2817 
2818 /* This function reads one or more bytes that represent a UTF-8 character,
2819 and returns the codepoint of that character. Note that the function supports
2820 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2821 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2822 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2823 checking, and also for generating 32-bit non-UTF data values above the UTF
2824 limit.
2825 
2826 Argument:
2827   utf8bytes   a pointer to the byte vector
  vptr        a pointer to a uint32_t to receive the code point
2829 
2830 Returns:      >  0 => the number of bytes consumed
2831               -6 to 0 => malformed UTF-8 character at offset = (-return)
2832 */
2833 
2834 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2835 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2836 {
2837 uint32_t c = *utf8bytes++;
2838 uint32_t d = c;
2839 int i, j, s;
2840 
2841 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2842   {
2843   if ((d & 0x80) == 0) break;
2844   d <<= 1;
2845   }
2846 
2847 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2848 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2849 
2850 /* i now has a value in the range 1-5 */
2851 
2852 s = 6*i;
2853 d = (c & utf8_table3[i]) << s;
2854 
2855 for (j = 0; j < i; j++)
2856   {
2857   c = *utf8bytes++;
2858   if ((c & 0xc0) != 0x80) return -(j+1);
2859   s -= 6;
2860   d |= (c & 0x3f) << s;
2861   }
2862 
2863 /* Check that encoding was the correct unique one */
2864 
2865 for (j = 0; j < utf8_table1_size; j++)
2866   if (d <= (uint32_t)utf8_table1[j]) break;
2867 if (j != i) return -(i+1);
2868 
2869 /* Valid value */
2870 
2871 *vptr = d;
2872 return i+1;
2873 }
2874 
2875 
2876 
2877 /*************************************************
2878 *             Print one character                *
2879 *************************************************/
2880 
2881 /* Print a single character either literally, or as a hex escape, and count how
2882 many printed characters are used.
2883 
2884 Arguments:
2885   c            the character
2886   utf          TRUE in UTF mode
2887   f            the FILE to print to, or NULL just to count characters
2888 
2889 Returns:       number of characters written
2890 */
2891 
2892 static int
pchar(uint32_t c,BOOL utf,FILE * f)2893 pchar(uint32_t c, BOOL utf, FILE *f)
2894 {
2895 int n = 0;
2896 char tempbuffer[16];
2897 
2898 if (PRINTOK(c))
2899   {
2900   if (f != NULL) fprintf(f, "%c", c);
2901   return 1;
2902   }
2903 
2904 if (c < 0x100)
2905   {
2906   if (utf)
2907     {
2908     if (f != NULL) fprintf(f, "\\x{%02x}", c);
2909     return 6;
2910     }
2911   else
2912     {
2913     if (f != NULL) fprintf(f, "\\x%02x", c);
2914     return 4;
2915     }
2916   }
2917 
2918 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2919   else n = sprintf(tempbuffer, "\\x{%02x}", c);
2920 
2921 return n >= 0 ? n : 0;
2922 }
2923 
2924 
2925 
2926 #ifdef SUPPORT_PCRE2_16
2927 /*************************************************
2928 *    Find length of 0-terminated 16-bit string   *
2929 *************************************************/
2930 
/* Count the 16-bit code units that precede the terminating zero.

Argument:  p   points to a zero-terminated 16-bit string
Returns:       the number of code units before the terminator

The pointer difference is converted straight to size_t. Going through int, as
was done previously, would truncate or sign-flip lengths above INT_MAX before
the widening to the size_t return type. */

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2937 #endif  /* SUPPORT_PCRE2_16 */
2938 
2939 
2940 
2941 #ifdef SUPPORT_PCRE2_32
2942 /*************************************************
2943 *    Find length of 0-terminated 32-bit string   *
2944 *************************************************/
2945 
/* Count the 32-bit code units that precede the terminating zero.

Argument:  p   points to a zero-terminated 32-bit string
Returns:       the number of code units before the terminator

The pointer difference is converted straight to size_t. Going through int, as
was done previously, would truncate or sign-flip lengths above INT_MAX before
the widening to the size_t return type. */

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2952 #endif  /* SUPPORT_PCRE2_32 */
2953 
2954 
2955 #ifdef SUPPORT_PCRE2_8
2956 /*************************************************
2957 *         Print 8-bit character string           *
2958 *************************************************/
2959 
2960 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2961 For printing *MARK strings, a negative length is given. If handed a NULL file,
2962 just counts chars without printing (because pchar() does that). */
2963 
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
uint32_t ch = 0;
int total = 0;

/* A negative length means the count is held in the byte just before p. */

int remaining = (length < 0)? p[-1] : length;

while (remaining > 0)
  {
  /* In UTF mode try to decode a whole character. If the decode fails, or the
  character would extend beyond the remaining bytes, output the single byte
  on its own instead. */

  int used = utf? utf82ord(p, &ch) : 0;

  if (used <= 0 || used > remaining)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }

return total;
}
2989 #endif
2990 
2991 
2992 #ifdef SUPPORT_PCRE2_16
2993 /*************************************************
2994 *           Print 16-bit character string        *
2995 *************************************************/
2996 
2997 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2998 For printing *MARK strings, a negative length is given. If handed a NULL file,
2999 just counts chars without printing. */
3000 
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;

/* A negative length means the count is held in the code unit before p. */

int remaining = (length < 0)? p[-1] : length;

for (; remaining > 0; remaining--)
  {
  uint32_t ch = *p++ & 0xffff;

  /* In UTF mode, a high surrogate that is followed by a low surrogate within
  the remaining data is combined with it into a single code point. */

  if (utf && ch >= 0xD800 && ch < 0xDC00 && remaining > 1)
    {
    uint32_t low = *p & 0xffff;
    if ((low & 0xfc00) == 0xDC00)
      {
      ch = 0x10000 + ((ch & 0x3ff) << 10) + (low & 0x3ff);
      p++;
      remaining--;
      }
    }

  total += pchar(ch, utf, f);
  }

return total;
}
3022 #endif  /* SUPPORT_PCRE2_16 */
3023 
3024 
3025 
3026 #ifdef SUPPORT_PCRE2_32
3027 /*************************************************
3028 *           Print 32-bit character string        *
3029 *************************************************/
3030 
3031 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3032 For printing *MARK strings, a negative length is given. If handed a NULL file,
3033 just counts chars without printing. */
3034 
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int i;

/* A negative length means the count is held in the code unit before p. */

if (length < 0) length = p[-1];

/* Each 32-bit unit is a complete character, so hand them to pchar() one at a
time and add up the widths it reports. */

for (i = 0; i < length; i++)
  total += pchar(p[i], utf, f);

return total;
}
3048 #endif  /* SUPPORT_PCRE2_32 */
3049 
3050 
3051 
3052 
3053 /*************************************************
3054 *       Convert character value to UTF-8         *
3055 *************************************************/
3056 
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
8-bit library is not supported, to generate UTF-8 output for non-ASCII
characters.
3061 
3062 Arguments:
3063   cvalue     the character value
3064   utf8bytes  pointer to buffer for result - at least 6 bytes long
3065 
Returns:     number of bytes placed in the buffer, or -1 if cvalue > 0x7fffffff
3067 */
3068 
3069 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3070 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3071 {
3072 int i, j;
3073 if (cvalue > 0x7fffffffu)
3074   return -1;
3075 for (i = 0; i < utf8_table1_size; i++)
3076   if (cvalue <= (uint32_t)utf8_table1[i]) break;
3077 utf8bytes += i;
3078 for (j = i; j > 0; j--)
3079  {
3080  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3081  cvalue >>= 6;
3082  }
3083 *utf8bytes = utf8_table2[i] | cvalue;
3084 return i + 1;
3085 }
3086 
3087 
3088 
3089 #ifdef SUPPORT_PCRE2_16
3090 /*************************************************
3091 *           Convert string to 16-bit             *
3092 *************************************************/
3093 
3094 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3095 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3096 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3097 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3098 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3099 greater than 0xffff.
3100 
3101 If all the input bytes are ASCII, the space needed for a 16-bit string is
3102 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3103 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3104 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3105 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3106 save repeated re-sizing.
3107 
3108 Note that this function does not object to surrogate values. This is
3109 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3110 for the purpose of testing that they are correctly faulted.
3111 
3112 Arguments:
3113   p          points to a byte string
3114   utf        true in UTF mode
3115   lenptr     points to number of bytes in the string (excluding trailing zero)
3116 
3117 Returns:     0 on success, with the length updated to the number of 16-bit
3118                data items used (excluding the trailing zero)
3119              OR -1 if a UTF-8 string is malformed
3120              OR -2 if a value > 0x10ffff is encountered in UTF mode
3121              OR -3 if a value > 0xffff is encountered when not in UTF mode
3122 */
3123 
3124 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3125 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3126 {
3127 uint16_t *pp;
3128 PCRE2_SIZE len = *lenptr;
3129 
3130 if (pbuffer16_size < 2*len + 2)
3131   {
3132   if (pbuffer16 != NULL) free(pbuffer16);
3133   pbuffer16_size = 2*len + 2;
3134   if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3135   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3136   if (pbuffer16 == NULL)
3137     {
3138     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3139       SIZ_CAST pbuffer16_size);
3140     exit(1);
3141     }
3142   }
3143 
3144 pp = pbuffer16;
3145 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3146   {
3147   for (; len > 0; len--) *pp++ = *p++;
3148   }
3149 else while (len > 0)
3150   {
3151   uint32_t c;
3152   int chlen = utf82ord(p, &c);
3153   if (chlen <= 0) return -1;
3154   if (!utf && c > 0xffff) return -3;
3155   if (c > 0x10ffff) return -2;
3156   p += chlen;
3157   len -= chlen;
3158   if (c < 0x10000) *pp++ = c; else
3159     {
3160     c -= 0x10000;
3161     *pp++ = 0xD800 | (c >> 10);
3162     *pp++ = 0xDC00 | (c & 0x3ff);
3163     }
3164   }
3165 
3166 *pp = 0;
3167 *lenptr = pp - pbuffer16;
3168 return 0;
3169 }
3170 #endif
3171 
3172 
3173 
3174 #ifdef SUPPORT_PCRE2_32
3175 /*************************************************
3176 *           Convert string to 32-bit             *
3177 *************************************************/
3178 
3179 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3180 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3181 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3182 limit of 0x10ffff cause an error.
3183 
3184 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3185 is set, and no limit is imposed. There is special interpretation of the 0xff
3186 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3187 next character to be set. This provides a way of generating 32-bit characters
3188 greater than 0x7fffffff.
3189 
3190 If all the input bytes are ASCII, the space needed for a 32-bit string is
3191 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3192 string is no more than four times, because the number of characters must be
3193 less than the number of bytes. The result is always left in pbuffer32. Impose a
3194 minimum size to save repeated re-sizing.
3195 
3196 Note that this function does not object to surrogate values. This is
3197 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3198 for the purpose of testing that they are correctly faulted.
3199 
3200 Arguments:
3201   p          points to a byte string
3202   utf        true in UTF mode
3203   lenptr     points to number of bytes in the string (excluding trailing zero)
3204 
3205 Returns:     0 on success, with the length updated to the number of 32-bit
3206                data items used (excluding the trailing zero)
3207              OR -1 if a UTF-8 string is malformed
3208              OR -2 if a value > 0x10ffff is encountered in UTF mode
3209 */
3210 
3211 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3212 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3213 {
3214 uint32_t *pp;
3215 PCRE2_SIZE len = *lenptr;
3216 
3217 if (pbuffer32_size < 4*len + 4)
3218   {
3219   if (pbuffer32 != NULL) free(pbuffer32);
3220   pbuffer32_size = 4*len + 4;
3221   if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3222   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3223   if (pbuffer32 == NULL)
3224     {
3225     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3226       SIZ_CAST pbuffer32_size);
3227     exit(1);
3228     }
3229   }
3230 
3231 pp = pbuffer32;
3232 
3233 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3234   {
3235   for (; len > 0; len--) *pp++ = *p++;
3236   }
3237 
3238 else while (len > 0)
3239   {
3240   int chlen;
3241   uint32_t c;
3242   uint32_t topbit = 0;
3243   if (!utf && *p == 0xff && len > 1)
3244     {
3245     topbit = 0x80000000u;
3246     p++;
3247     len--;
3248     }
3249   chlen = utf82ord(p, &c);
3250   if (chlen <= 0) return -1;
3251   if (utf && c > 0x10ffff) return -2;
3252   p += chlen;
3253   len -= chlen;
3254   *pp++ = c | topbit;
3255   }
3256 
3257 *pp = 0;
3258 *lenptr = pp - pbuffer32;
3259 return 0;
3260 }
3261 #endif /* SUPPORT_PCRE2_32 */
3262 
3263 
3264 
3265 /*************************************************
3266 *         Move back by so many characters        *
3267 *************************************************/
3268 
3269 /* Given a code unit offset in a subject string, move backwards by a number of
3270 characters, and return the resulting offset.
3271 
3272 Arguments:
3273   subject   pointer to the string
3274   offset    start offset
3275   count     count to move back by
3276   utf       TRUE if in UTF mode
3277 
3278 Returns:   a possibly changed offset
3279 */
3280 
3281 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3282 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3283 {
3284 if (!utf || test_mode == PCRE32_MODE)
3285   return (count >= offset)? 0 : (offset - count);
3286 
3287 else if (test_mode == PCRE8_MODE)
3288   {
3289   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3290   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3291     {
3292     pp--;
3293     while ((*pp & 0xc0) == 0x80) pp--;
3294     }
3295   return pp - (PCRE2_SPTR8)subject;
3296   }
3297 
3298 else  /* 16-bit mode */
3299   {
3300   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3301   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3302     {
3303     pp--;
3304     if ((*pp & 0xfc00) == 0xdc00) pp--;
3305     }
3306   return pp - (PCRE2_SPTR16)subject;
3307   }
3308 }
3309 
3310 
3311 
3312 /*************************************************
3313 *           Expand input buffers                 *
3314 *************************************************/
3315 
3316 /* This function doubles the size of the input buffer and the buffer for
3317 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3318 the new ones.
3319 
3320 Arguments: none
3321 Returns:   nothing (aborts if malloc() fails)
3322 */
3323 
3324 static void
expand_input_buffers(void)3325 expand_input_buffers(void)
3326 {
3327 int new_pbuffer8_size = 2*pbuffer8_size;
3328 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3329 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3330 
3331 if (new_buffer == NULL || new_pbuffer8 == NULL)
3332   {
3333   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3334   exit(1);
3335   }
3336 
3337 memcpy(new_buffer, buffer, pbuffer8_size);
3338 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3339 
3340 pbuffer8_size = new_pbuffer8_size;
3341 
3342 free(buffer);
3343 free(pbuffer8);
3344 
3345 buffer = new_buffer;
3346 pbuffer8 = new_pbuffer8;
3347 }
3348 
3349 
3350 
3351 /*************************************************
3352 *        Read or extend an input line            *
3353 *************************************************/
3354 
3355 /* Input lines are read into buffer, but both patterns and data lines can be
3356 continued over multiple input lines. In addition, if the buffer fills up, we
3357 want to automatically expand it so as to be able to handle extremely large
3358 lines that are needed for certain stress tests, although this is less likely
3359 now that there are repetition features for both patterns and data. When the
3360 input buffer is expanded, the other two buffers must also be expanded likewise,
3361 and the contents of pbuffer, which are a copy of the input for callouts, must
3362 be preserved (for when expansion happens for a data line). This is not the most
3363 optimal way of handling this, but hey, this is just a test program!
3364 
3365 Arguments:
3366   f            the file to read
3367   start        where in buffer to start (this *must* be within buffer)
3368   prompt       for stdin or readline()
3369 
3370 Returns:       pointer to the start of new data
3371                could be a copy of start, or could be moved
3372                NULL if no data read and EOF reached
3373 */
3374 
3375 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3376 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3377 {
3378 uint8_t *here = start;
3379 
3380 for (;;)
3381   {
3382   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3383 
3384   if (rlen > 1000)
3385     {
3386     size_t dlen;
3387 
3388     /* If libreadline or libedit support is required, use readline() to read a
3389     line if the input is a terminal. Note that readline() removes the trailing
3390     newline, so we must put it back again, to be compatible with fgets(). */
3391 
3392 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3393     if (INTERACTIVE(f))
3394       {
3395       size_t len;
3396       char *s = readline(prompt);
3397       if (s == NULL) return (here == start)? NULL : start;
3398       len = strlen(s);
3399       if (len > 0) add_history(s);
3400       if (len > rlen - 1) len = rlen - 1;
3401       memcpy(here, s, len);
3402       here[len] = '\n';
3403       here[len+1] = 0;
3404       free(s);
3405       }
3406     else
3407 #endif
3408 
3409     /* Read the next line by normal means, prompting if the file is a tty. */
3410 
3411       {
3412       if (INTERACTIVE(f)) printf("%s", prompt);
3413       if (fgets((char *)here, rlen,  f) == NULL)
3414         return (here == start)? NULL : start;
3415       }
3416 
3417     dlen = strlen((char *)here);
3418     here += dlen;
3419 
3420     /* Check for end of line reached. Take care not to read data from before
3421     start (dlen will be zero for a file starting with a binary zero). */
3422 
3423     if (here > start && here[-1] == '\n') return start;
3424 
3425     /* If we have not read a newline when reading a file, we have either filled
3426     the buffer or reached the end of the file. We can detect the former by
3427     checking that the string fills the buffer, and the latter by feof(). If
3428     neither of these is true, it means we read a binary zero which has caused
3429     strlen() to give a short length. This is a hard error because pcre2test
3430     expects to work with C strings. */
3431 
3432     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3433       {
3434       fprintf(outfile, "** Binary zero encountered in input\n");
3435       fprintf(outfile, "** pcre2test run abandoned\n");
3436       exit(1);
3437       }
3438     }
3439 
3440   else
3441     {
3442     size_t start_offset = start - buffer;
3443     size_t here_offset = here - buffer;
3444     expand_input_buffers();
3445     start = buffer + start_offset;
3446     here = buffer + here_offset;
3447     }
3448   }
3449 
3450 /* Control never gets here */
3451 }
3452 
3453 
3454 
3455 /*************************************************
3456 *         Case-independent strncmp() function    *
3457 *************************************************/
3458 
3459 /*
3460 Arguments:
3461   s         first string
3462   t         second string
3463   n         number of characters to compare
3464 
3465 Returns:    < 0, = 0, or > 0, according to the comparison
3466 */
3467 
/* Compare at most n characters of s and t, ignoring case.

As with strncmp(), the comparison ends at the first difference, or at a
terminating zero that is reached in both strings at the same position, so two
equal strings that are shorter than n are never read beyond their ends. A
count that is zero or negative compares nothing and yields 0.

The result is the difference between the lower-cased characters at the first
mismatch, or 0 if there is none. */

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
for (; n > 0; n--)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;              /* both strings ended together */
  }
return 0;
}
3478 
3479 
3480 
3481 /*************************************************
3482 *          Scan the main modifier list           *
3483 *************************************************/
3484 
3485 /* This function searches the modifier list for a long modifier name.
3486 
3487 Argument:
3488   p         start of the name
  len       length of the name
3490 
3491 Returns:    an index in the modifier list, or -1 on failure
3492 */
3493 
3494 static int
scan_modifiers(const uint8_t * p,unsigned int len)3495 scan_modifiers(const uint8_t *p, unsigned int len)
3496 {
3497 int bot = 0;
3498 int top = MODLISTCOUNT;
3499 
3500 while (top > bot)
3501   {
3502   int mid = (bot + top)/2;
3503   unsigned int mlen = strlen(modlist[mid].name);
3504   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3505   if (c == 0)
3506     {
3507     if (len == mlen) return mid;
3508     c = (int)len - (int)mlen;
3509     }
3510   if (c > 0) bot = mid + 1; else top = mid;
3511   }
3512 
3513 return -1;
3514 
3515 }
3516 
3517 
3518 
3519 /*************************************************
*        Check a modifier and find its field     *
3521 *************************************************/
3522 
3523 /* This function is called when a modifier has been identified. We check that
3524 it is allowed here and find the field that is to be changed.
3525 
3526 Arguments:
3527   m          the modifier list entry
3528   ctx        CTX_PAT     => pattern context
3529              CTX_POPPAT  => pattern context for popped pattern
3530              CTX_DEFPAT  => default pattern context
3531              CTX_DAT     => data context
3532              CTX_DEFDAT  => default data context
3533   pctl       point to pattern control block
3534   dctl       point to data control block
3535   c          a single character or 0
3536 
3537 Returns:     a field pointer or NULL
3538 */
3539 
3540 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3541 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3542 {
3543 void *field = NULL;
3544 PCRE2_SIZE offset = m->offset;
3545 
3546 if (restrict_for_perl_test) switch(m->which)
3547   {
3548   case MOD_PNDP:
3549   case MOD_PATP:
3550   case MOD_PDP:
3551   break;
3552 
3553   default:
3554   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3555     m->name);
3556   return NULL;
3557   }
3558 
3559 switch (m->which)
3560   {
3561   case MOD_CTC:  /* Compile context modifier */
3562   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3563     else if (ctx == CTX_PAT) field = PTR(pat_context);
3564   break;
3565 
3566   case MOD_CTM:  /* Match context modifier */
3567   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3568     else if (ctx == CTX_DAT) field = PTR(dat_context);
3569   break;
3570 
3571   case MOD_DAT:  /* Data line modifier */
3572   if (dctl != NULL) field = dctl;
3573   break;
3574 
3575   case MOD_PAT:    /* Pattern modifier */
3576   case MOD_PATP:   /* Allowed for Perl test */
3577   if (pctl != NULL) field = pctl;
3578   break;
3579 
3580   case MOD_PD:   /* Pattern or data line modifier */
3581   case MOD_PDP:  /* Ditto, allowed for Perl test */
3582   case MOD_PND:  /* Ditto, but not default pattern */
3583   case MOD_PNDP: /* Ditto, allowed for Perl test */
3584   if (dctl != NULL) field = dctl;
3585     else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3586              ctx != CTX_DEFPAT))
3587       field = pctl;
3588   break;
3589   }
3590 
3591 if (field == NULL)
3592   {
3593   if (c == 0)
3594     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3595   else
3596     fprintf(outfile, "** /%c is not valid here\n", c);
3597   return NULL;
3598   }
3599 
3600 return (char *)field + offset;
3601 }
3602 
3603 
3604 
3605 /*************************************************
3606 *            Decode a modifier list              *
3607 *************************************************/
3608 
3609 /* A pointer to a control block is NULL when called in cases when that block is
3610 not relevant. They are never all relevant in one call. At least one of patctl
3611 and datctl is NULL. The second argument specifies which context to use for
3612 modifiers that apply to contexts.
3613 
3614 Arguments:
3615   p          point to modifier string
3616   ctx        CTX_PAT     => pattern context
3617              CTX_POPPAT  => pattern context for popped pattern
3618              CTX_DEFPAT  => default pattern context
3619              CTX_DAT     => data context
3620              CTX_DEFDAT  => default data context
3621   pctl       point to pattern control block
3622   dctl       point to data control block
3623 
3624 Returns: TRUE if successful decode, FALSE otherwise
3625 */
3626 
3627 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3628 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3629 {
3630 uint8_t *ep, *pp;
3631 long li;
3632 unsigned long uli;
3633 BOOL first = TRUE;
3634 
3635 for (;;)
3636   {
3637   void *field;
3638   modstruct *m;
3639   BOOL off = FALSE;
3640   unsigned int i, len;
3641   int index;
3642   char *endptr;
3643 
3644   /* Skip white space and commas. */
3645 
3646   while (isspace(*p) || *p == ',') p++;
3647   if (*p == 0) break;
3648 
3649   /* Find the end of the item; lose trailing whitespace at end of line. */
3650 
3651   for (ep = p; *ep != 0 && *ep != ','; ep++);
3652   if (*ep == 0)
3653     {
3654     while (ep > p && isspace(ep[-1])) ep--;
3655     *ep = 0;
3656     }
3657 
3658   /* Remember if the first character is '-'. */
3659 
3660   if (*p == '-')
3661     {
3662     off = TRUE;
3663     p++;
3664     }
3665 
3666   /* Find the length of a full-length modifier name, and scan for it. */
3667 
3668   pp = p;
3669   while (pp < ep && *pp != '=') pp++;
3670   index = scan_modifiers(p, pp - p);
3671 
3672   /* If the first modifier is unrecognized, try to interpret it as a sequence
3673   of single-character abbreviated modifiers. None of these modifiers have any
3674   associated data. They just set options or control bits. */
3675 
3676   if (index < 0)
3677     {
3678     uint32_t cc;
3679     uint8_t *mp = p;
3680 
3681     if (!first)
3682       {
3683       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3684       if (ep - p == 1)
3685         fprintf(outfile, "** Single-character modifiers must come first\n");
3686       return FALSE;
3687       }
3688 
3689     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3690       {
3691       for (i = 0; i < C1MODLISTCOUNT; i++)
3692         if (cc == c1modlist[i].onechar) break;
3693 
3694       if (i >= C1MODLISTCOUNT)
3695         {
3696         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3697           *p, (int)(ep-mp), mp);
3698         return FALSE;
3699         }
3700 
3701       if (c1modlist[i].index >= 0)
3702         {
3703         index = c1modlist[i].index;
3704         }
3705 
3706       else
3707         {
3708         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3709           strlen(c1modlist[i].fullname));
3710         if (index < 0)
3711           {
3712           fprintf(outfile, "** Internal error: single-character equivalent "
3713             "modifier '%s' not found\n", c1modlist[i].fullname);
3714           return FALSE;
3715           }
3716         c1modlist[i].index = index;     /* Cache for next time */
3717         }
3718 
3719       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3720       if (field == NULL) return FALSE;
3721 
3722       /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3723       PCRE2_EXTENDED_MORE. */
3724 
3725       if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3726         {
3727         *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3728         *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3729         }
3730       else
3731         *((uint32_t *)field) |= modlist[index].value;
3732       }
3733 
3734     continue;    /* With tne next (fullname) modifier */
3735     }
3736 
3737   /* We have a match on a full-name modifier. Check for the existence of data
3738   when needed. */
3739 
3740   m = modlist + index;      /* Save typing */
3741   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3742       (m->type != MOD_IND || *pp == '='))
3743     {
3744     if (*pp++ != '=')
3745       {
3746       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3747       return FALSE;
3748       }
3749     if (off)
3750       {
3751       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3752       return FALSE;
3753       }
3754     }
3755 
3756   /* These on/off types have no data. */
3757 
3758   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3759     {
3760     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3761     return FALSE;
3762     }
3763 
3764   /* Set the data length for those types that have data. Then find the field
3765   that is to be set. If check_modifier() returns NULL, it has already output an
3766   error message. */
3767 
3768   len = ep - pp;
3769   field = check_modifier(m, ctx, pctl, dctl, 0);
3770   if (field == NULL) return FALSE;
3771 
3772   /* Process according to data type. */
3773 
3774   switch (m->type)
3775     {
3776     case MOD_CTL:
3777     case MOD_OPT:
3778     if (off) *((uint32_t *)field) &= ~m->value;
3779       else *((uint32_t *)field) |= m->value;
3780     break;
3781 
3782     case MOD_BSR:
3783     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3784       {
3785 #ifdef BSR_ANYCRLF
3786       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3787 #else
3788       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3789 #endif
3790       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3791         else dctl->control2 &= ~CTL2_BSR_SET;
3792       }
3793     else
3794       {
3795       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3796         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3797       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3798         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3799       else goto INVALID_VALUE;
3800       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3801         else dctl->control2 |= CTL2_BSR_SET;
3802       }
3803     pp = ep;
3804     break;
3805 
3806     case MOD_CHR:  /* A single character */
3807     *((uint32_t *)field) = *pp++;
3808     break;
3809 
3810     case MOD_CON:  /* A convert type/options list */
3811     for (;; pp++)
3812       {
3813       uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3814       len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3815       for (i = 0; i < convertlistcount; i++)
3816         {
3817         if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3818           {
3819           if (*((uint32_t *)field) == CONVERT_UNSET)
3820             *((uint32_t *)field) = convertlist[i].option;
3821           else
3822             *((uint32_t *)field) |= convertlist[i].option;
3823           break;
3824           }
3825         }
3826       if (i >= convertlistcount) goto INVALID_VALUE;
3827       pp += len;
3828       if (*pp != ':') break;
3829       }
3830     break;
3831 
3832     case MOD_IN2:    /* One or two unsigned integers */
3833     if (!isdigit(*pp)) goto INVALID_VALUE;
3834     uli = strtoul((const char *)pp, &endptr, 10);
3835     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3836     ((uint32_t *)field)[0] = (uint32_t)uli;
3837     if (*endptr == ':')
3838       {
3839       uli = strtoul((const char *)endptr+1, &endptr, 10);
3840       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3841       ((uint32_t *)field)[1] = (uint32_t)uli;
3842       }
3843     else ((uint32_t *)field)[1] = 0;
3844     pp = (uint8_t *)endptr;
3845     break;
3846 
3847     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3848     less than ULONG_MAX. So first test for overflowing the long int, and then
3849     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3850 
3851     case MOD_SIZ:    /* PCRE2_SIZE value */
3852     if (!isdigit(*pp)) goto INVALID_VALUE;
3853     uli = strtoul((const char *)pp, &endptr, 10);
3854     if (uli == ULONG_MAX) goto INVALID_VALUE;
3855 #if ULONG_MAX > PCRE2_SIZE_MAX
3856     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3857 #endif
3858     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3859     pp = (uint8_t *)endptr;
3860     break;
3861 
3862     case MOD_IND:    /* Unsigned integer with default */
3863     if (len == 0)
3864       {
3865       *((uint32_t *)field) = (uint32_t)(m->value);
3866       break;
3867       }
3868     /* Fall through */
3869 
3870     case MOD_INT:    /* Unsigned integer */
3871     if (!isdigit(*pp)) goto INVALID_VALUE;
3872     uli = strtoul((const char *)pp, &endptr, 10);
3873     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3874     *((uint32_t *)field) = (uint32_t)uli;
3875     pp = (uint8_t *)endptr;
3876     break;
3877 
3878     case MOD_INS:   /* Signed integer */
3879     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3880     li = strtol((const char *)pp, &endptr, 10);
3881     if (S32OVERFLOW(li)) goto INVALID_VALUE;
3882     *((int32_t *)field) = (int32_t)li;
3883     pp = (uint8_t *)endptr;
3884     break;
3885 
3886     case MOD_NL:
3887     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3888       if (len == strlen(newlines[i]) &&
3889         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3890     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3891     if (i == 0)
3892       {
3893       *((uint16_t *)field) = NEWLINE_DEFAULT;
3894       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3895         else dctl->control2 &= ~CTL2_NL_SET;
3896       }
3897     else
3898       {
3899       *((uint16_t *)field) = i;
3900       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3901         else dctl->control2 |= CTL2_NL_SET;
3902       }
3903     pp = ep;
3904     break;
3905 
3906     case MOD_NN:              /* Name or (signed) number; may be several */
3907     if (isdigit(*pp) || *pp == '-')
3908       {
3909       int ct = MAXCPYGET - 1;
3910       int32_t value;
3911       li = strtol((const char *)pp, &endptr, 10);
3912       if (S32OVERFLOW(li)) goto INVALID_VALUE;
3913       value = (int32_t)li;
3914       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
3915       if (value >= 0)                                    /* Add new number */
3916         {
3917         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
3918           field = (char *)field + sizeof(int32_t);
3919         if (ct <= 0)
3920           {
3921           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3922           return FALSE;
3923           }
3924         }
3925       *((int32_t *)field) = value;
3926       if (ct > 0) ((int32_t *)field)[1] = -1;
3927       pp = (uint8_t *)endptr;
3928       }
3929 
3930     /* Multiple strings are put end to end. */
3931 
3932     else
3933       {
3934       char *nn = (char *)field;
3935       if (len > 0)                    /* Add new name */
3936         {
3937         if (len > MAX_NAME_SIZE)
3938           {
3939           fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3940           return FALSE;
3941           }
3942         while (*nn != 0) nn += strlen(nn) + 1;
3943         if (nn + len + 2 - (char *)field > LENCPYGET)
3944           {
3945           fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3946             m->name);
3947           return FALSE;
3948           }
3949         memcpy(nn, pp, len);
3950         }
3951       nn[len] = 0 ;
3952       nn[len+1] = 0;
3953       pp = ep;
3954       }
3955     break;
3956 
3957     case MOD_STR:
3958     if (len + 1 > m->value)
3959       {
3960       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3961         m->name, m->value - 1);
3962       return FALSE;
3963       }
3964     memcpy(field, pp, len);
3965     ((uint8_t *)field)[len] = 0;
3966     pp = ep;
3967     break;
3968     }
3969 
3970   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3971     {
3972     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3973     return FALSE;
3974     }
3975 
3976   p = pp;
3977   first = FALSE;
3978 
3979   if (ctx == CTX_POPPAT &&
3980      (pctl->options != 0 ||
3981       pctl->tables_id != 0 ||
3982       pctl->locale[0] != 0 ||
3983       (pctl->control & NOTPOP_CONTROLS) != 0))
3984     {
3985     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3986     return FALSE;
3987     }
3988   }
3989 
3990 return TRUE;
3991 
3992 INVALID_VALUE:
3993 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3994 return FALSE;
3995 }
3996 
3997 
3998 /*************************************************
3999 *             Get info from a pattern            *
4000 *************************************************/
4001 
4002 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4003 pattern.
4004 
4005 Arguments:
4006   what        code for the required information
4007   where       where to put the answer
4008   unsetok     PCRE2_ERROR_UNSET is an "expected" result
4009 
4010 Returns:      the return from pcre2_pattern_info()
4011 */
4012 
4013 static int
pattern_info(int what,void * where,BOOL unsetok)4014 pattern_info(int what, void *where, BOOL unsetok)
4015 {
4016 int rc;
4017 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
4018 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4019 if (rc >= 0) return 0;
4020 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4021   {
4022   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4023     what);
4024   if (rc == PCRE2_ERROR_BADMODE)
4025     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4026       "%d-bit mode\n", test_mode,
4027       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4028   }
4029 return rc;
4030 }
4031 
4032 
4033 
4034 #ifdef SUPPORT_PCRE2_8
4035 /*************************************************
4036 *             Show something in a list           *
4037 *************************************************/
4038 
4039 /* This function just helps to keep the code that uses it tidier. It's used for
4040 various lists of things where there needs to be introductory text before the
4041 first item. As these calls are all in the POSIX-support code, they happen only
4042 when 8-bit mode is supported. */
4043 
4044 static void
prmsg(const char ** msg,const char * s)4045 prmsg(const char **msg, const char *s)
4046 {
4047 fprintf(outfile, "%s %s", *msg, s);
4048 *msg = "";
4049 }
4050 #endif  /* SUPPORT_PCRE2_8 */
4051 
4052 
4053 
4054 /*************************************************
4055 *                Show control bits               *
4056 *************************************************/
4057 
4058 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4059 Because the bits are unique, this can be used for both pattern and data control
4060 words.
4061 
4062 Arguments:
4063   controls    control bits
4064   controls2   more control bits
4065   before      text to print before
4066 
4067 Returns:      nothing
4068 */
4069 
4070 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4071 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4072 {
4073 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4074   before,
4075   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4076   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4077   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4078   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4079   ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4080   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4081   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4082   ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4083   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4084   ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4085   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4086   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4087   ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4088   ((controls & CTL_DFA) != 0)? " dfa" : "",
4089   ((controls & CTL_EXPAND) != 0)? " expand" : "",
4090   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4091   ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4092   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4093   ((controls & CTL_GETALL) != 0)? " getall" : "",
4094   ((controls & CTL_GLOBAL) != 0)? " global" : "",
4095   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4096   ((controls & CTL_INFO) != 0)? " info" : "",
4097   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4098   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4099   ((controls & CTL_MARK) != 0)? " mark" : "",
4100   ((controls & CTL_MEMORY) != 0)? " memory" : "",
4101   ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4102   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4103   ((controls & CTL_POSIX) != 0)? " posix" : "",
4104   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4105   ((controls & CTL_PUSH) != 0)? " push" : "",
4106   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4107   ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4108   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4109   ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4110   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4111   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4112   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4113   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4114   ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4115   ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4116   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4117 }
4118 
4119 
4120 
4121 /*************************************************
4122 *                Show compile options            *
4123 *************************************************/
4124 
4125 /* Called from show_pattern_info() and for unsupported POSIX options.
4126 
4127 Arguments:
4128   options     an options word
4129   before      text to print before
4130   after       text to print after
4131 
4132 Returns:      nothing
4133 */
4134 
4135 static void
show_compile_options(uint32_t options,const char * before,const char * after)4136 show_compile_options(uint32_t options, const char *before, const char *after)
4137 {
4138 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4139 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4140   before,
4141   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4142   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4143   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4144   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4145   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4146   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4147   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4148   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4149   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4150   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4151   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4152   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4153   ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4154   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4155   ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4156   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4157   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4158   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4159   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4160   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4161   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4162   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4163   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4164   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4165   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4166   ((options & PCRE2_UCP) != 0)? " ucp" : "",
4167   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4168   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4169   ((options & PCRE2_UTF) != 0)? " utf" : "",
4170   after);
4171 }
4172 
4173 
4174 /*************************************************
4175 *           Show compile extra options           *
4176 *************************************************/
4177 
4178 /* Called from show_pattern_info() and for unsupported POSIX options.
4179 
4180 Arguments:
4181   options     an options word
4182   before      text to print before
4183   after       text to print after
4184 
4185 Returns:      nothing
4186 */
4187 
4188 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4189 show_compile_extra_options(uint32_t options, const char *before,
4190   const char *after)
4191 {
4192 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4193 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4194   before,
4195   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4196   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4197   ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4198   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4199   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4200   ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4201   after);
4202 }
4203 
4204 
4205 
4206 #ifdef SUPPORT_PCRE2_8
4207 /*************************************************
4208 *                Show match options              *
4209 *************************************************/
4210 
4211 /* Called for unsupported POSIX options. */
4212 
4213 static void
show_match_options(uint32_t options)4214 show_match_options(uint32_t options)
4215 {
4216 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4217   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4218   ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4219   ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4220   ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4221   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4222   ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4223   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4224   ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4225   ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4226   ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4227   ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4228   ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4229   ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4230 }
4231 #endif  /* SUPPORT_PCRE2_8 */
4232 
4233 
4234 
4235 /*************************************************
4236 *      Show memory usage info for a pattern      *
4237 *************************************************/
4238 
4239 static void
show_memory_info(void)4240 show_memory_info(void)
4241 {
4242 uint32_t name_count, name_entry_size;
4243 size_t size, cblock_size;
4244 
4245 /* One of the test_mode values will always be true, but to stop a compiler
4246 warning we must initialize cblock_size. */
4247 
4248 cblock_size = 0;
4249 #ifdef SUPPORT_PCRE2_8
4250 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4251 #endif
4252 #ifdef SUPPORT_PCRE2_16
4253 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4254 #endif
4255 #ifdef SUPPORT_PCRE2_32
4256 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4257 #endif
4258 
4259 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4260 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4261 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4262 fprintf(outfile, "Memory allocation (code space): %d\n",
4263   (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4264 if (pat_patctl.jit != 0)
4265   {
4266   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4267   fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4268   }
4269 }
4270 
4271 
4272 
4273 /*************************************************
4274 *       Show frame size info for a pattern       *
4275 *************************************************/
4276 
4277 static void
show_framesize(void)4278 show_framesize(void)
4279 {
4280 size_t frame_size;
4281 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4282 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4283 }
4284 
4285 
4286 
4287 /*************************************************
4288 *         Get and output an error message        *
4289 *************************************************/
4290 
4291 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4292 print_error_message(int errorcode, const char *before, const char *after)
4293 {
4294 int len;
4295 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4296 if (len < 0)
4297   {
4298   fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4299     "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4300   }
4301 else
4302   {
4303   fprintf(outfile, "%s", before);
4304   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4305   fprintf(outfile, "%s", after);
4306   }
4307 return len >= 0;
4308 }
4309 
4310 
4311 /*************************************************
4312 *     Callback function for callout enumeration  *
4313 *************************************************/
4314 
4315 /* The only differences in the callout emumeration block for different code
4316 unit widths are that the pointers to the subject, the most recent MARK, and a
4317 callout argument string point to strings of the appropriate width. Casts can be
4318 used to deal with this.
4319 
4320 Argument:
4321   cb            pointer to enumerate block
4322   callout_data  user data
4323 
4324 Returns:    0
4325 */
4326 
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4327 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4328   void *callout_data)
4329 {
4330 uint32_t i;
4331 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4332 
4333 (void)callout_data;  /* Not currently displayed */
4334 
4335 fprintf(outfile, "Callout ");
4336 if (cb->callout_string != NULL)
4337   {
4338   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4339   fprintf(outfile, "%c", delimiter);
4340   PCHARSV(cb->callout_string, 0,
4341     cb->callout_string_length, utf, outfile);
4342   for (i = 0; callout_start_delims[i] != 0; i++)
4343     if (delimiter == callout_start_delims[i])
4344       {
4345       delimiter = callout_end_delims[i];
4346       break;
4347       }
4348   fprintf(outfile, "%c  ", delimiter);
4349   }
4350 else fprintf(outfile, "%d  ", cb->callout_number);
4351 
4352 fprintf(outfile, "%.*s\n",
4353   (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4354   pbuffer8 + cb->pattern_position);
4355 
4356 return 0;
4357 }
4358 
4359 
4360 
4361 /*************************************************
4362 *        Show information about a pattern        *
4363 *************************************************/
4364 
/* This function is called after a pattern has been compiled if any of the
information-requesting controls have been set. It deals with three groups of
controls, tested in pat_patctl.control, in this order:

  bincode/fullbincode   a separator line followed by PCRE2_PRINTINT()
  info                  details obtained from pattern_info() and from the
                          compiled pattern's flags
  callout_info          a list of callouts via PCRE2_CALLOUT_ENUMERATE()

Arguments:  none

Returns:    PR_OK     continue processing next line
            PR_SKIP   skip to a blank line (callout enumeration failed)
            PR_ABEND  abort the pcre2test run (an information request or an
                        error message lookup failed)
*/

static int
show_pattern_info(void)
{
uint32_t compile_options, overall_options, extra_options;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;

/* The argument passed to PCRE2_PRINTINT() is TRUE only for fullbincode. */

if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
  {
  fprintf(outfile, "------------------------------------------------------------------\n");
  PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
  }

if ((pat_patctl.control & CTL_INFO) != 0)
  {
  int rc;
  void *nametable;
  uint8_t *start_bits;
  BOOL heap_limit_set, match_limit_set, depth_limit_set;
  uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
    hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
    depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
    newline_convention;

  /* Exercise the error route. The request code 999 is used with a NULL
  destination, and the result is deliberately discarded. */

  PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
  (void)rc;

  /* These info requests may return PCRE2_ERROR_UNSET. For each one, remember
  whether a value was obtained so that it is printed only when set. Any other
  failure abandons the run. */

  switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
    {
    case 0:
    heap_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    heap_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
    {
    case 0:
    match_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    match_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
    {
    case 0:
    depth_limit_set = TRUE;
    break;

    case PCRE2_ERROR_UNSET:
    depth_limit_set = FALSE;
    break;

    default:
    return PR_ABEND;
    }

  /* These info requests should always succeed. pattern_info() yields zero on
  success and a negative value on failure, so the sum is non-zero if any one
  of the requests failed. */

  if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
      pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
      pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
      pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
      pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
      pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
      pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
      pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
      pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
      pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
      pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
      pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
      pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
      pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
      != 0)
    return PR_ABEND;

  fprintf(outfile, "Capture group count = %d\n", capture_count);

  if (backrefmax > 0)
    fprintf(outfile, "Max back reference = %d\n", backrefmax);

  /* maxlookbehind is not fetched in this function; it is a variable that is
  set elsewhere in the file. */

  if (maxlookbehind > 0)
    fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);

  if (heap_limit_set)
    fprintf(outfile, "Heap limit = %u\n", heap_limit);

  if (match_limit_set)
    fprintf(outfile, "Match limit = %u\n", match_limit);

  if (depth_limit_set)
    fprintf(outfile, "Depth limit = %u\n", depth_limit);

  /* Output the name table, one entry per line: the name, padding out to the
  width of a table entry, and then the group number. */

  if (namecount > 0)
    {
    fprintf(outfile, "Named capture groups:\n");
    for (; namecount > 0; namecount--)
      {
      /* Each entry starts with the group number, which is read below as two
      code units in 8-bit mode and as one code unit in the other modes; the
      name follows it. NOTE(review): nametable is a void pointer, so the
      offset in the STRLEN() argument depends on how that macro expands its
      argument - confirm against the macro definition before changing this. */

      int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
      uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
      fprintf(outfile, "  ");

      /* In UTF mode the name may be a UTF string containing non-ASCII
      letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
      use the normal string printing functions, which use escapes for all
      non-ASCII characters. */

      if (utf)
        {
#ifdef SUPPORT_PCRE2_32
        if (test_mode == PCRE32_MODE)
          {
          PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
          while (*nameptr != 0)
            {
            uint8_t u8buff[6];
            int len = ord2utf8(*nameptr++, u8buff);
            fprintf(outfile, "%.*s", len, u8buff);
            }
          }
#endif
#ifdef SUPPORT_PCRE2_16
        if (test_mode == PCRE16_MODE)
          {
          PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
          while (*nameptr != 0)
            {
            int len;
            uint8_t u8buff[6];
            uint32_t c = *nameptr++ & 0xffff;

            /* A value in the range 0xD800-0xDBFF is combined with the
            following code unit to form a single code point. */

            if (c >= 0xD800 && c < 0xDC00)
              c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
            len = ord2utf8(c, u8buff);
            fprintf(outfile, "%.*s", len, u8buff);
            }
          }
#endif
#ifdef SUPPORT_PCRE2_8
        if (test_mode == PCRE8_MODE)
          fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
#endif
        }
      else  /* Not UTF mode */
        {
        PCHARSV(nametable, imm2_size, length, FALSE, outfile);
        }

      /* Pad with spaces, based on the name's length in code units. */

      while (length++ < nameentrysize - imm2_size) putc(' ', outfile);

      /* Output the group number from the start of the entry. */

#ifdef SUPPORT_PCRE2_32
      if (test_mode == PCRE32_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_16
      if (test_mode == PCRE16_MODE)
        fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
#endif
#ifdef SUPPORT_PCRE2_8
      if (test_mode == PCRE8_MODE)
        fprintf(outfile, "%3d\n", (int)(
        ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
#endif

      /* Advance to the next entry; nameentrysize is in code units, so it is
      scaled by code_unit_size to get a byte offset. */

      nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
      }
    }

  if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
  if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
  if (match_empty)   fprintf(outfile, "May match empty string\n");

  /* The results of these three requests are not checked. */

  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
  pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);

  /* Remove NEVER_UTF/NEVER_UCP from what is displayed unless they are set in
  the pattern's own options word (presumably they are otherwise present only
  because of forbid_utf). This saves cluttering up the verification output of
  non-UTF test files. */

  if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UTF;
    overall_options &= ~PCRE2_NEVER_UTF;
    }

  if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UCP;
    overall_options &= ~PCRE2_NEVER_UCP;
    }

  /* Show a single list when the two option words are the same; otherwise
  show them separately. Nothing is shown if both are zero. */

  if ((compile_options|overall_options) != 0)
    {
    if (compile_options == overall_options)
      show_compile_options(compile_options, "Options:", "\n");
    else
      {
      show_compile_options(compile_options, "Compile options:", "\n");
      show_compile_options(overall_options, "Overall options:", "\n");
      }
    }

  if (extra_options != 0)
    show_compile_extra_options(extra_options, "Extra options:", "\n");

  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");

  /* The \R convention is shown if it was set either by a modifier (recorded
  in control2) or in the compiled pattern's flags. */

  if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
      (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");

  /* The newline convention is shown only when the compiled pattern's flags
  have PCRE2_NL_SET. An unrecognized value is silently ignored. */

  if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
    {
    switch (newline_convention)
      {
      case PCRE2_NEWLINE_CR:
      fprintf(outfile, "Forced newline is CR\n");
      break;

      case PCRE2_NEWLINE_LF:
      fprintf(outfile, "Forced newline is LF\n");
      break;

      case PCRE2_NEWLINE_CRLF:
      fprintf(outfile, "Forced newline is CRLF\n");
      break;

      case PCRE2_NEWLINE_ANYCRLF:
      fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
      break;

      case PCRE2_NEWLINE_ANY:
      fprintf(outfile, "Forced newline is any Unicode newline\n");
      break;

      case PCRE2_NEWLINE_NUL:
      fprintf(outfile, "Forced newline is NUL\n");
      break;

      default:
      break;
      }
    }

  /* Starting information: a first code type of 2 gives the "start or follows
  newline" message, 1 gives a specific first code unit, and otherwise the
  bitmap of possible starting code units is shown if there is one. */

  if (first_ctype == 2)
    {
    fprintf(outfile, "First code unit at start or follows newline\n");
    }
  else if (first_ctype == 1)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(first_cunit))
      fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
    else
      {
      fprintf(outfile, "First code unit = ");
      pchar(first_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }
  else if (start_bits != NULL)
    {
    /* c tracks the approximate output column so that a new line, indented by
    two spaces, is started once it exceeds 75. Each item adds its own printed
    width: 2 for a character and a space, 5 for a \xhh escape and a space. */

    int i;
    int c = 24;
    fprintf(outfile, "Starting code units: ");
    for (i = 0; i < 256; i++)
      {
      if ((start_bits[i/8] & (1u << (i&7))) != 0)
        {
        if (c > 75)
          {
          fprintf(outfile, "\n  ");
          c = 2;
          }
        if (PRINTOK(i) && i != ' ')
          {
          fprintf(outfile, "%c ", i);
          c += 2;
          }
        else
          {
          fprintf(outfile, "\\x%02x ", i);
          c += 5;
          }
        }
      }
    fprintf(outfile, "\n");
    }

  /* Any non-zero last code type means that a last code unit is available. */

  if (last_ctype != 0)
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(last_cunit))
      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
    else
      {
      fprintf(outfile, "Last code unit = ");
      pchar(last_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }

  fprintf(outfile, "Subject length lower bound = %d\n", minlength);

  /* Report the outcome of JIT compilation when both jit and jitverify were
  requested. When there is no JIT code and JIT is supported, jitrc (set
  elsewhere) supplies the error to be shown if it is non-zero. */

  if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
    {
    if (FLD(compiled_code, executable_jit) != NULL)
      fprintf(outfile, "JIT compilation was successful\n");
    else
      {
#ifdef SUPPORT_JIT
      fprintf(outfile, "JIT compilation was not successful");
      if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
        return PR_ABEND;
      fprintf(outfile, "\n");
#else
      fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
#endif
      }
    }
  }

/* List the callouts: callout_callback() is passed to the enumeration and
outputs one line each time it is called. A non-zero result is reported; the
message text is looked up only for negative error codes. */

if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
  {
  int errorcode;
  PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
  if (errorcode != 0)
    {
    fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
    if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
      return PR_ABEND;
    return PR_SKIP;
    }
  }

return PR_OK;
}
4734 
4735 
4736 
4737 /*************************************************
4738 *              Handle serialization error        *
4739 *************************************************/
4740 
4741 /* Print an error message after a serialization failure.
4742 
4743 Arguments:
4744   rc         the error code
4745   msg        an initial message for what failed
4746 
4747 Returns:     FALSE if print_error_message() fails
4748 */
4749 
4750 static BOOL
serial_error(int rc,const char * msg)4751 serial_error(int rc, const char *msg)
4752 {
4753 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4754 return print_error_message(rc, "", "\n");
4755 }
4756 
4757 
4758 
4759 /*************************************************
4760 *        Open file for save/load commands        *
4761 *************************************************/
4762 
4763 /* This function decodes the file name and opens the file.
4764 
4765 Arguments:
4766   buffptr     point after the #command
4767   mode        open mode
4768   fptr        points to the FILE variable
4769 
4770 Returns:      PR_OK or PR_ABEND
4771 */
4772 
4773 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr)4774 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4775 {
4776 char *endf;
4777 char *filename = (char *)buffptr;
4778 while (isspace(*filename)) filename++;
4779 endf = filename + strlen8(filename);
4780 while (endf > filename && isspace(endf[-1])) endf--;
4781 
4782 if (endf == filename)
4783   {
4784   fprintf(outfile, "** File name expected after #save\n");
4785   return PR_ABEND;
4786   }
4787 
4788 *endf = 0;
4789 *fptr = fopen((const char *)filename, mode);
4790 if (*fptr == NULL)
4791   {
4792   fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4793   return PR_ABEND;
4794   }
4795 
4796 return PR_OK;
4797 }
4798 
4799 
4800 
4801 /*************************************************
4802 *               Process command line             *
4803 *************************************************/
4804 
4805 /* This function is called for lines beginning with # and a character that is
4806 not ! or whitespace, when encountered between tests, which means that there is
4807 no compiled pattern (compiled_code is NULL). The line is in buffer.
4808 
4809 Arguments:  none
4810 
4811 Returns:    PR_OK     continue processing next line
4812             PR_SKIP   skip to a blank line
4813             PR_ABEND  abort the pcre2test run
4814 */
4815 
4816 static int
process_command(void)4817 process_command(void)
4818 {
4819 FILE *f;
4820 PCRE2_SIZE serial_size;
4821 size_t i;
4822 int rc, cmd, cmdlen, yield;
4823 uint16_t first_listed_newline;
4824 const char *cmdname;
4825 uint8_t *argptr, *serial;
4826 
4827 yield = PR_OK;
4828 cmd = CMD_UNKNOWN;
4829 cmdlen = 0;
4830 
4831 for (i = 0; i < cmdlistcount; i++)
4832   {
4833   cmdname = cmdlist[i].name;
4834   cmdlen = strlen(cmdname);
4835   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4836       isspace(buffer[cmdlen+1]))
4837     {
4838     cmd = cmdlist[i].value;
4839     break;
4840     }
4841   }
4842 
4843 argptr = buffer + cmdlen + 1;
4844 
4845 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4846   {
4847   fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4848   return PR_ABEND;
4849   }
4850 
4851 switch(cmd)
4852   {
4853   case CMD_UNKNOWN:
4854   fprintf(outfile, "** Unknown command: %s", buffer);
4855   break;
4856 
4857   case CMD_FORBID_UTF:
4858   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4859   break;
4860 
4861   case CMD_PERLTEST:
4862   restrict_for_perl_test = TRUE;
4863   break;
4864 
4865   /* Set default pattern modifiers */
4866 
4867   case CMD_PATTERN:
4868   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4869   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4870     def_patctl.jit = 7;
4871   break;
4872 
4873   /* Set default subject modifiers */
4874 
4875   case CMD_SUBJECT:
4876   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4877   break;
4878 
4879   /* Check the default newline, and if not one of those listed, set up the
4880   first one to be forced. An empty list unsets. */
4881 
4882   case CMD_NEWLINE_DEFAULT:
4883   local_newline_default = 0;   /* Unset */
4884   first_listed_newline = 0;
4885   for (;;)
4886     {
4887     while (isspace(*argptr)) argptr++;
4888     if (*argptr == 0) break;
4889     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4890       {
4891       size_t nlen = strlen(newlines[i]);
4892       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4893           isspace(argptr[nlen]))
4894         {
4895         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
4896         if (first_listed_newline == 0) first_listed_newline = i;
4897         }
4898       }
4899     while (*argptr != 0 && !isspace(*argptr)) argptr++;
4900     }
4901   local_newline_default = first_listed_newline;
4902   break;
4903 
4904   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4905   the compiled pattern (e.g. to give information) are permitted. The default
4906   pattern modifiers are ignored. */
4907 
4908   case CMD_POP:
4909   case CMD_POPCOPY:
4910   if (patstacknext <= 0)
4911     {
4912     fprintf(outfile, "** Can't pop off an empty stack\n");
4913     return PR_SKIP;
4914     }
4915   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
4916   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4917     return PR_SKIP;
4918 
4919   if (cmd == CMD_POP)
4920     {
4921     SET(compiled_code, patstack[--patstacknext]);
4922     }
4923   else
4924     {
4925     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4926     }
4927 
4928   if (pat_patctl.jit != 0)
4929     {
4930     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4931     }
4932   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4933   if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4934   if ((pat_patctl.control & CTL_ANYINFO) != 0)
4935     {
4936     rc = show_pattern_info();
4937     if (rc != PR_OK) return rc;
4938     }
4939   break;
4940 
4941   /* Save the stack of compiled patterns to a file, then empty the stack. */
4942 
4943   case CMD_SAVE:
4944   if (patstacknext <= 0)
4945     {
4946     fprintf(outfile, "** No stacked patterns to save\n");
4947     return PR_OK;
4948     }
4949 
4950   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4951   if (rc != PR_OK) return rc;
4952 
4953   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4954     general_context);
4955   if (rc < 0)
4956     {
4957     fclose(f);
4958     if (!serial_error(rc, "Serialization")) return PR_ABEND;
4959     break;
4960     }
4961 
4962   /* Write the length at the start of the file to make it straightforward to
4963   get the right memory when re-loading. This saves having to read the file size
4964   in different operating systems. To allow for different endianness (even
4965   though reloading with the opposite endianness does not work), write the
4966   length byte-by-byte. */
4967 
4968   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4969   if (fwrite(serial, 1, serial_size, f) != serial_size)
4970     {
4971     fprintf(outfile, "** Wrong return from fwrite()\n");
4972     fclose(f);
4973     return PR_ABEND;
4974     }
4975 
4976   fclose(f);
4977   PCRE2_SERIALIZE_FREE(serial);
4978   while(patstacknext > 0)
4979     {
4980     SET(compiled_code, patstack[--patstacknext]);
4981     SUB1(pcre2_code_free, compiled_code);
4982     }
4983   SET(compiled_code, NULL);
4984   break;
4985 
4986   /* Load a set of compiled patterns from a file onto the stack */
4987 
4988   case CMD_LOAD:
4989   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4990   if (rc != PR_OK) return rc;
4991 
4992   serial_size = 0;
4993   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4994 
4995   serial = malloc(serial_size);
4996   if (serial == NULL)
4997     {
4998     fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
4999       SIZ_CAST serial_size);
5000     fclose(f);
5001     return PR_ABEND;
5002     }
5003 
5004   i = fread(serial, 1, serial_size, f);
5005   fclose(f);
5006 
5007   if (i != serial_size)
5008     {
5009     fprintf(outfile, "** Wrong return from fread()\n");
5010     yield = PR_ABEND;
5011     }
5012   else
5013     {
5014     PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5015     if (rc < 0)
5016       {
5017       if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5018       }
5019     else
5020       {
5021       if (rc + patstacknext > PATSTACKSIZE)
5022         {
5023         fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5024           rc, (rc == 1)? "" : "s");
5025         rc = PATSTACKSIZE - patstacknext;
5026         fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5027           (rc == 1)? "" : "s");
5028         }
5029       PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5030         general_context);
5031       if (rc < 0)
5032         {
5033         if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5034         }
5035       else patstacknext += rc;
5036       }
5037     }
5038 
5039   free(serial);
5040   break;
5041   }
5042 
5043 return yield;
5044 }
5045 
5046 
5047 
5048 /*************************************************
5049 *               Process pattern line             *
5050 *************************************************/
5051 
5052 /* This function is called when the input buffer contains the start of a
5053 pattern. The first character is known to be a valid delimiter. The pattern is
5054 read, modifiers are interpreted, and a suitable local context is set up for
5055 this test. The pattern is then compiled.
5056 
5057 Arguments:  none
5058 
5059 Returns:    PR_OK     continue processing next line
5060             PR_SKIP   skip to a blank line
5061             PR_ABEND  abort the pcre2test run
5062 */
5063 
5064 static int
process_pattern(void)5065 process_pattern(void)
5066 {
5067 BOOL utf;
5068 uint32_t k;
5069 uint8_t *p = buffer;
5070 unsigned int delimiter = *p++;
5071 int errorcode;
5072 void *use_pat_context;
5073 uint32_t use_forbid_utf = forbid_utf;
5074 PCRE2_SIZE patlen;
5075 PCRE2_SIZE valgrind_access_length;
5076 PCRE2_SIZE erroroffset;
5077 
5078 /* Initialize the context and pattern/data controls for this test from the
5079 defaults. */
5080 
5081 PATCTXCPY(pat_context, default_pat_context);
5082 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5083 
5084 /* Find the end of the pattern, reading more lines if necessary. */
5085 
5086 for(;;)
5087   {
5088   while (*p != 0)
5089     {
5090     if (*p == '\\' && p[1] != 0) p++;
5091       else if (*p == delimiter) break;
5092     p++;
5093     }
5094   if (*p != 0) break;
5095   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
5096     {
5097     fprintf(outfile, "** Unexpected EOF\n");
5098     return PR_ABEND;
5099     }
5100   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5101   }
5102 
5103 /* If the first character after the delimiter is backslash, make the pattern
5104 end with backslash. This is purely to provide a way of testing for the error
5105 message when a pattern ends with backslash. */
5106 
5107 if (p[1] == '\\') *p++ = '\\';
5108 
5109 /* Terminate the pattern at the delimiter, and compute the length. */
5110 
5111 *p++ = 0;
5112 patlen = p - buffer - 2;
5113 
5114 /* Look for modifiers and options after the final delimiter. */
5115 
5116 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5117 utf = (pat_patctl.options & PCRE2_UTF) != 0;
5118 
5119 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5120 exclusive with the utf modifier. */
5121 
5122 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5123   {
5124   if (test_mode == PCRE8_MODE)
5125     {
5126     fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5127     return PR_SKIP;
5128     }
5129   if (utf)
5130     {
5131     fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5132     return PR_SKIP;
5133     }
5134   }
5135 
5136 /* The convert and posix modifiers are mutually exclusive. */
5137 
5138 if (pat_patctl.convert_type != CONVERT_UNSET &&
5139     (pat_patctl.control & CTL_POSIX) != 0)
5140   {
5141   fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5142   return PR_SKIP;
5143   }
5144 
5145 /* Check for mutually exclusive control modifiers. At present, these are all in
5146 the first control word. */
5147 
5148 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5149   {
5150   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5151   if (c != 0 && c != (c & (~c+1)))
5152     {
5153     show_controls(c, 0, "** Not allowed together:");
5154     fprintf(outfile, "\n");
5155     return PR_SKIP;
5156     }
5157   }
5158 
5159 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5160 specified. */
5161 
5162 if (pat_patctl.jit == 0 &&
5163     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5164   pat_patctl.jit = 7;
5165 
5166 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5167 in callouts. Convert from hex if requested (literal strings in quotes may be
5168 present within the hexadecimal pairs). The result must necessarily be fewer
5169 characters so will always fit in pbuffer8. */
5170 
5171 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5172   {
5173   uint8_t *pp, *pt;
5174   uint32_t c, d;
5175 
5176   pt = pbuffer8;
5177   for (pp = buffer + 1; *pp != 0; pp++)
5178     {
5179     if (isspace(*pp)) continue;
5180     c = *pp++;
5181 
5182     /* Handle a literal substring */
5183 
5184     if (c == '\'' || c == '"')
5185       {
5186       uint8_t *pq = pp;
5187       for (;; pp++)
5188         {
5189         d = *pp;
5190         if (d == 0)
5191           {
5192           fprintf(outfile, "** Missing closing quote in hex pattern: "
5193             "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5194           return PR_SKIP;
5195           }
5196         if (d == c) break;
5197         *pt++ = d;
5198         }
5199       }
5200 
5201     /* Expect a hex pair */
5202 
5203     else
5204       {
5205       if (!isxdigit(c))
5206         {
5207         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5208           PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5209         return PR_SKIP;
5210         }
5211       if (*pp == 0)
5212         {
5213         fprintf(outfile, "** Odd number of digits in hex pattern\n");
5214         return PR_SKIP;
5215         }
5216       d = *pp;
5217       if (!isxdigit(d))
5218         {
5219         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5220           PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5221         return PR_SKIP;
5222         }
5223       c = toupper(c);
5224       d = toupper(d);
5225       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5226                (isdigit(d)? (d - '0') : (d - 'A' + 10));
5227       }
5228     }
5229   *pt = 0;
5230   patlen = pt - pbuffer8;
5231   }
5232 
5233 /* If not a hex string, process for repetition expansion if requested. */
5234 
5235 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5236   {
5237   uint8_t *pp, *pt;
5238 
5239   pt = pbuffer8;
5240   for (pp = buffer + 1; *pp != 0; pp++)
5241     {
5242     uint8_t *pc = pp;
5243     uint32_t count = 1;
5244     size_t length = 1;
5245 
5246     /* Check for replication syntax; if not found, the defaults just set will
5247     prevail and one character will be copied. */
5248 
5249     if (pp[0] == '\\' && pp[1] == '[')
5250       {
5251       uint8_t *pe;
5252       for (pe = pp + 2; *pe != 0; pe++)
5253         {
5254         if (pe[0] == ']' && pe[1] == '{')
5255           {
5256           uint32_t clen = pe - pc - 2;
5257           uint32_t i = 0;
5258           unsigned long uli;
5259           char *endptr;
5260 
5261           pe += 2;
5262           uli = strtoul((const char *)pe, &endptr, 10);
5263           if (U32OVERFLOW(uli))
5264             {
5265             fprintf(outfile, "** Pattern repeat count too large\n");
5266             return PR_SKIP;
5267             }
5268 
5269           i = (uint32_t)uli;
5270           pe = (uint8_t *)endptr;
5271           if (*pe == '}')
5272             {
5273             if (i == 0)
5274               {
5275               fprintf(outfile, "** Zero repeat not allowed\n");
5276               return PR_SKIP;
5277               }
5278             pc += 2;
5279             count = i;
5280             length = clen;
5281             pp = pe;
5282             break;
5283             }
5284           }
5285         }
5286       }
5287 
5288     /* Add to output. If the buffer is too small expand it. The function for
5289     expanding buffers always keeps buffer and pbuffer8 in step as far as their
5290     size goes. */
5291 
5292     while (pt + count * length > pbuffer8 + pbuffer8_size)
5293       {
5294       size_t pc_offset = pc - buffer;
5295       size_t pp_offset = pp - buffer;
5296       size_t pt_offset = pt - pbuffer8;
5297       expand_input_buffers();
5298       pc = buffer + pc_offset;
5299       pp = buffer + pp_offset;
5300       pt = pbuffer8 + pt_offset;
5301       }
5302 
5303     for (; count > 0; count--)
5304       {
5305       memcpy(pt, pc, length);
5306       pt += length;
5307       }
5308     }
5309 
5310   *pt = 0;
5311   patlen = pt - pbuffer8;
5312 
5313   if ((pat_patctl.control & CTL_INFO) != 0)
5314     fprintf(outfile, "Expanded: %s\n", pbuffer8);
5315   }
5316 
5317 /* Neither hex nor expanded, just copy the input verbatim. */
5318 
5319 else
5320   {
5321   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5322   }
5323 
5324 /* Sort out character tables */
5325 
5326 if (pat_patctl.locale[0] != 0)
5327   {
5328   if (pat_patctl.tables_id != 0)
5329     {
5330     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5331     return PR_SKIP;
5332     }
5333   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5334     {
5335     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5336     return PR_SKIP;
5337     }
5338   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5339     {
5340     strcpy((char *)locale_name, (char *)pat_patctl.locale);
5341     if (locale_tables != NULL) free((void *)locale_tables);
5342     PCRE2_MAKETABLES(locale_tables);
5343     }
5344   use_tables = locale_tables;
5345   }
5346 
5347 else switch (pat_patctl.tables_id)
5348   {
5349   case 0: use_tables = NULL; break;
5350   case 1: use_tables = tables1; break;
5351   case 2: use_tables = tables2; break;
5352   default:
5353   fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
5354   return PR_SKIP;
5355   }
5356 
5357 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5358 
5359 /* Set up for the stackguard test. */
5360 
5361 if (pat_patctl.stackguard_test != 0)
5362   {
5363   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5364   }
5365 
5366 /* Handle compiling via the POSIX interface, which doesn't support the
5367 timing, showing, or debugging options, nor the ability to pass over
5368 local character tables. Neither does it have 16-bit or 32-bit support. */
5369 
5370 if ((pat_patctl.control & CTL_POSIX) != 0)
5371   {
5372 #ifdef SUPPORT_PCRE2_8
5373   int rc;
5374   int cflags = 0;
5375   const char *msg = "** Ignored with POSIX interface:";
5376 #endif
5377 
5378   if (test_mode != PCRE8_MODE)
5379     {
5380     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5381     return PR_SKIP;
5382     }
5383 
5384 #ifdef SUPPORT_PCRE2_8
5385   /* Check for features that the POSIX interface does not support. */
5386 
5387   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5388   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5389   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5390   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5391   if (timeit > 0) prmsg(&msg, "timing");
5392   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5393 
5394   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5395     {
5396     show_compile_options(
5397       pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5398     msg = "";
5399     }
5400 
5401   if ((FLD(pat_context, extra_options) &
5402        ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5403     {
5404     show_compile_extra_options(
5405       FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5406         msg, "");
5407     msg = "";
5408     }
5409 
5410   if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5411       (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5412     {
5413     show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5414       pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5415     msg = "";
5416     }
5417 
5418   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5419   if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5420     prmsg(&msg, "max_pattern_length");
5421   if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5422     prmsg(&msg, "parens_nest_limit");
5423 
5424   if (msg[0] == 0) fprintf(outfile, "\n");
5425 
5426   /* Translate PCRE2 options to POSIX options and then compile. */
5427 
5428   if (utf) cflags |= REG_UTF;
5429   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5430   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5431   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5432   if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5433   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5434   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5435   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5436 
5437   if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5438     {
5439     preg.re_endp = (char *)pbuffer8 + patlen;
5440     cflags |= REG_PEND;
5441     }
5442 
5443   rc = regcomp(&preg, (char *)pbuffer8, cflags);
5444 
5445   /* Compiling failed */
5446 
5447   if (rc != 0)
5448     {
5449     size_t bsize, usize;
5450     int psize;
5451 
5452     preg.re_pcre2_code = NULL;     /* In case something was left in there */
5453     preg.re_match_data = NULL;
5454 
5455     bsize = (pat_patctl.regerror_buffsize != 0)?
5456       pat_patctl.regerror_buffsize : pbuffer8_size;
5457     if (bsize + 8 < pbuffer8_size)
5458       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5459     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5460 
5461     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5462     versions of snprintf() put a zero byte at the end, but others do not.
5463     Therefore, we print a maximum of one less than the size of the buffer. */
5464 
5465     psize = (int)bsize - 1;
5466     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5467     if (usize > bsize)
5468       {
5469       fprintf(outfile, "** regerror() message truncated\n");
5470       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5471         fprintf(outfile, "** regerror() buffer overflow\n");
5472       }
5473     return PR_SKIP;
5474     }
5475 
5476   /* Compiling succeeded. Check that the values in the preg block are sensible.
5477   It can happen that pcre2test is accidentally linked with a different POSIX
5478   library which succeeds, but of course puts different things into preg. In
5479   this situation, calling regfree() may cause a segfault (or invalid free() in
5480   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5481   calling of regfree() on exit. */
5482 
5483   if (preg.re_pcre2_code == NULL ||
5484       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5485       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5486       preg.re_match_data == NULL ||
5487       preg.re_cflags != cflags)
5488     {
5489     fprintf(outfile,
5490       "** The regcomp() function returned zero (success), but the values set\n"
5491       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5492       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5493       "** some other POSIX regex library.\n**\n");
5494     preg.re_pcre2_code = NULL;
5495     return PR_ABEND;
5496     }
5497 
5498   return PR_OK;
5499 #endif  /* SUPPORT_PCRE2_8 */
5500   }
5501 
5502 /* Handle compiling via the native interface. Controls that act later are
5503 ignored with "push". Replacements are locked out. */
5504 
5505 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5506   {
5507   if (pat_patctl.replacement[0] != 0)
5508     {
5509     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5510     return PR_OK;
5511     }
5512   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5513       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5514     {
5515     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5516                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5517       "** Ignored when compiled pattern is stacked with 'push':");
5518     fprintf(outfile, "\n");
5519     }
5520   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5521       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5522     {
5523     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5524                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5525       "** Applies only to compile when pattern is stacked with 'push':");
5526     fprintf(outfile, "\n");
5527     }
5528   }
5529 
5530 /* Convert the input in non-8-bit modes. */
5531 
5532 errorcode = 0;
5533 
5534 #ifdef SUPPORT_PCRE2_16
5535 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5536 #endif
5537 
5538 #ifdef SUPPORT_PCRE2_32
5539 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5540 #endif
5541 
5542 switch(errorcode)
5543   {
5544   case -1:
5545   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5546     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5547   return PR_SKIP;
5548 
5549   case -2:
5550   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5551     "cannot be converted to UTF\n");
5552   return PR_SKIP;
5553 
5554   case -3:
5555   fprintf(outfile, "** Failed: character value greater than 0xffff "
5556     "cannot be converted to 16-bit in non-UTF mode\n");
5557   return PR_SKIP;
5558 
5559   default:
5560   break;
5561   }
5562 
5563 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5564 patlen. If it is to be converted, copy the result back afterwards so that it
5565 ends up back in the usual place. */
5566 
5567 if (pat_patctl.convert_type != CONVERT_UNSET)
5568   {
5569   int rc;
5570   int convert_return = PR_OK;
5571   uint32_t convert_options = pat_patctl.convert_type;
5572   void *converted_pattern;
5573   PCRE2_SIZE converted_length;
5574 
5575   if (pat_patctl.convert_length != 0)
5576     {
5577     converted_length = pat_patctl.convert_length;
5578     converted_pattern = malloc(converted_length * code_unit_size);
5579     if (converted_pattern == NULL)
5580       {
5581       fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5582       return PR_SKIP;
5583       }
5584     }
5585   else converted_pattern = NULL;  /* Let the library allocate */
5586 
5587   if (utf) convert_options |= PCRE2_CONVERT_UTF;
5588   if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5589     convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5590 
5591   CONCTXCPY(con_context, default_con_context);
5592 
5593   if (pat_patctl.convert_glob_escape != 0)
5594     {
5595     uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5596       pat_patctl.convert_glob_escape;
5597     PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5598     if (rc != 0)
5599       {
5600       fprintf(outfile, "** Invalid glob escape '%c'\n",
5601         pat_patctl.convert_glob_escape);
5602       convert_return = PR_SKIP;
5603       goto CONVERT_FINISH;
5604       }
5605     }
5606 
5607   if (pat_patctl.convert_glob_separator != 0)
5608     {
5609     PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5610     if (rc != 0)
5611       {
5612       fprintf(outfile, "** Invalid glob separator '%c'\n",
5613         pat_patctl.convert_glob_separator);
5614       convert_return = PR_SKIP;
5615       goto CONVERT_FINISH;
5616       }
5617     }
5618 
5619   PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5620     &converted_pattern, &converted_length, con_context);
5621 
5622   if (rc != 0)
5623     {
5624     fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5625       SIZ_CAST converted_length);
5626     convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5627     }
5628 
5629   /* Output the converted pattern, then copy it. */
5630 
5631   else
5632     {
5633     PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5634     fprintf(outfile, "\n");
5635     patlen = converted_length;
5636     CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5637     }
5638 
5639   /* Free the converted pattern. */
5640 
5641   CONVERT_FINISH:
5642   if (pat_patctl.convert_length != 0)
5643     free(converted_pattern);
5644   else
5645     PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5646 
5647   /* Return if conversion was unsuccessful. */
5648 
5649   if (convert_return != PR_OK) return convert_return;
5650   }
5651 
5652 /* By default we pass a zero-terminated pattern, but a length is passed if
5653 "use_length" was specified or this is a hex pattern (which might contain binary
5654 zeros). When valgrind is supported, arrange for the unused part of the buffer
5655 to be marked as no access. */
5656 
5657 valgrind_access_length = patlen;
5658 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5659   {
5660   patlen = PCRE2_ZERO_TERMINATED;
5661   valgrind_access_length += 1;  /* For the terminating zero */
5662   }
5663 
5664 #ifdef SUPPORT_VALGRIND
5665 #ifdef SUPPORT_PCRE2_8
5666 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5667   {
5668   VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5669     pbuffer8_size - valgrind_access_length);
5670   }
5671 #endif
5672 #ifdef SUPPORT_PCRE2_16
5673 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5674   {
5675   VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5676     pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5677   }
5678 #endif
5679 #ifdef SUPPORT_PCRE2_32
5680 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5681   {
5682   VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5683     pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5684   }
5685 #endif
5686 #else  /* Valgrind not supported */
5687 (void)valgrind_access_length;  /* Avoid compiler warning */
5688 #endif
5689 
5690 /* If #newline_default has been used and the library was not compiled with an
5691 appropriate default newline setting, local_newline_default will be non-zero. We
5692 use this if there is no explicit newline modifier. */
5693 
5694 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5695   {
5696   SETFLD(pat_context, newline_convention, local_newline_default);
5697   }
5698 
5699 /* The null_context modifier is used to test calling pcre2_compile() with a
5700 NULL context. */
5701 
5702 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5703   NULL : PTR(pat_context);
5704 
5705 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5706 and PCRE2_NEVER_UCP are invalid with it. */
5707 
5708 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5709 
5710 /* Compile many times when timing. */
5711 
5712 if (timeit > 0)
5713   {
5714   int i;
5715   clock_t time_taken = 0;
5716   for (i = 0; i < timeit; i++)
5717     {
5718     clock_t start_time = clock();
5719     PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5720       pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5721         use_pat_context);
5722     time_taken += clock() - start_time;
5723     if (TEST(compiled_code, !=, NULL))
5724       { SUB1(pcre2_code_free, compiled_code); }
5725     }
5726   total_compile_time += time_taken;
5727   fprintf(outfile, "Compile time %.4f milliseconds\n",
5728     (((double)time_taken * 1000.0) / (double)timeit) /
5729       (double)CLOCKS_PER_SEC);
5730   }
5731 
5732 /* A final compile that is used "for real". */
5733 
5734 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5735   &errorcode, &erroroffset, use_pat_context);
5736 
5737 /* Call the JIT compiler if requested. When timing, we must free and recompile
5738 the pattern each time because that is the only way to free the JIT compiled
5739 code. We know that compilation will always succeed. */
5740 
5741 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5742   {
5743   if (timeit > 0)
5744     {
5745     int i;
5746     clock_t time_taken = 0;
5747     for (i = 0; i < timeit; i++)
5748       {
5749       clock_t start_time;
5750       SUB1(pcre2_code_free, compiled_code);
5751       PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5752         pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5753         use_pat_context);
5754       start_time = clock();
5755       PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5756       time_taken += clock() - start_time;
5757       }
5758     total_jit_compile_time += time_taken;
5759     fprintf(outfile, "JIT compile  %.4f milliseconds\n",
5760       (((double)time_taken * 1000.0) / (double)timeit) /
5761         (double)CLOCKS_PER_SEC);
5762     }
5763   else
5764     {
5765     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5766     }
5767   }
5768 
5769 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5770 and 32-bit buffers can be marked completely undefined, but we must leave the
5771 pattern in the 8-bit buffer defined because it may be read from a callout
5772 during matching. */
5773 
5774 #ifdef SUPPORT_VALGRIND
5775 #ifdef SUPPORT_PCRE2_8
5776 if (test_mode == PCRE8_MODE)
5777   {
5778   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5779     pbuffer8_size - valgrind_access_length);
5780   }
5781 #endif
5782 #ifdef SUPPORT_PCRE2_16
5783 if (test_mode == PCRE16_MODE)
5784   {
5785   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5786   }
5787 #endif
5788 #ifdef SUPPORT_PCRE2_32
5789 if (test_mode == PCRE32_MODE)
5790   {
5791   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5792   }
5793 #endif
5794 #endif
5795 
5796 /* Compilation failed; go back for another re, skipping to blank line
5797 if non-interactive. */
5798 
5799 if (TEST(compiled_code, ==, NULL))
5800   {
5801   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5802     (int)erroroffset);
5803   if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5804   return PR_SKIP;
5805   }
5806 
5807 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5808 locked out at compile time, but we must also check for occurrences of \P, \p,
5809 and \X, which are only supported when Unicode is supported. */
5810 
5811 if (forbid_utf != 0)
5812   {
5813   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5814     {
5815     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5816       "#forbid_utf command\n");
5817     return PR_SKIP;
5818     }
5819   }
5820 
5821 /* Remember the maximum lookbehind, for partial matching. */
5822 
5823 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5824   return PR_ABEND;
5825 
5826 /* Remember the number of captures. */
5827 
5828 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5829   return PR_ABEND;
5830 
5831 /* If an explicit newline modifier was given, set the information flag in the
5832 pattern so that it is preserved over push/pop. */
5833 
5834 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5835   {
5836   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5837   }
5838 
5839 /* Output code size and other information if requested. */
5840 
5841 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5842 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5843 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5844   {
5845   int rc = show_pattern_info();
5846   if (rc != PR_OK) return rc;
5847   }
5848 
5849 /* The "push" control requests that the compiled pattern be remembered on a
5850 stack. This is mainly for testing the serialization functionality. */
5851 
5852 if ((pat_patctl.control & CTL_PUSH) != 0)
5853   {
5854   if (patstacknext >= PATSTACKSIZE)
5855     {
5856     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5857     return PR_ABEND;
5858     }
5859   patstack[patstacknext++] = PTR(compiled_code);
5860   SET(compiled_code, NULL);
5861   }
5862 
5863 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5864 copy of the pattern, the latter with a copy of its character tables. This tests
5865 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5866 
5867 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5868   {
5869   if (patstacknext >= PATSTACKSIZE)
5870     {
5871     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5872     return PR_ABEND;
5873     }
5874   if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5875     {
5876     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5877     }
5878   else
5879     {
5880     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5881       compiled_code); }
5882   }
5883 
5884 return PR_OK;
5885 }
5886 
5887 
5888 
5889 /*************************************************
5890 *          Check heap, match or depth limit      *
5891 *************************************************/
5892 
5893 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5894 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5895 
5896 Arguments:
5897   pp        the subject string
5898   ulen      length of subject or PCRE2_ZERO_TERMINATED
5899   errnumber defines which limit to test
5900   msg       string to include in final message
5901 
5902 Returns:    the return from the final match function call
5903 */
5904 
5905 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5906 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5907 {
5908 int capcount;
5909 uint32_t min = 0;
5910 uint32_t mid = 64;
5911 uint32_t max = UINT32_MAX;
5912 
5913 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5914 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
5915 PCRE2_SET_HEAP_LIMIT(dat_context, max);
5916 
5917 for (;;)
5918   {
5919   uint32_t stack_start = 0;
5920 
5921   if (errnumber == PCRE2_ERROR_HEAPLIMIT)
5922     {
5923     PCRE2_SET_HEAP_LIMIT(dat_context, mid);
5924     }
5925   else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5926     {
5927     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5928     }
5929   else
5930     {
5931     PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
5932     }
5933 
5934   if ((dat_datctl.control & CTL_DFA) != 0)
5935     {
5936     stack_start = DFA_START_RWS_SIZE/1024;
5937     if (dfa_workspace == NULL)
5938       dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5939     if (dfa_matched++ == 0)
5940       dfa_workspace[0] = -1;  /* To catch bad restart */
5941     PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5942       dat_datctl.options, match_data,
5943       PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
5944     }
5945 
5946   else if ((pat_patctl.control & CTL_JITFAST) != 0)
5947     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5948       dat_datctl.options, match_data, PTR(dat_context));
5949 
5950   else
5951     {
5952     stack_start = START_FRAMES_SIZE/1024;
5953     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5954       dat_datctl.options, match_data, PTR(dat_context));
5955     }
5956 
5957   if (capcount == errnumber)
5958     {
5959     if ((mid & 0x80000000u) != 0)
5960       {
5961       fprintf(outfile, "Can't find minimum %s limit: check pattern for "
5962         "restriction\n", msg);
5963       break;
5964       }
5965 
5966     min = mid;
5967     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5968     }
5969   else if (capcount >= 0 ||
5970            capcount == PCRE2_ERROR_NOMATCH ||
5971            capcount == PCRE2_ERROR_PARTIAL)
5972     {
5973     /* If we've not hit the error with a heap limit less than the size of the
5974     initial stack frame vector (for pcre2_match()) or the initial stack
5975     workspace vector (for pcre2_dfa_match()), the heap is not being used, so
5976     the minimum limit is zero; there's no need to go on. The other limits are
5977     always greater than zero. */
5978 
5979     if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
5980       {
5981       fprintf(outfile, "Minimum %s limit = 0\n", msg);
5982       break;
5983       }
5984     if (mid == min + 1)
5985       {
5986       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5987       break;
5988       }
5989     max = mid;
5990     mid = (min + max)/2;
5991     }
5992   else break;    /* Some other error */
5993   }
5994 
5995 return capcount;
5996 }
5997 
5998 
5999 
6000 /*************************************************
6001 *        Substitute callout function             *
6002 *************************************************/
6003 
6004 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6005 Print out the data that is passed back. The substitute callout block is
6006 identical for all code unit widths, so we just pick one.
6007 
6008 Arguments:
6009   scb         pointer to substitute callout block
6010   data_ptr    callout data
6011 
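6012 Returns:      0 normally; -1 if the substitute_stop count is reached (STOPPED);
                   +1 if the substitute_skip count is reached (SKIPPED)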
6013 */
6014 
6015 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6016 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6017   void *data_ptr)
6018 {
6019 int yield = 0;
6020 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6021 (void)data_ptr;   /* Not used */
6022 
6023 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6024   scb->subscount, scb->oveccount,
6025   SIZ_CAST scb->ovector[0], SIZ_CAST scb->ovector[1]);
6026 
6027 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6028   utf, outfile);
6029 
6030 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6031   SIZ_CAST scb->output_offsets[0], SIZ_CAST scb->output_offsets[1]);
6032 
6033 PCHARSV(scb->output, scb->output_offsets[0],
6034   scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6035 
6036 if (scb->subscount == dat_datctl.substitute_stop)
6037   {
6038   yield = -1;
6039   fprintf(outfile, " STOPPED");
6040   }
6041 else if (scb->subscount == dat_datctl.substitute_skip)
6042   {
6043   yield = +1;
6044   fprintf(outfile, " SKIPPED");
6045   }
6046 
6047 fprintf(outfile, "\"\n");
6048 return yield;
6049 }
6050 
6051 
6052 /*************************************************
6053 *              Callout function                  *
6054 *************************************************/
6055 
6056 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6057 we are in the match (unless suppressed). Yield zero unless more callouts than
6058 the fail count, or the callout data is not zero. The only differences in the
6059 callout block for different code unit widths are that the pointers to the
6060 subject, the most recent MARK, and a callout argument string point to strings
6061 of the appropriate width. Casts can be used to deal with this.
6062 
6063 Arguments:
6064   cb                a pointer to a callout block
6065   callout_data_ptr  the provided callout data
6066 
6067 Returns:            0 or 1 or an error, as determined by settings
6068 */
6069 
6070 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6071 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6072 {
6073 FILE *f, *fdefault;
6074 uint32_t i, pre_start, post_start, subject_length;
6075 PCRE2_SIZE current_position;
6076 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6077 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6078 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6079 
6080 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6081 happens only once in simple cases, but we want to repeat after any additional
6082 output caused by CALLOUT_EXTRA. */
6083 
6084 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6085   NULL : outfile;
6086 
6087 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6088   {
6089   f = outfile;
6090   switch (cb->callout_flags)
6091     {
6092     case PCRE2_CALLOUT_BACKTRACK:
6093     fprintf(f, "Backtrack\n");
6094     break;
6095 
6096     case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6097     fprintf(f, "Backtrack\nNo other matching paths\n");
6098     /* Fall through */
6099 
6100     case PCRE2_CALLOUT_STARTMATCH:
6101     fprintf(f, "New match attempt\n");
6102     break;
6103 
6104     default:
6105     f = fdefault;
6106     break;
6107     }
6108   }
6109 else f = fdefault;
6110 
6111 /* For a callout with a string argument, show the string first because there
6112 isn't a tidy way to fit it in the rest of the data. */
6113 
6114 if (cb->callout_string != NULL)
6115   {
6116   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6117   fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6118     SIZ_CAST cb->callout_string_offset, delimiter);
6119   PCHARSV(cb->callout_string, 0,
6120     cb->callout_string_length, utf, outfile);
6121   for (i = 0; callout_start_delims[i] != 0; i++)
6122     if (delimiter == callout_start_delims[i])
6123       {
6124       delimiter = callout_end_delims[i];
6125       break;
6126       }
6127   fprintf(outfile, "%c", delimiter);
6128   if (!callout_capture) fprintf(outfile, "\n");
6129   }
6130 
6131 /* Show captured strings if required */
6132 
6133 if (callout_capture)
6134   {
6135   if (cb->callout_string == NULL)
6136     fprintf(outfile, "Callout %d:", cb->callout_number);
6137   fprintf(outfile, " last capture = %d\n", cb->capture_last);
6138   for (i = 2; i < cb->capture_top * 2; i += 2)
6139     {
6140     fprintf(outfile, "%2d: ", i/2);
6141     if (cb->offset_vector[i] == PCRE2_UNSET)
6142       fprintf(outfile, "<unset>");
6143     else
6144       {
6145       PCHARSV(cb->subject, cb->offset_vector[i],
6146         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6147       }
6148     fprintf(outfile, "\n");
6149     }
6150   }
6151 
6152 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6153 non-printing characters), the first time, or if giving full details. On
6154 subsequent calls in the same match, we use PCHARS() just to find the printed
6155 lengths of the substrings. */
6156 
6157 if (callout_where)
6158   {
6159   if (f != NULL) fprintf(f, "--->");
6160 
6161   /* The subject before the match start. */
6162 
6163   PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6164 
6165   /* If a lookbehind is involved, the current position may be earlier than the
6166   match start. If so, use the match start instead. */
6167 
6168   current_position = (cb->current_position >= cb->start_match)?
6169     cb->current_position : cb->start_match;
6170 
6171   /* The subject between the match start and the current position. */
6172 
6173   PCHARS(post_start, cb->subject, cb->start_match,
6174     current_position - cb->start_match, utf, f);
6175 
6176   /* Print from the current position to the end. */
6177 
6178   PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6179     utf, f);
6180 
6181   /* Calculate the total subject printed length (no print). */
6182 
6183   PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6184 
6185   if (f != NULL) fprintf(f, "\n");
6186 
6187   /* For automatic callouts, show the pattern offset. Otherwise, for a
6188   numerical callout whose number has not already been shown with captured
6189   strings, show the number here. A callout with a string argument has been
6190   displayed above. */
6191 
6192   if (cb->callout_number == 255)
6193     {
6194     fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6195     if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
6196     }
6197   else
6198     {
6199     if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
6200       else fprintf(outfile, "%3d ", cb->callout_number);
6201     }
6202 
6203   /* Now show position indicators */
6204 
6205   for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6206   fprintf(outfile, "^");
6207 
6208   if (post_start > 0)
6209     {
6210     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6211     fprintf(outfile, "^");
6212     }
6213 
6214   for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6215     fprintf(outfile, " ");
6216 
6217   if (cb->next_item_length != 0)
6218     fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6219       pbuffer8 + cb->pattern_position);
6220   else
6221     fprintf(outfile, "End of pattern");
6222 
6223   fprintf(outfile, "\n");
6224   }
6225 
6226 first_callout = FALSE;
6227 
6228 /* Show any mark info */
6229 
6230 if (cb->mark != last_callout_mark)
6231   {
6232   if (cb->mark == NULL)
6233     fprintf(outfile, "Latest Mark: <unset>\n");
6234   else
6235     {
6236     fprintf(outfile, "Latest Mark: ");
6237     PCHARSV(cb->mark, 0, -1, utf, outfile);
6238     putc('\n', outfile);
6239     }
6240   last_callout_mark = cb->mark;
6241   }
6242 
6243 /* Show callout data */
6244 
6245 if (callout_data_ptr != NULL)
6246   {
6247   int callout_data = *((int32_t *)callout_data_ptr);
6248   if (callout_data != 0)
6249     {
6250     fprintf(outfile, "Callout data = %d\n", callout_data);
6251     return callout_data;
6252     }
6253   }
6254 
6255 /* Keep count and give the appropriate return code */
6256 
6257 callout_count++;
6258 
6259 if (cb->callout_number == dat_datctl.cerror[0] &&
6260     callout_count >= dat_datctl.cerror[1])
6261   return PCRE2_ERROR_CALLOUT;
6262 
6263 if (cb->callout_number == dat_datctl.cfail[0] &&
6264     callout_count >= dat_datctl.cfail[1])
6265   return 1;
6266 
6267 return 0;
6268 }
6269 
6270 
6271 
6272 /*************************************************
6273 *       Handle *MARK and copy/get tests          *
6274 *************************************************/
6275 
6276 /* This function is called after complete and partial matches. It runs the
6277 tests for substring extraction.
6278 
6279 Arguments:
6280   utf       TRUE for utf
6281   capcount  return from pcre2_match()
6282 
6283 Returns:    FALSE if print_error_message() fails
6284 */
6285 
6286 static BOOL
copy_and_get(BOOL utf,int capcount)6287 copy_and_get(BOOL utf, int capcount)
6288 {
6289 int i;
6290 uint8_t *nptr;
6291 
6292 /* Test copy strings by number */
6293 
6294 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6295   {
6296   int rc;
6297   PCRE2_SIZE length, length2;
6298   uint32_t copybuffer[256];
6299   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6300   length = sizeof(copybuffer)/code_unit_size;
6301   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6302   if (rc < 0)
6303     {
6304     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6305     if (!print_error_message(rc, "", "\n")) return FALSE;
6306     }
6307   else
6308     {
6309     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6310     if (rc < 0)
6311       {
6312       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6313       if (!print_error_message(rc, "", "\n")) return FALSE;
6314       }
6315     else if (length2 != length)
6316       {
6317       fprintf(outfile, "Mismatched substring lengths: %"
6318         SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6319       }
6320     fprintf(outfile, "%2dC ", n);
6321     PCHARSV(copybuffer, 0, length, utf, outfile);
6322     fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6323     }
6324   }
6325 
6326 /* Test copy strings by name */
6327 
6328 nptr = dat_datctl.copy_names;
6329 for (;;)
6330   {
6331   int rc;
6332   int groupnumber;
6333   PCRE2_SIZE length, length2;
6334   uint32_t copybuffer[256];
6335   int namelen = strlen((const char *)nptr);
6336 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6337   PCRE2_SIZE cnl = namelen;
6338 #endif
6339   if (namelen == 0) break;
6340 
6341 #ifdef SUPPORT_PCRE2_8
6342   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6343 #endif
6344 #ifdef SUPPORT_PCRE2_16
6345   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6346 #endif
6347 #ifdef SUPPORT_PCRE2_32
6348   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6349 #endif
6350 
6351   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6352   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6353     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6354 
6355   length = sizeof(copybuffer)/code_unit_size;
6356   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6357   if (rc < 0)
6358     {
6359     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6360     if (!print_error_message(rc, "", "\n")) return FALSE;
6361     }
6362   else
6363     {
6364     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6365     if (rc < 0)
6366       {
6367       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6368       if (!print_error_message(rc, "", "\n")) return FALSE;
6369       }
6370     else if (length2 != length)
6371       {
6372       fprintf(outfile, "Mismatched substring lengths: %"
6373         SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6374       }
6375     fprintf(outfile, "  C ");
6376     PCHARSV(copybuffer, 0, length, utf, outfile);
6377     fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6378     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6379       else fprintf(outfile, " (non-unique)\n");
6380     }
6381   nptr += namelen + 1;
6382   }
6383 
6384 /* Test get strings by number */
6385 
6386 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6387   {
6388   int rc;
6389   PCRE2_SIZE length;
6390   void *gotbuffer;
6391   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6392   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6393   if (rc < 0)
6394     {
6395     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6396     if (!print_error_message(rc, "", "\n")) return FALSE;
6397     }
6398   else
6399     {
6400     fprintf(outfile, "%2dG ", n);
6401     PCHARSV(gotbuffer, 0, length, utf, outfile);
6402     fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6403     PCRE2_SUBSTRING_FREE(gotbuffer);
6404     }
6405   }
6406 
6407 /* Test get strings by name */
6408 
6409 nptr = dat_datctl.get_names;
6410 for (;;)
6411   {
6412   PCRE2_SIZE length;
6413   void *gotbuffer;
6414   int rc;
6415   int groupnumber;
6416   int namelen = strlen((const char *)nptr);
6417 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6418   PCRE2_SIZE cnl = namelen;
6419 #endif
6420   if (namelen == 0) break;
6421 
6422 #ifdef SUPPORT_PCRE2_8
6423   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6424 #endif
6425 #ifdef SUPPORT_PCRE2_16
6426   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6427 #endif
6428 #ifdef SUPPORT_PCRE2_32
6429   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6430 #endif
6431 
6432   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6433   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6434     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6435 
6436   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6437   if (rc < 0)
6438     {
6439     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6440     if (!print_error_message(rc, "", "\n")) return FALSE;
6441     }
6442   else
6443     {
6444     fprintf(outfile, "  G ");
6445     PCHARSV(gotbuffer, 0, length, utf, outfile);
6446     fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6447     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6448       else fprintf(outfile, " (non-unique)\n");
6449     PCRE2_SUBSTRING_FREE(gotbuffer);
6450     }
6451   nptr += namelen + 1;
6452   }
6453 
6454 /* Test getting the complete list of captured strings. */
6455 
6456 if ((dat_datctl.control & CTL_GETALL) != 0)
6457   {
6458   int rc;
6459   void **stringlist;
6460   PCRE2_SIZE *lengths;
6461   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6462   if (rc < 0)
6463     {
6464     fprintf(outfile, "get substring list failed (%d): ", rc);
6465     if (!print_error_message(rc, "", "\n")) return FALSE;
6466     }
6467   else
6468     {
6469     for (i = 0; i < capcount; i++)
6470       {
6471       fprintf(outfile, "%2dL ", i);
6472       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6473       putc('\n', outfile);
6474       }
6475     if (stringlist[i] != NULL)
6476       fprintf(outfile, "string list not terminated by NULL\n");
6477     PCRE2_SUBSTRING_LIST_FREE(stringlist);
6478     }
6479   }
6480 
6481 return TRUE;
6482 }
6483 
6484 
6485 
6486 /*************************************************
6487 *            Show an entire ovector              *
6488 *************************************************/
6489 
6490 /* This function is called after partial matching or match failure, when the
6491 "allvector" modifier is set. It is a means of checking the contents of the
6492 entire ovector, to ensure no modification of fields that should be unchanged.
6493 
6494 Arguments:
6495   ovector      points to the ovector
6496   oveccount    number of pairs
6497 
6498 Returns:       nothing
6499 */
6500 
6501 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6502 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6503 {
6504 uint32_t i;
6505 for (i = 0; i < 2*oveccount; i += 2)
6506   {
6507   PCRE2_SIZE start = ovector[i];
6508   PCRE2_SIZE end = ovector[i+1];
6509 
6510   fprintf(outfile, "%2d: ", i/2);
6511   if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6512     fprintf(outfile, "<unset>\n");
6513   else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6514     fprintf(outfile, "<unchanged>\n");
6515   else
6516     fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6517       (unsigned long int)end);
6518   }
6519 }
6520 
6521 
6522 /*************************************************
6523 *               Process a data line              *
6524 *************************************************/
6525 
6526 /* The line is in buffer; it will not be empty.
6527 
6528 Arguments:  none
6529 
6530 Returns:    PR_OK     continue processing next line
6531             PR_SKIP   skip to a blank line
6532             PR_ABEND  abort the pcre2test run
6533 */
6534 
6535 static int
process_data(void)6536 process_data(void)
6537 {
6538 PCRE2_SIZE len, ulen, arg_ulen;
6539 uint32_t gmatched;
6540 uint32_t c, k;
6541 uint32_t g_notempty = 0;
6542 uint8_t *p, *pp, *start_rep;
6543 size_t needlen;
6544 void *use_dat_context;
6545 BOOL utf;
6546 BOOL subject_literal;
6547 
6548 PCRE2_SIZE *ovector;
6549 PCRE2_SIZE ovecsave[3];
6550 uint32_t oveccount;
6551 
6552 #ifdef SUPPORT_PCRE2_8
6553 uint8_t *q8 = NULL;
6554 #endif
6555 #ifdef SUPPORT_PCRE2_16
6556 uint16_t *q16 = NULL;
6557 #endif
6558 #ifdef SUPPORT_PCRE2_32
6559 uint32_t *q32 = NULL;
6560 #endif
6561 
6562 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6563 
6564 /* Copy the default context and data control blocks to the active ones. Then
6565 copy from the pattern the controls that can be set in either the pattern or the
6566 data. This allows them to be overridden in the data line. We do not do this for
6567 options because those that are common apply separately to compiling and
6568 matching. */
6569 
6570 DATCTXCPY(dat_context, default_dat_context);
6571 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6572 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6573 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6574 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6575 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6576 
6577 if (dat_datctl.substitute_skip == 0)
6578     dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6579 if (dat_datctl.substitute_stop == 0)
6580     dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6581 
6582 /* Initialize for scanning the data line. */
6583 
6584 #ifdef SUPPORT_PCRE2_8
6585 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6586   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6587   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6588 #else
6589 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6590 #endif
6591 
6592 start_rep = NULL;
6593 len = strlen((const char *)buffer);
6594 while (len > 0 && isspace(buffer[len-1])) len--;
6595 buffer[len] = 0;
6596 p = buffer;
6597 while (isspace(*p)) p++;
6598 
6599 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6600 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6601 
6602 if (utf)
6603   {
6604   uint8_t *q;
6605   uint32_t cc;
6606   int n = 1;
6607   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6608   if (n <= 0)
6609     {
6610     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6611       "in UTF mode\n");
6612     return PR_OK;
6613     }
6614   }
6615 
6616 #ifdef SUPPORT_VALGRIND
6617 /* Mark the dbuffer as addressable but undefined again. */
6618 if (dbuffer != NULL)
6619   {
6620   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6621   }
6622 #endif
6623 
6624 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6625 the number of code units that will be needed (though the buffer may have to be
6626 extended if replication is involved). */
6627 
6628 needlen = (size_t)((len+1) * code_unit_size);
6629 if (dbuffer == NULL || needlen >= dbuffer_size)
6630   {
6631   while (needlen >= dbuffer_size) dbuffer_size *= 2;
6632   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6633   if (dbuffer == NULL)
6634     {
6635     fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6636     exit(1);
6637     }
6638   }
6639 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
6640 
6641 /* Scan the data line, interpreting data escapes, and put the result into a
6642 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6643 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6644 */
6645 
6646 while ((c = *p++) != 0)
6647   {
6648   int32_t i = 0;
6649   size_t replen;
6650 
6651   /* ] may mark the end of a replicated sequence */
6652 
6653   if (c == ']' && start_rep != NULL)
6654     {
6655     long li;
6656     char *endptr;
6657     size_t qoffset = CAST8VAR(q) - dbuffer;
6658     size_t rep_offset = start_rep - dbuffer;
6659 
6660     if (*p++ != '{')
6661       {
6662       fprintf(outfile, "** Expected '{' after \\[....]\n");
6663       return PR_OK;
6664       }
6665 
6666     li = strtol((const char *)p, &endptr, 10);
6667     if (S32OVERFLOW(li))
6668       {
6669       fprintf(outfile, "** Repeat count too large\n");
6670       return PR_OK;
6671       }
6672 
6673     p = (uint8_t *)endptr;
6674     if (*p++ != '}')
6675       {
6676       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6677       return PR_OK;
6678       }
6679 
6680     i = (int32_t)li;
6681     if (i-- == 0)
6682       {
6683       fprintf(outfile, "** Zero repeat not allowed\n");
6684       return PR_OK;
6685       }
6686 
6687     replen = CAST8VAR(q) - start_rep;
6688     needlen += replen * i;
6689 
6690     if (needlen >= dbuffer_size)
6691       {
6692       while (needlen >= dbuffer_size) dbuffer_size *= 2;
6693       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6694       if (dbuffer == NULL)
6695         {
6696         fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6697         exit(1);
6698         }
6699       SETCASTPTR(q, dbuffer + qoffset);
6700       start_rep = dbuffer + rep_offset;
6701       }
6702 
6703     while (i-- > 0)
6704       {
6705       memcpy(CAST8VAR(q), start_rep, replen);
6706       SETPLUS(q, replen/code_unit_size);
6707       }
6708 
6709     start_rep = NULL;
6710     continue;
6711     }
6712 
6713   /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6714   set, do the fudge for setting the top bit. */
6715 
6716   if (c != '\\' || subject_literal)
6717     {
6718     uint32_t topbit = 0;
6719     if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6720       {
6721       topbit = 0x80000000;
6722       c = *p++;
6723       }
6724     if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6725       HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6726     c |= topbit;
6727     }
6728 
6729   /* Handle backslash escapes */
6730 
6731   else switch ((c = *p++))
6732     {
6733     case '\\': break;
6734     case 'a': c = CHAR_BEL; break;
6735     case 'b': c = '\b'; break;
6736     case 'e': c = CHAR_ESC; break;
6737     case 'f': c = '\f'; break;
6738     case 'n': c = '\n'; break;
6739     case 'r': c = '\r'; break;
6740     case 't': c = '\t'; break;
6741     case 'v': c = '\v'; break;
6742 
6743     case '0': case '1': case '2': case '3':
6744     case '4': case '5': case '6': case '7':
6745     c -= '0';
6746     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6747       c = c * 8 + *p++ - '0';
6748     break;
6749 
6750     case 'o':
6751     if (*p == '{')
6752       {
6753       uint8_t *pt = p;
6754       c = 0;
6755       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6756         {
6757         if (++i == 12)
6758           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6759                            "using only the first twelve.\n");
6760         else c = c * 8 + *pt - '0';
6761         }
6762       if (*pt == '}') p = pt + 1;
6763         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6764       }
6765     break;
6766 
6767     case 'x':
6768     if (*p == '{')
6769       {
6770       uint8_t *pt = p;
6771       c = 0;
6772 
6773       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6774       when isxdigit() is a macro that refers to its argument more than
6775       once. This is banned by the C Standard, but apparently happens in at
6776       least one MacOS environment. */
6777 
6778       for (pt++; isxdigit(*pt); pt++)
6779         {
6780         if (++i == 9)
6781           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6782                            "using only the first eight.\n");
6783         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6784         }
6785       if (*pt == '}')
6786         {
6787         p = pt + 1;
6788         break;
6789         }
6790       /* Not correct form for \x{...}; fall through */
6791       }
6792 
6793     /* \x without {} always defines just one byte in 8-bit mode. This
6794     allows UTF-8 characters to be constructed byte by byte, and also allows
6795     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6796     Otherwise, pass it down as data. */
6797 
6798     c = 0;
6799     while (i++ < 2 && isxdigit(*p))
6800       {
6801       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6802       p++;
6803       }
6804 #if defined SUPPORT_PCRE2_8
6805     if (utf && (test_mode == PCRE8_MODE))
6806       {
6807       *q8++ = c;
6808       continue;
6809       }
6810 #endif
6811     break;
6812 
6813     case 0:     /* \ followed by EOF allows for an empty line */
6814     p--;
6815     continue;
6816 
6817     case '=':   /* \= terminates the data, starts modifiers */
6818     goto ENDSTRING;
6819 
6820     case '[':   /* \[ introduces a replicated character sequence */
6821     if (start_rep != NULL)
6822       {
6823       fprintf(outfile, "** Nested replication is not supported\n");
6824       return PR_OK;
6825       }
6826     start_rep = CAST8VAR(q);
6827     continue;
6828 
6829     default:
6830     if (isalnum(c))
6831       {
6832       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6833       return PR_OK;
6834       }
6835     }
6836 
6837   /* We now have a character value in c that may be greater than 255.
6838   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6839   than 127 in UTF mode must have come from \x{...} or octal constructs
6840   because values from \x.. get this far only in non-UTF mode. */
6841 
6842 #ifdef SUPPORT_PCRE2_8
6843   if (test_mode == PCRE8_MODE)
6844     {
6845     if (utf)
6846       {
6847       if (c > 0x7fffffff)
6848         {
6849         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6850           "and so cannot be converted to UTF-8\n", c);
6851         return PR_OK;
6852         }
6853       q8 += ord2utf8(c, q8);
6854       }
6855     else
6856       {
6857       if (c > 0xffu)
6858         {
6859         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6860           "and UTF-8 mode is not enabled.\n", c);
6861         fprintf(outfile, "** Truncation will probably give the wrong "
6862           "result.\n");
6863         }
6864       *q8++ = (uint8_t)c;
6865       }
6866     }
6867 #endif
6868 #ifdef SUPPORT_PCRE2_16
6869   if (test_mode == PCRE16_MODE)
6870     {
6871     if (utf)
6872       {
6873       if (c > 0x10ffffu)
6874         {
6875         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6876           "0x10ffff and so cannot be converted to UTF-16\n", c);
6877         return PR_OK;
6878         }
6879       else if (c >= 0x10000u)
6880         {
6881         c-= 0x10000u;
6882         *q16++ = 0xD800 | (c >> 10);
6883         *q16++ = 0xDC00 | (c & 0x3ff);
6884         }
6885       else
6886         *q16++ = c;
6887       }
6888     else
6889       {
6890       if (c > 0xffffu)
6891         {
6892         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6893           "and UTF-16 mode is not enabled.\n", c);
6894         fprintf(outfile, "** Truncation will probably give the wrong "
6895           "result.\n");
6896         }
6897 
6898       *q16++ = (uint16_t)c;
6899       }
6900     }
6901 #endif
6902 #ifdef SUPPORT_PCRE2_32
6903   if (test_mode == PCRE32_MODE)
6904     {
6905     *q32++ = c;
6906     }
6907 #endif
6908   }
6909 
6910 ENDSTRING:
6911 SET(*q, 0);
6912 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
6913 ulen = len/code_unit_size;                /* Length in code units */
6914 arg_ulen = ulen;                          /* Value to use in match arg */
6915 
6916 /* If the string was terminated by \= we must now interpret modifiers. */
6917 
6918 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
6919   return PR_OK;
6920 
6921 /* Setting substitute_{skip,stop} implies a substitute callout. */
6922 
6923 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
6924   dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
6925 
6926 /* Check for mutually exclusive modifiers. At present, these are all in the
6927 first control word. */
6928 
6929 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
6930   {
6931   c = dat_datctl.control & exclusive_dat_controls[k];
6932   if (c != 0 && c != (c & (~c+1)))
6933     {
6934     show_controls(c, 0, "** Not allowed together:");
6935     fprintf(outfile, "\n");
6936     return PR_OK;
6937     }
6938   }
6939 
6940 if (pat_patctl.replacement[0] != 0)
6941   {
6942   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
6943       (dat_datctl.control & CTL_NULLCONTEXT) != 0)
6944     {
6945     fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
6946     return PR_OK;
6947     }
6948 
6949   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
6950     fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
6951   }
6952 
6953 /* Warn for modifiers that are ignored for DFA. */
6954 
6955 if ((dat_datctl.control & CTL_DFA) != 0)
6956   {
6957   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
6958     fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
6959   }
6960 
6961 /* We now have the subject in dbuffer, with len containing the byte length, and
6962 ulen containing the code unit length, with a copy in arg_ulen for use in match
6963 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
6964 zero_terminate modifier is present).
6965 
6966 Move the data to the end of the buffer so that a read over the end can be
6967 caught by valgrind or other means. If we have explicit valgrind support, mark
6968 the unused start of the buffer unaddressable. If we are using the POSIX
6969 interface, or testing zero-termination, we must include the terminating zero in
6970 the usable data. */
6971 
6972 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
6973                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
6974 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
6975 #ifdef SUPPORT_VALGRIND
6976   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
6977 #endif
6978 
6979 /* Now pp points to the subject string. POSIX matching is only possible in
6980 8-bit mode, and it does not support timing or other fancy features. Some were
6981 checked at compile time, but we need to check the match-time settings here. */
6982 
6983 #ifdef SUPPORT_PCRE2_8
6984 if ((pat_patctl.control & CTL_POSIX) != 0)
6985   {
6986   int rc;
6987   int eflags = 0;
6988   regmatch_t *pmatch = NULL;
6989   const char *msg = "** Ignored with POSIX interface:";
6990 
6991   if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
6992     prmsg(&msg, "callout_error");
6993   if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
6994     prmsg(&msg, "callout_fail");
6995   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
6996     prmsg(&msg, "copy");
6997   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
6998     prmsg(&msg, "get");
6999   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7000   if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7001 
7002   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7003     {
7004     fprintf(outfile, "%s", msg);
7005     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7006     msg = "";
7007     }
7008   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7009       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7010     {
7011     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7012                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7013     msg = "";
7014     }
7015 
7016   if (msg[0] == 0) fprintf(outfile, "\n");
7017 
7018   if (dat_datctl.oveccount > 0)
7019     {
7020     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7021     if (pmatch == NULL)
7022       {
7023       fprintf(outfile, "** Failed to get memory for recording matching "
7024         "information (size set = %du)\n", dat_datctl.oveccount);
7025       return PR_OK;
7026       }
7027     }
7028 
7029   if (dat_datctl.startend[0] != CFORE_UNSET)
7030     {
7031     pmatch[0].rm_so = dat_datctl.startend[0];
7032     pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7033       dat_datctl.startend[1] : len;
7034     eflags |= REG_STARTEND;
7035     }
7036 
7037   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7038   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7039   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7040 
7041   rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7042   if (rc != 0)
7043     {
7044     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7045     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7046     }
7047   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7048     fprintf(outfile, "Matched with REG_NOSUB\n");
7049   else if (dat_datctl.oveccount == 0)
7050     fprintf(outfile, "Matched without capture\n");
7051   else
7052     {
7053     size_t i, j;
7054     size_t last_printed = (size_t)dat_datctl.oveccount;
7055     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7056       {
7057       if (pmatch[i].rm_so >= 0)
7058         {
7059         PCRE2_SIZE start = pmatch[i].rm_so;
7060         PCRE2_SIZE end = pmatch[i].rm_eo;
7061         for (j = last_printed + 1; j < i; j++)
7062           fprintf(outfile, "%2d: <unset>\n", (int)j);
7063         last_printed = i;
7064         if (start > end)
7065           {
7066           start = pmatch[i].rm_eo;
7067           end = pmatch[i].rm_so;
7068           fprintf(outfile, "Start of matched string is beyond its end - "
7069             "displaying from end to start.\n");
7070           }
7071         fprintf(outfile, "%2d: ", (int)i);
7072         PCHARSV(pp, start, end - start, utf, outfile);
7073         fprintf(outfile, "\n");
7074 
7075         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7076             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7077           {
7078           fprintf(outfile, "%2d+ ", (int)i);
7079           /* Note: don't use the start/end variables here because we want to
7080           show the text from what is reported as the end. */
7081           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7082           fprintf(outfile, "\n"); }
7083         }
7084       }
7085     }
7086   free(pmatch);
7087   return PR_OK;
7088   }
7089 #endif  /* SUPPORT_PCRE2_8 */
7090 
7091  /* Handle matching via the native interface. Check for consistency of
7092 modifiers. */
7093 
7094 if (dat_datctl.startend[0] != CFORE_UNSET)
7095   fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7096 
7097 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7098 matching, even if the JIT compiler was used. */
7099 
7100 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7101     FLD(compiled_code, executable_jit) != NULL)
7102   {
7103   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7104   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7105   }
7106 
7107 /* Handle passing the subject as zero-terminated. */
7108 
7109 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7110   arg_ulen = PCRE2_ZERO_TERMINATED;
7111 
7112 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7113 NULL context. */
7114 
7115 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7116   NULL : PTR(dat_context);
7117 
7118 /* Enable display of malloc/free if wanted. We can do this only if either the
7119 pattern or the subject is processed with a context. */
7120 
7121 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7122 
7123 if (show_memory &&
7124     (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7125   fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7126     "context: ignored\n");
7127 
7128 /* Create and assign a JIT stack if requested. */
7129 
7130 if (dat_datctl.jitstack != 0)
7131   {
7132   if (dat_datctl.jitstack != jit_stack_size)
7133     {
7134     PCRE2_JIT_STACK_FREE(jit_stack);
7135     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7136     jit_stack_size = dat_datctl.jitstack;
7137     }
7138   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7139   }
7140 
7141 /* Or de-assign */
7142 
7143 else if (jit_stack != NULL)
7144   {
7145   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7146   PCRE2_JIT_STACK_FREE(jit_stack);
7147   jit_stack = NULL;
7148   jit_stack_size = 0;
7149   }
7150 
7151 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7152 if we want to verify that JIT was actually used. */
7153 
7154 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7155    {
7156    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7157    }
7158 
7159 /* Adjust match_data according to size of offsets required. A size of zero
7160 causes a new match data block to be obtained that exactly fits the pattern. */
7161 
7162 if (dat_datctl.oveccount == 0)
7163   {
7164   PCRE2_MATCH_DATA_FREE(match_data);
7165   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7166   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7167   }
7168 else if (dat_datctl.oveccount <= max_oveccount)
7169   {
7170   SETFLD(match_data, oveccount, dat_datctl.oveccount);
7171   }
7172 else
7173   {
7174   max_oveccount = dat_datctl.oveccount;
7175   PCRE2_MATCH_DATA_FREE(match_data);
7176   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7177   }
7178 
7179 if (CASTVAR(void *, match_data) == NULL)
7180   {
7181   fprintf(outfile, "** Failed to get memory for recording matching "
7182     "information (size requested: %d)\n", dat_datctl.oveccount);
7183   max_oveccount = 0;
7184   return PR_OK;
7185   }
7186 
7187 ovector = FLD(match_data, ovector);
7188 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7189 
7190 /* Replacement processing is ignored for DFA matching. */
7191 
7192 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7193   {
7194   fprintf(outfile, "** Ignored for DFA matching: replace\n");
7195   dat_datctl.replacement[0] = 0;
7196   }
7197 
7198 /* If a replacement string is provided, call pcre2_substitute() instead of one
7199 of the matching functions. First we have to convert the replacement string to
7200 the appropriate width. */
7201 
7202 if (dat_datctl.replacement[0] != 0)
7203   {
7204   int rc;
7205   uint8_t *pr;
7206   uint8_t rbuffer[REPLACE_BUFFSIZE];
7207   uint8_t nbuffer[REPLACE_BUFFSIZE];
7208   uint32_t xoptions;
7209   PCRE2_SIZE j, rlen, nsize, erroroffset;
7210   BOOL badutf = FALSE;
7211 
7212 #ifdef SUPPORT_PCRE2_8
7213   uint8_t *r8 = NULL;
7214 #endif
7215 #ifdef SUPPORT_PCRE2_16
7216   uint16_t *r16 = NULL;
7217 #endif
7218 #ifdef SUPPORT_PCRE2_32
7219   uint32_t *r32 = NULL;
7220 #endif
7221 
7222   /* Fill the ovector with junk to detect elements that do not get set
7223   when they should be (relevant only when "allvector" is specified). */
7224 
7225   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7226 
7227   if (timeitm)
7228     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7229 
7230   if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7231     fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7232 
7233   xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7234                 PCRE2_SUBSTITUTE_GLOBAL) |
7235              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7236                 PCRE2_SUBSTITUTE_EXTENDED) |
7237              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7238                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7239              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7240                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7241              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7242                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7243 
7244   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
7245   pr = dat_datctl.replacement;
7246 
7247   /* If the replacement starts with '[<number>]' we interpret that as length
7248   value for the replacement buffer. */
7249 
7250   nsize = REPLACE_BUFFSIZE/code_unit_size;
7251   if (*pr == '[')
7252     {
7253     PCRE2_SIZE n = 0;
7254     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7255     if (*pr++ != ']')
7256       {
7257       fprintf(outfile, "Bad buffer size in replacement string\n");
7258       return PR_OK;
7259       }
7260     if (n > nsize)
7261       {
7262       fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7263         "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
7264       return PR_OK;
7265       }
7266     nsize = n;
7267     }
7268 
7269   /* Now copy the replacement string to a buffer of the appropriate width. No
7270   escape processing is done for replacements. In UTF mode, check for an invalid
7271   UTF-8 input string, and if it is invalid, just copy its code units without
7272   UTF interpretation. This provides a means of checking that an invalid string
7273   is detected. Otherwise, UTF-8 can be used to include wide characters in a
7274   replacement. */
7275 
7276   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7277 
7278   /* Not UTF or invalid UTF-8: just copy the code units. */
7279 
7280   if (!utf || badutf)
7281     {
7282     while ((c = *pr++) != 0)
7283       {
7284 #ifdef SUPPORT_PCRE2_8
7285       if (test_mode == PCRE8_MODE) *r8++ = c;
7286 #endif
7287 #ifdef SUPPORT_PCRE2_16
7288       if (test_mode == PCRE16_MODE) *r16++ = c;
7289 #endif
7290 #ifdef SUPPORT_PCRE2_32
7291       if (test_mode == PCRE32_MODE) *r32++ = c;
7292 #endif
7293       }
7294     }
7295 
7296   /* Valid UTF-8 replacement string */
7297 
7298   else while ((c = *pr++) != 0)
7299     {
7300     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7301 
7302 #ifdef SUPPORT_PCRE2_8
7303     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7304 #endif
7305 
7306 #ifdef SUPPORT_PCRE2_16
7307     if (test_mode == PCRE16_MODE)
7308       {
7309       if (c >= 0x10000u)
7310         {
7311         c-= 0x10000u;
7312         *r16++ = 0xD800 | (c >> 10);
7313         *r16++ = 0xDC00 | (c & 0x3ff);
7314         }
7315       else *r16++ = c;
7316       }
7317 #endif
7318 
7319 #ifdef SUPPORT_PCRE2_32
7320     if (test_mode == PCRE32_MODE) *r32++ = c;
7321 #endif
7322     }
7323 
7324   SET(*r, 0);
7325   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7326     rlen = PCRE2_ZERO_TERMINATED;
7327   else
7328     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7329 
7330   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7331     {
7332     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7333     }
7334   else
7335     {
7336     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7337     }
7338 
7339   PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7340     dat_datctl.options|xoptions, match_data, use_dat_context,
7341     rbuffer, rlen, nbuffer, &nsize);
7342 
7343   if (rc < 0)
7344     {
7345     fprintf(outfile, "Failed: error %d", rc);
7346     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7347       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7348     fprintf(outfile, ": ");
7349     if (!print_error_message(rc, "", "")) return PR_ABEND;
7350     if (rc == PCRE2_ERROR_NOMEMORY &&
7351         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7352       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7353     }
7354   else
7355     {
7356     fprintf(outfile, "%2d: ", rc);
7357     PCHARSV(nbuffer, 0, nsize, utf, outfile);
7358     }
7359 
7360   fprintf(outfile, "\n");
7361   show_memory = FALSE;
7362 
7363   /* Show final ovector contents if requested. */
7364 
7365   if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7366     show_ovector(ovector, oveccount);
7367 
7368   return PR_OK;
7369   }   /* End of substitution handling */
7370 
7371 /* When a replacement string is not provided, run a loop for global matching
7372 with one of the basic matching functions. For altglobal (or first time round
7373 the loop), set an "unset" value for the previous match info. */
7374 
7375 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7376 
7377 for (gmatched = 0;; gmatched++)
7378   {
7379   PCRE2_SIZE j;
7380   int capcount;
7381 
7382   /* Fill the ovector with junk to detect elements that do not get set
7383   when they should be. */
7384 
7385   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7386 
7387   /* When matching is via pcre2_match(), we will detect the use of JIT via the
7388   stack callback function. */
7389 
7390   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7391 
7392   /* Do timing if required. */
7393 
7394   if (timeitm > 0)
7395     {
7396     int i;
7397     clock_t start_time, time_taken;
7398 
7399     if ((dat_datctl.control & CTL_DFA) != 0)
7400       {
7401       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7402         {
7403         fprintf(outfile, "Timing DFA restarts is not supported\n");
7404         return PR_OK;
7405         }
7406       if (dfa_workspace == NULL)
7407         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7408       start_time = clock();
7409       for (i = 0; i < timeitm; i++)
7410         {
7411         PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7412           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7413           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7414         }
7415       }
7416 
7417     else if ((pat_patctl.control & CTL_JITFAST) != 0)
7418       {
7419       start_time = clock();
7420       for (i = 0; i < timeitm; i++)
7421         {
7422         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7423           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7424           use_dat_context);
7425         }
7426       }
7427 
7428     else
7429       {
7430       start_time = clock();
7431       for (i = 0; i < timeitm; i++)
7432         {
7433         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7434           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7435           use_dat_context);
7436         }
7437       }
7438     total_match_time += (time_taken = clock() - start_time);
7439     fprintf(outfile, "Match time %.4f milliseconds\n",
7440       (((double)time_taken * 1000.0) / (double)timeitm) /
7441         (double)CLOCKS_PER_SEC);
7442     }
7443 
7444   /* Find the heap, match and depth limits if requested. The depth and heap
7445   limits are not relevant for JIT. The return from check_match_limit() is the
7446   return from the final call to pcre2_match() or pcre2_dfa_match(). */
7447 
7448   if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7449     {
7450     capcount = 0;  /* This stops compiler warnings */
7451 
7452     if (FLD(compiled_code, executable_jit) == NULL ||
7453           (dat_datctl.options & PCRE2_NO_JIT) != 0)
7454       {
7455       (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7456       }
7457 
7458     capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7459       "match");
7460 
7461     if (FLD(compiled_code, executable_jit) == NULL ||
7462         (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7463         (dat_datctl.control & CTL_DFA) != 0)
7464       {
7465       capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7466         "depth");
7467       }
7468 
7469     if (capcount == 0)
7470       {
7471       fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7472       capcount = dat_datctl.oveccount;
7473       }
7474     }
7475 
7476   /* Otherwise just run a single match, setting up a callout if required (the
7477   default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7478 
7479   else
7480     {
7481     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7482       {
7483       PCRE2_SET_CALLOUT(dat_context, callout_function,
7484         (void *)(&dat_datctl.callout_data));
7485       first_callout = TRUE;
7486       last_callout_mark = NULL;
7487       callout_count = 0;
7488       }
7489     else
7490       {
7491       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7492       }
7493 
7494     /* Run a single DFA or NFA match. */
7495 
7496     if ((dat_datctl.control & CTL_DFA) != 0)
7497       {
7498       if (dfa_workspace == NULL)
7499         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7500       if (dfa_matched++ == 0)
7501         dfa_workspace[0] = -1;  /* To catch bad restart */
7502       PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7503         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7504         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7505       if (capcount == 0)
7506         {
7507         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7508         capcount = dat_datctl.oveccount;
7509         }
7510       }
7511     else
7512       {
7513       if ((pat_patctl.control & CTL_JITFAST) != 0)
7514         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7515           dat_datctl.options | g_notempty, match_data, use_dat_context);
7516       else
7517         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7518           dat_datctl.options | g_notempty, match_data, use_dat_context);
7519       if (capcount == 0)
7520         {
7521         fprintf(outfile, "Matched, but too many substrings\n");
7522         capcount = dat_datctl.oveccount;
7523         }
7524       }
7525     }
7526 
7527   /* The result of the match is now in capcount. First handle a successful
7528   match. */
7529 
7530   if (capcount >= 0)
7531     {
7532     int i;
7533 
7534     if (capcount > (int)oveccount)   /* Check for lunatic return value */
7535       {
7536       fprintf(outfile,
7537         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7538         capcount, oveccount);
7539       capcount = oveccount;
7540       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7541         {
7542         fprintf(outfile, "** Global loop abandoned\n");
7543         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7544         }
7545       }
7546 
7547     /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7548     should be, but not for fast JIT, where it isn't supported. */
7549 
7550     if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7551         (pat_patctl.control & CTL_JITFAST) == 0)
7552       {
7553       if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7554         fprintf(outfile,
7555           "** PCRE2 error: flag not set after copy_matched_subject\n");
7556 
7557       if (CASTFLD(void *, match_data, subject) == pp)
7558         fprintf(outfile,
7559           "** PCRE2 error: copy_matched_subject has not copied\n");
7560 
7561       if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7562         fprintf(outfile,
7563           "** PCRE2 error: copy_matched_subject mismatch\n");
7564       }
7565 
7566     /* If this is not the first time round a global loop, check that the
7567     returned string has changed. If it has not, check for an empty string match
7568     at different starting offset from the previous match. This is a failed test
7569     retry for null-matching patterns that don't match at their starting offset,
7570     for example /(?<=\G.)/. A repeated match at the same point is not such a
7571     pattern, and must be discarded, and we then proceed to seek a non-null
7572     match at the current point. For any other repeated match, there is a bug
7573     somewhere and we must break the loop because it will go on for ever. We
7574     know that there are always at least two elements in the ovector. */
7575 
7576     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7577       {
7578       if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7579         {
7580         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7581         ovecsave[2] = dat_datctl.offset;
7582         continue;    /* Back to the top of the loop */
7583         }
7584       fprintf(outfile,
7585         "** PCRE2 error: global repeat returned the same string as previous\n");
7586       fprintf(outfile, "** Global loop abandoned\n");
7587       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7588       }
7589 
7590     /* "allcaptures" requests showing of all captures in the pattern, to check
7591     unset ones at the end. It may be set on the pattern or the data. Implement
7592     by setting capcount to the maximum. This is not relevant for DFA matching,
7593     so ignore it (warning given above). */
7594 
7595     if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7596       {
7597       capcount = maxcapcount + 1;   /* Allow for full match */
7598       if (capcount > (int)oveccount) capcount = oveccount;
7599       }
7600 
7601     /* "allvector" request showing the entire ovector. */
7602 
7603     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7604 
7605     /* Output the captured substrings. Note that, for the matched string,
7606     the use of \K in an assertion can make the start later than the end. */
7607 
7608     for (i = 0; i < 2*capcount; i += 2)
7609       {
7610       PCRE2_SIZE lleft, lmiddle, lright;
7611       PCRE2_SIZE start = ovector[i];
7612       PCRE2_SIZE end = ovector[i+1];
7613 
7614       if (start > end)
7615         {
7616         start = ovector[i+1];
7617         end = ovector[i];
7618         fprintf(outfile, "Start of matched string is beyond its end - "
7619           "displaying from end to start.\n");
7620         }
7621 
7622       fprintf(outfile, "%2d: ", i/2);
7623 
7624       /* Check for an unset group */
7625 
7626       if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7627         {
7628         fprintf(outfile, "<unset>\n");
7629         continue;
7630         }
7631 
7632       /* Check for silly offsets, in particular, values that have not been
7633       set when they should have been. However, if we are past the end of the
7634       captures for this pattern ("allvector" causes this), or if we are DFA
7635       matching, it isn't an error if the entry is unchanged. */
7636 
7637       if (start > ulen || end > ulen)
7638         {
7639         if (((dat_datctl.control & CTL_DFA) != 0 ||
7640               i >= (int)(2*maxcapcount + 2)) &&
7641             start == JUNK_OFFSET && end == JUNK_OFFSET)
7642           fprintf(outfile, "<unchanged>\n");
7643         else
7644           fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7645             (unsigned long int)start, (unsigned long int)end);
7646         continue;
7647         }
7648 
7649       /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7650       JIT, it is disabled above, with a comment.) When the match is done by the
7651       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7652       set, and if the leftmost consulted character is before the start of the
7653       match or the rightmost consulted character is past the end of the match,
7654       we want to show all consulted characters for the main matched string, and
7655       indicate which were lookarounds. */
7656 
7657       if (i == 0)
7658         {
7659         BOOL showallused;
7660         PCRE2_SIZE leftchar, rightchar;
7661 
7662         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7663           {
7664           leftchar = FLD(match_data, leftchar);
7665           rightchar = FLD(match_data, rightchar);
7666           showallused = i == 0 && (leftchar < start || rightchar > end);
7667           }
7668         else showallused = FALSE;
7669 
7670         if (showallused)
7671           {
7672           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7673           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7674           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7675           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7676             fprintf(outfile, " (JIT)");
7677           fprintf(outfile, "\n    ");
7678           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7679           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7680           for (j = 0; j < lright; j++) fprintf(outfile, ">");
7681           }
7682 
7683         /* When a pattern contains \K, the start of match position may be
7684         different to the start of the matched string. When this is the case,
7685         show it when requested. */
7686 
7687         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7688           {
7689           PCRE2_SIZE startchar;
7690           PCRE2_GET_STARTCHAR(startchar, match_data);
7691           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7692           PCHARSV(pp, start, end - start, utf, outfile);
7693           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7694             fprintf(outfile, " (JIT)");
7695           if (startchar != start)
7696             {
7697             fprintf(outfile, "\n    ");
7698             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7699             }
7700           }
7701 
7702         /* Otherwise, just show the matched string. */
7703 
7704         else
7705           {
7706           PCHARSV(pp, start, end - start, utf, outfile);
7707           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7708             fprintf(outfile, " (JIT)");
7709           }
7710         }
7711 
7712       /* Not the main matched string. Just show it unadorned. */
7713 
7714       else
7715         {
7716         PCHARSV(pp, start, end - start, utf, outfile);
7717         }
7718 
7719       fprintf(outfile, "\n");
7720 
7721       /* Note: don't use the start/end variables here because we want to
7722       show the text from what is reported as the end. */
7723 
7724       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7725           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7726         {
7727         fprintf(outfile, "%2d+ ", i/2);
7728         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7729         fprintf(outfile, "\n");
7730         }
7731       }
7732 
7733     /* Output (*MARK) data if requested */
7734 
7735     if ((dat_datctl.control & CTL_MARK) != 0 &&
7736          TESTFLD(match_data, mark, !=, NULL))
7737       {
7738       fprintf(outfile, "MK: ");
7739       PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7740       fprintf(outfile, "\n");
7741       }
7742 
7743     /* Process copy/get strings */
7744 
7745     if (!copy_and_get(utf, capcount)) return PR_ABEND;
7746 
7747     }    /* End of handling a successful match */
7748 
7749   /* There was a partial match. The value of ovector[0] is the bumpalong point,
7750   that is, startchar, not any \K point that might have been passed. */
7751 
7752   else if (capcount == PCRE2_ERROR_PARTIAL)
7753     {
7754     PCRE2_SIZE poffset;
7755     int backlength;
7756     int rubriclength = 0;
7757 
7758     fprintf(outfile, "Partial match");
7759     if ((dat_datctl.control & CTL_MARK) != 0 &&
7760          TESTFLD(match_data, mark, !=, NULL))
7761       {
7762       fprintf(outfile, ", mark=");
7763       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
7764         outfile);
7765       rubriclength += 7;
7766       }
7767     fprintf(outfile, ": ");
7768     rubriclength += 15;
7769 
7770     poffset = backchars(pp, ovector[0], maxlookbehind, utf);
7771     PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
7772     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7773 
7774     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7775       fprintf(outfile, " (JIT)");
7776     fprintf(outfile, "\n");
7777 
7778     if (backlength != 0)
7779       {
7780       int i;
7781       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7782       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7783       fprintf(outfile, "\n");
7784       }
7785 
7786     if (ulen != ovector[1])
7787       fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7788         "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7789 
7790     /* Process copy/get strings */
7791 
7792     if (!copy_and_get(utf, 1)) return PR_ABEND;
7793 
7794     /* "allvector" outputs the entire vector */
7795 
7796     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7797       show_ovector(ovector, oveccount);
7798 
7799     break;  /* Out of the /g loop */
7800     }       /* End of handling partial match */
7801 
7802   /* Failed to match. If this is a /g or /G loop, we might previously have
7803   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7804   If that is the case, this is not necessarily the end. We want to advance the
7805   start offset, and continue. We won't be at the end of the string - that was
7806   checked before setting g_notempty. We achieve the effect by pretending that a
7807   single character was matched.
7808 
7809   Complication arises in the case when the newline convention is "any", "crlf",
7810   or "anycrlf". If the previous match was at the end of a line terminated by
7811   CRLF, an advance of one character just passes the CR, whereas we should
7812   prefer the longer newline sequence, as does the code in pcre2_match().
7813 
7814   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7815   character, not one byte. */
7816 
7817   else if (g_notempty != 0)   /* There was a previous null match */
7818     {
7819     uint16_t nl = FLD(compiled_code, newline_convention);
7820     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
7821     PCRE2_SIZE end_offset = start_offset + 1;
7822 
7823     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7824          nl == PCRE2_NEWLINE_ANYCRLF) &&
7825         start_offset < ulen - 1 &&
7826         CODE_UNIT(pp, start_offset) == '\r' &&
7827         CODE_UNIT(pp, end_offset) == '\n')
7828       end_offset++;
7829 
7830     else if (utf && test_mode != PCRE32_MODE)
7831       {
7832       if (test_mode == PCRE8_MODE)
7833         {
7834         for (; end_offset < ulen; end_offset++)
7835           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7836         }
7837       else  /* 16-bit mode */
7838         {
7839         for (; end_offset < ulen; end_offset++)
7840           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7841         }
7842       }
7843 
7844     SETFLDVEC(match_data, ovector, 0, start_offset);
7845     SETFLDVEC(match_data, ovector, 1, end_offset);
7846     }  /* End of handling null match in a global loop */
7847 
7848   /* A "normal" match failure. There will be a negative error number in
7849   capcount. */
7850 
7851   else
7852     {
7853     switch(capcount)
7854       {
7855       case PCRE2_ERROR_NOMATCH:
7856       if (gmatched == 0)
7857         {
7858         fprintf(outfile, "No match");
7859         if ((dat_datctl.control & CTL_MARK) != 0 &&
7860              TESTFLD(match_data, mark, !=, NULL))
7861           {
7862           fprintf(outfile, ", mark = ");
7863           PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7864           }
7865         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7866           fprintf(outfile, " (JIT)");
7867         fprintf(outfile, "\n");
7868 
7869         /* "allvector" outputs the entire vector */
7870 
7871         if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7872           show_ovector(ovector, oveccount);
7873         }
7874       break;
7875 
7876       case PCRE2_ERROR_BADUTFOFFSET:
7877       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7878       break;
7879 
7880       default:
7881       fprintf(outfile, "Failed: error %d: ", capcount);
7882       if (!print_error_message(capcount, "", "")) return PR_ABEND;
7883       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7884           capcount >= PCRE2_ERROR_UTF32_ERR2)
7885         {
7886         PCRE2_SIZE startchar;
7887         PCRE2_GET_STARTCHAR(startchar, match_data);
7888         fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
7889         }
7890       fprintf(outfile, "\n");
7891       break;
7892       }
7893 
7894     break;  /* Out of the /g loop */
7895     }       /* End of failed match handling */
7896 
7897   /* Control reaches here in two circumstances: (a) after a match, and (b)
7898   after a non-match that immediately followed a match on an empty string when
7899   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
7900   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
7901   of one character. So effectively we get here only after a match. If we
7902   are not doing a global search, we are done. */
7903 
7904   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
7905     {
7906     PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
7907     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
7908 
7909     /* We must now set up for the next iteration of a global search. If we have
7910     matched an empty string, first check to see if we are at the end of the
7911     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
7912     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
7913     at the same point. If this fails it will be picked up above, where a fake
7914     match is set up so that at this point we advance to the next character.
7915 
7916     However, in order to cope with patterns that never match at their starting
7917     offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
7918     than the starting offset. This means there will be a retry with the
7919     starting offset at the match offset. If this returns the same match again,
7920     it is picked up above and ignored, and the special action is then taken. */
7921 
7922     if (match_offset == end_offset)
7923       {
7924       if (end_offset == ulen) break;           /* End of subject */
7925       if (match_offset <= dat_datctl.offset)
7926         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7927       }
7928 
7929     /* However, even after matching a non-empty string, there is still one
7930     tricky case. If a pattern contains \K within a lookbehind assertion at the
7931     start, the end of the matched string can be at the offset where the match
7932     started. In the case of a normal /g iteration without special action, this
7933     leads to a loop that keeps on returning the same substring. The loop would
7934     be caught above, but we really want to move on to the next match. */
7935 
7936     else
7937       {
7938       g_notempty = 0;   /* Set for a "normal" repeat */
7939       if ((dat_datctl.control & CTL_GLOBAL) != 0)
7940         {
7941         PCRE2_SIZE startchar;
7942         PCRE2_GET_STARTCHAR(startchar, match_data);
7943         if (end_offset <= startchar)
7944           {
7945           if (startchar >= ulen) break;       /* End of subject */
7946           end_offset = startchar + 1;
7947           if (utf && test_mode != PCRE32_MODE)
7948             {
7949             if (test_mode == PCRE8_MODE)
7950               {
7951               for (; end_offset < ulen; end_offset++)
7952                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7953               }
7954             else  /* 16-bit mode */
7955               {
7956               for (; end_offset < ulen; end_offset++)
7957                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7958               }
7959             }
7960           }
7961         }
7962       }
7963 
7964     /* For a normal global (/g) iteration, save the current ovector[0,1] and
7965     the starting offset so that we can check that they do change each time.
7966     Otherwise a matching bug that returns the same string causes an infinite
7967     loop. It has happened! Then update the start offset, leaving other
7968     parameters alone. */
7969 
7970     if ((dat_datctl.control & CTL_GLOBAL) != 0)
7971       {
7972       ovecsave[0] = ovector[0];
7973       ovecsave[1] = ovector[1];
7974       ovecsave[2] = dat_datctl.offset;
7975       dat_datctl.offset = end_offset;
7976       }
7977 
7978     /* For altglobal, just update the pointer and length. */
7979 
7980     else
7981       {
7982       pp += end_offset * code_unit_size;
7983       len -= end_offset * code_unit_size;
7984       ulen -= end_offset;
7985       if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
7986       }
7987     }
7988   }  /* End of global loop */
7989 
7990 show_memory = FALSE;
7991 return PR_OK;
7992 }
7993 
7994 
7995 
7996 
7997 /*************************************************
7998 *               Print PCRE2 version              *
7999 *************************************************/
8000 
8001 static void
print_version(FILE * f)8002 print_version(FILE *f)
8003 {
8004 VERSION_TYPE *vp;
8005 fprintf(f, "PCRE2 version ");
8006 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8007 fprintf(f, "\n");
8008 }
8009 
8010 
8011 
8012 /*************************************************
8013 *               Print Unicode version            *
8014 *************************************************/
8015 
8016 static void
print_unicode_version(FILE * f)8017 print_unicode_version(FILE *f)
8018 {
8019 VERSION_TYPE *vp;
8020 fprintf(f, "Unicode version ");
8021 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8022 }
8023 
8024 
8025 
8026 /*************************************************
8027 *               Print JIT target                 *
8028 *************************************************/
8029 
8030 static void
print_jit_target(FILE * f)8031 print_jit_target(FILE *f)
8032 {
8033 VERSION_TYPE *vp;
8034 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8035 }
8036 
8037 
8038 
8039 /*************************************************
8040 *       Print newline configuration              *
8041 *************************************************/
8042 
8043 /* Output is always to stdout.
8044 
8045 Arguments:
8046   optval     the value obtained from PCRE2_CONFIG_NEWLINE
8047   isc        TRUE if called from "-C newline"
8048 Returns:     nothing
8049 */
8050 
8051 static void
print_newline_config(uint32_t optval,BOOL isc)8052 print_newline_config(uint32_t optval, BOOL isc)
8053 {
8054 if (!isc) printf("  Newline sequence is ");
8055 if (optval < sizeof(newlines)/sizeof(char *))
8056   printf("%s\n", newlines[optval]);
8057 else
8058   printf("a non-standard value: %d\n", optval);
8059 }
8060 
8061 
8062 
8063 /*************************************************
8064 *             Usage function                     *
8065 *************************************************/
8066 
static void
usage(void)
{
/* All the text is constant and contains no conversion specifications, so it
is written with fputs(). Adjacent string literals are grouped into a single
call except where conditional compilation selects individual lines. */

fputs("Usage:     pcre2test [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

/* Only the code unit widths that were compiled in are listed. */

#ifdef SUPPORT_PCRE2_8
fputs("  -8            use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs("  -16           use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs("  -32           use the 32-bit library\n", stdout);
#endif

fputs("  -ac           set default pattern modifier PCRE2_AUTO_CALLOUT\n"
      "  -AC           as -ac, but also set subject 'callout_extra' modifier\n"
      "  -b            set default pattern modifier 'fullbincode'\n"
      "  -C            show PCRE2 compile-time options and exit\n"
      "  -C arg        show a specific compile-time option and exit with its\n"
      "                  value if numeric (else 0). The arg can be:\n",
      stdout);

fputs("     backslash-C    use of \\C is enabled [0, 1]\n"
      "     bsr            \\R type [ANYCRLF, ANY]\n"
      "     ebcdic         compiled for EBCDIC character code [0,1]\n"
      "     ebcdic-nl      NL code if compiled for EBCDIC\n"
      "     jit            just-in-time compiler supported [0, 1]\n"
      "     linksize       internal link size [2, 3, 4]\n",
      stdout);

fputs("     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
      "     pcre2-8        8 bit library support enabled [0, 1]\n"
      "     pcre2-16       16 bit library support enabled [0, 1]\n"
      "     pcre2-32       32 bit library support enabled [0, 1]\n"
      "     unicode        Unicode and UTF support enabled [0, 1]\n",
      stdout);

fputs("  -d            set default pattern modifier 'debug'\n"
      "  -dfa          set default subject modifier 'dfa'\n"
      "  -error <n,m,..>  show messages for error numbers, then exit\n"
      "  -help         show usage information\n"
      "  -i            set default pattern modifier 'info'\n"
      "  -jit          set default pattern modifier 'jit'\n"
      "  -jitverify    set default pattern modifier 'jitverify'\n",
      stdout);

fputs("  -LM           list pattern and subject modifiers, then exit\n"
      "  -q            quiet: do not output PCRE2 version number at start\n"
      "  -pattern <s>  set default pattern modifier fields\n"
      "  -subject <s>  set default subject modifier fields\n"
      "  -S <n>        set stack size to <n> mebibytes\n",
      stdout);

fputs("  -t [<n>]      time compilation and execution, repeating <n> times\n"
      "  -tm [<n>]     time execution (matching) only, repeating <n> times\n"
      "  -T            same as -t, but show total times at the end\n"
      "  -TM           same as -tm, but show total time at the end\n"
      "  -version      show PCRE2 version and exit\n",
      stdout);
}
8122 
8123 
8124 
8125 /*************************************************
8126 *             Handle -C option                   *
8127 *************************************************/
8128 
8129 /* This option outputs configuration options and sets an appropriate return
8130 code when asked for a single option. The code is abstracted into a separate
8131 function because of its size. Use whichever pcre2_config() function is
8132 available.
8133 
8134 Argument:   an option name or NULL
8135 Returns:    the return code
8136 */
8137 
8138 static int
c_option(const char * arg)8139 c_option(const char *arg)
8140 {
8141 uint32_t optval;
8142 unsigned int i = COPTLISTCOUNT;
8143 int yield = 0;
8144 
8145 if (arg != NULL && arg[0] != CHAR_MINUS)
8146   {
8147   for (i = 0; i < COPTLISTCOUNT; i++)
8148     if (strcmp(arg, coptlist[i].name) == 0) break;
8149 
8150   if (i >= COPTLISTCOUNT)
8151     {
8152     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8153     return 0;
8154     }
8155 
8156   switch (coptlist[i].type)
8157     {
8158     case CONF_BSR:
8159     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8160     printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8161     break;
8162 
8163     case CONF_FIX:
8164     yield = coptlist[i].value;
8165     printf("%d\n", yield);
8166     break;
8167 
8168     case CONF_FIZ:
8169     optval = coptlist[i].value;
8170     printf("%d\n", optval);
8171     break;
8172 
8173     case CONF_INT:
8174     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8175     printf("%d\n", yield);
8176     break;
8177 
8178     case CONF_NL:
8179     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8180     print_newline_config(optval, TRUE);
8181     break;
8182     }
8183 
8184 /* For VMS, return the value by setting a symbol, for certain values only. This
8185 is contributed code which the PCRE2 developers have no means of testing. */
8186 
8187 #ifdef __VMS
8188 
8189 /* This is the original code provided by the first VMS contributor. */
8190 #ifdef NEVER
8191   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8192     {
8193     char ucname[16];
8194     strcpy(ucname, coptlist[i].name);
8195     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8196     vms_setsymbol(ucname, 0, optval);
8197     }
8198 #endif
8199 
8200 /* This is the new code, provided by a second VMS contributor. */
8201 
8202   if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8203     {
8204     char nam_buf[22], val_buf[4];
8205     $DESCRIPTOR(nam, nam_buf);
8206     $DESCRIPTOR(val, val_buf);
8207 
8208     strcpy(nam_buf, coptlist[i].name);
8209     nam.dsc$w_length = strlen(nam_buf);
8210     sprintf(val_buf, "%d", yield);
8211     val.dsc$w_length = strlen(val_buf);
8212     lib$set_symbol(&nam, &val);
8213     }
8214 #endif  /* __VMS */
8215 
8216   return yield;
8217   }
8218 
8219 /* No argument for -C: output all configuration information. */
8220 
8221 print_version(stdout);
8222 printf("Compiled with\n");
8223 
8224 #ifdef EBCDIC
8225 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8226 #if defined NATIVE_ZOS
8227 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
8228 #endif
8229 #endif
8230 
8231 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8232 if (optval & 1) printf("  8-bit support\n");
8233 if (optval & 2) printf("  16-bit support\n");
8234 if (optval & 4) printf("  32-bit support\n");
8235 
8236 #ifdef SUPPORT_VALGRIND
8237 printf("  Valgrind support\n");
8238 #endif
8239 
8240 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8241 if (optval != 0)
8242   {
8243   printf("  UTF and UCP support (");
8244   print_unicode_version(stdout);
8245   printf(")\n");
8246   }
8247 else printf("  No Unicode support\n");
8248 
8249 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8250 if (optval != 0)
8251   {
8252   printf("  Just-in-time compiler support: ");
8253   print_jit_target(stdout);
8254   printf("\n");
8255   }
8256 else
8257   {
8258   printf("  No just-in-time compiler support\n");
8259   }
8260 
8261 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8262 print_newline_config(optval, FALSE);
8263 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8264 printf("  \\R matches %s\n",
8265   (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8266                                  "all Unicode newlines");
8267 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8268 printf("  \\C is %ssupported\n", optval? "not ":"");
8269 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8270 printf("  Internal link size = %d\n", optval);
8271 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8272 printf("  Parentheses nest limit = %d\n", optval);
8273 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8274 printf("  Default heap limit = %d kibibytes\n", optval);
8275 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8276 printf("  Default match limit = %d\n", optval);
8277 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8278 printf("  Default depth limit = %d\n", optval);
8279 return 0;
8280 }
8281 
8282 
8283 
8284 /*************************************************
8285 *              Display one modifier              *
8286 *************************************************/
8287 
8288 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8289 display_one_modifier(modstruct *m, BOOL for_pattern)
8290 {
8291 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8292   '*' : ' ';
8293 printf("%c%s", c, m->name);
8294 }
8295 
8296 
8297 
8298 /*************************************************
8299 *       Display pattern or subject modifiers     *
8300 *************************************************/
8301 
8302 /* In order to print in two columns, first scan without printing to get a list
8303 of the modifiers that are required.
8304 
8305 Arguments:
8306   for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
8307   title         string to be used in title
8308 
8309 Returns:        nothing
8310 */
8311 
8312 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8313 display_selected_modifiers(BOOL for_pattern, const char *title)
8314 {
8315 uint32_t i, j;
8316 uint32_t n = 0;
8317 uint32_t list[MODLISTCOUNT];
8318 
8319 for (i = 0; i < MODLISTCOUNT; i++)
8320   {
8321   BOOL is_pattern = TRUE;
8322   modstruct *m = modlist + i;
8323 
8324   switch (m->which)
8325     {
8326     case MOD_CTC:       /* Compile context */
8327     case MOD_PAT:       /* Pattern */
8328     case MOD_PATP:      /* Pattern, OK for Perl-compatible test */
8329     break;
8330 
8331     /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8332     subjects, but can be given with a pattern. We list them as subject
8333     modifiers, but marked with an asterisk.*/
8334 
8335     case MOD_CTM:       /* Match context */
8336     case MOD_DAT:       /* Subject line */
8337     case MOD_PND:       /* As PD, but not default pattern */
8338     case MOD_PNDP:      /* As PND, OK for Perl-compatible test */
8339     is_pattern = FALSE;
8340     break;
8341 
8342     default: printf("** Unknown type for modifier '%s'\n", m->name);
8343     /* Fall through */
8344     case MOD_PD:        /* Pattern or subject */
8345     case MOD_PDP:       /* As PD, OK for Perl-compatible test */
8346     is_pattern = for_pattern;
8347     break;
8348     }
8349 
8350   if (for_pattern == is_pattern) list[n++] = i;
8351   }
8352 
8353 /* Now print from the list in two columns. */
8354 
8355 printf("-------------- %s MODIFIERS --------------\n", title);
8356 
8357 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8358   {
8359   modstruct *m = modlist + list[i];
8360   display_one_modifier(m, for_pattern);
8361   if (j < n)
8362     {
8363     uint32_t k = 27 - strlen(m->name);
8364     while (k-- > 0) printf(" ");
8365     display_one_modifier(modlist + list[j], for_pattern);
8366     }
8367   printf("\n");
8368   }
8369 }
8370 
8371 
8372 
8373 /*************************************************
8374 *          Display the list of modifiers         *
8375 *************************************************/
8376 
8377 static void
display_modifiers(void)8378 display_modifiers(void)
8379 {
8380 printf(
8381   "An asterisk on a subject modifier means that it may be given on a pattern\n"
8382   "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8383   "that are listed for both patterns and subjects have different effects in\n"
8384   "each case.\n\n");
8385 display_selected_modifiers(TRUE, "PATTERN");
8386 printf("\n");
8387 display_selected_modifiers(FALSE, "SUBJECT");
8388 }
8389 
8390 
8391 
8392 /*************************************************
8393 *                Main Program                    *
8394 *************************************************/
8395 
8396 int
main(int argc,char ** argv)8397 main(int argc, char **argv)
8398 {
8399 uint32_t temp;
8400 uint32_t yield = 0;
8401 uint32_t op = 1;
8402 BOOL notdone = TRUE;
8403 BOOL quiet = FALSE;
8404 BOOL showtotaltimes = FALSE;
8405 BOOL skipping = FALSE;
8406 char *arg_subject = NULL;
8407 char *arg_pattern = NULL;
8408 char *arg_error = NULL;
8409 
8410 /* The offsets to the options and control bits fields of the pattern and data
8411 control blocks must be the same so that common options and controls such as
8412 "anchored" or "memory" can work for either of them from a single table entry.
8413 We cannot test this till runtime because "offsetof" does not work in the
8414 preprocessor. */
8415 
8416 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8417     PO(control2) != DO(control2))
8418   {
8419   fprintf(stderr, "** Coding error: "
8420     "options and control offsets for pattern and data must be the same.\n");
8421   return 1;
8422   }
8423 
8424 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8425 same time checking that a request for the length gives the same answer. Also
8426 check lengths for non-string items. */
8427 
8428 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8429     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8430 
8431     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8432     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8433 
8434     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8435     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8436 
8437     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8438     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8439   {
8440   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8441   return 1;
8442   }
8443 
8444 /* Check that bad options are diagnosed. */
8445 
8446 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8447     PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8448   {
8449   fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8450   return 1;
8451   }
8452 
8453 /* This configuration option is now obsolete, but running a quick check ensures
8454 that its code is covered. */
8455 
8456 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8457 
8458 /* Get buffers from malloc() so that valgrind will check their misuse when
8459 debugging. They grow automatically when very long lines are read. The 16-
8460 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8461 
8462 buffer = (uint8_t *)malloc(pbuffer8_size);
8463 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8464 
8465 /* The following  _setmode() stuff is some Windows magic that tells its runtime
8466 library to translate CRLF into a single LF character. At least, that's what
8467 I've been told: never having used Windows I take this all on trust. Originally
8468 it set 0x8000, but then I was advised that _O_BINARY was better. */
8469 
8470 #if defined(_WIN32) || defined(WIN32)
8471 _setmode( _fileno( stdout ), _O_BINARY );
8472 #endif
8473 
8474 /* Initialization that does not depend on the running mode. */
8475 
8476 locale_name[0] = 0;
8477 
8478 memset(&def_patctl, 0, sizeof(patctl));
8479 def_patctl.convert_type = CONVERT_UNSET;
8480 
8481 memset(&def_datctl, 0, sizeof(datctl));
8482 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8483 def_datctl.copy_numbers[0] = -1;
8484 def_datctl.get_numbers[0] = -1;
8485 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8486 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8487 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8488 
8489 /* Scan command line options. */
8490 
8491 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8492   {
8493   char *endptr;
8494   char *arg = argv[op];
8495   unsigned long uli;
8496 
8497   /* List modifiers and exit. */
8498 
8499   if (strcmp(arg, "-LM") == 0)
8500     {
8501     display_modifiers();
8502     goto EXIT;
8503     }
8504 
8505   /* Display and/or set return code for configuration options. */
8506 
8507   if (strcmp(arg, "-C") == 0)
8508     {
8509     yield = c_option(argv[op + 1]);
8510     goto EXIT;
8511     }
8512 
8513   /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8514   and 32-bit modes because that won't happen naturally when 8-bit is also
8515   configured. Also call some other functions that are not otherwise used. This
8516   means that a coverage report won't claim there are uncalled functions. */
8517 
8518   if (strcmp(arg, "-8") == 0)
8519     {
8520 #ifdef SUPPORT_PCRE2_8
8521     test_mode = PCRE8_MODE;
8522     (void)pcre2_set_bsr_8(pat_context8, 999);
8523     (void)pcre2_set_newline_8(pat_context8, 999);
8524 #else
8525     fprintf(stderr,
8526       "** This version of PCRE2 was built without 8-bit support\n");
8527     exit(1);
8528 #endif
8529     }
8530 
8531   else if (strcmp(arg, "-16") == 0)
8532     {
8533 #ifdef SUPPORT_PCRE2_16
8534     test_mode = PCRE16_MODE;
8535     (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8536     (void)pcre2_set_bsr_16(pat_context16, 999);
8537     (void)pcre2_set_newline_16(pat_context16, 999);
8538 #else
8539     fprintf(stderr,
8540       "** This version of PCRE2 was built without 16-bit support\n");
8541     exit(1);
8542 #endif
8543     }
8544 
8545   else if (strcmp(arg, "-32") == 0)
8546     {
8547 #ifdef SUPPORT_PCRE2_32
8548     test_mode = PCRE32_MODE;
8549     (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8550     (void)pcre2_set_bsr_32(pat_context32, 999);
8551     (void)pcre2_set_newline_32(pat_context32, 999);
8552 #else
8553     fprintf(stderr,
8554       "** This version of PCRE2 was built without 32-bit support\n");
8555     exit(1);
8556 #endif
8557     }
8558 
8559   /* Set quiet (no version verification) */
8560 
8561   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8562 
8563   /* Set system stack size */
8564 
8565   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8566       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8567     {
8568 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8569     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8570     exit(1);
8571 #else
8572     int rc;
8573     uint32_t stack_size;
8574     struct rlimit rlim;
8575     if (U32OVERFLOW(uli))
8576       {
8577       fprintf(stderr, "** Argument for -S is too big\n");
8578       exit(1);
8579       }
8580     stack_size = (uint32_t)uli;
8581     getrlimit(RLIMIT_STACK, &rlim);
8582     rlim.rlim_cur = stack_size * 1024 * 1024;
8583     if (rlim.rlim_cur > rlim.rlim_max)
8584       {
8585       fprintf(stderr,
8586         "pcre2test: requested stack size %luMiB is greater than hard limit "
8587           "%luMiB\n", (unsigned long int)stack_size,
8588           (unsigned long int)(rlim.rlim_max));
8589       exit(1);
8590       }
8591     rc = setrlimit(RLIMIT_STACK, &rlim);
8592     if (rc != 0)
8593       {
8594       fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8595         (unsigned long int)stack_size, strerror(errno));
8596       exit(1);
8597       }
8598     op++;
8599     argc--;
8600 #endif
8601     }
8602 
8603   /* Set some common pattern and subject controls */
8604 
8605   else if (strcmp(arg, "-AC") == 0)
8606     {
8607     def_patctl.options |= PCRE2_AUTO_CALLOUT;
8608     def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8609     }
8610   else if (strcmp(arg, "-ac") == 0)  def_patctl.options |= PCRE2_AUTO_CALLOUT;
8611   else if (strcmp(arg, "-b") == 0)   def_patctl.control |= CTL_FULLBINCODE;
8612   else if (strcmp(arg, "-d") == 0)   def_patctl.control |= CTL_DEBUG;
8613   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8614   else if (strcmp(arg, "-i") == 0)   def_patctl.control |= CTL_INFO;
8615   else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0)
8616     {
8617     if (arg[4] != 0) def_patctl.control |= CTL_JITVERIFY;
8618     def_patctl.jit = 7;  /* full & partial */
8619 #ifndef SUPPORT_JIT
8620     fprintf(stderr, "** Warning: JIT support is not available: "
8621                     "-jit[verify] calls functions that do nothing.\n");
8622 #endif
8623     }
8624 
8625   /* Set timing parameters */
8626 
8627   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8628            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8629     {
8630     int both = arg[2] == 0;
8631     showtotaltimes = arg[1] == 'T';
8632     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8633       {
8634       if (U32OVERFLOW(uli))
8635         {
8636         fprintf(stderr, "** Argument for %s is too big\n", arg);
8637         exit(1);
8638         }
8639       timeitm = (int)uli;
8640       op++;
8641       argc--;
8642       }
8643     else timeitm = LOOPREPEAT;
8644     if (both) timeit = timeitm;
8645     }
8646 
8647   /* Give help */
8648 
8649   else if (strcmp(arg, "-help") == 0 ||
8650            strcmp(arg, "--help") == 0)
8651     {
8652     usage();
8653     goto EXIT;
8654     }
8655 
8656   /* Show version */
8657 
8658   else if (strcmp(arg, "-version") == 0 ||
8659            strcmp(arg, "--version") == 0)
8660     {
8661     print_version(stdout);
8662     goto EXIT;
8663     }
8664 
8665   /* The following options save their data for processing once we know what
8666   the running mode is. */
8667 
8668   else if (strcmp(arg, "-error") == 0)
8669     {
8670     arg_error = argv[op+1];
8671     goto CHECK_VALUE_EXISTS;
8672     }
8673 
8674   else if (strcmp(arg, "-subject") == 0)
8675     {
8676     arg_subject = argv[op+1];
8677     goto CHECK_VALUE_EXISTS;
8678     }
8679 
8680   else if (strcmp(arg, "-pattern") == 0)
8681     {
8682     arg_pattern = argv[op+1];
8683     CHECK_VALUE_EXISTS:
8684     if (argc <= 2)
8685       {
8686       fprintf(stderr, "** Missing value for %s\n", arg);
8687       yield = 1;
8688       goto EXIT;
8689       }
8690     op++;
8691     argc--;
8692     }
8693 
8694   /* Unrecognized option */
8695 
8696   else
8697     {
8698     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8699     usage();
8700     yield = 1;
8701     goto EXIT;
8702     }
8703   op++;
8704   argc--;
8705   }
8706 
8707 /* If -error was present, get the error numbers, show the messages, and exit.
8708 We wait to do this until we know which mode we are in. */
8709 
8710 if (arg_error != NULL)
8711   {
8712   int len;
8713   int errcode;
8714   char *endptr;
8715 
8716 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8717 least 128 code units, because it is used for retrieving error messages. */
8718 
8719 #ifdef SUPPORT_PCRE2_16
8720   if (test_mode == PCRE16_MODE)
8721     {
8722     pbuffer16_size = 256;
8723     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8724     if (pbuffer16 == NULL)
8725       {
8726       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8727         SIZ_CAST pbuffer16_size);
8728       yield = 1;
8729       goto EXIT;
8730       }
8731     }
8732 #endif
8733 
8734 #ifdef SUPPORT_PCRE2_32
8735   if (test_mode == PCRE32_MODE)
8736     {
8737     pbuffer32_size = 512;
8738     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8739     if (pbuffer32 == NULL)
8740       {
8741       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8742         SIZ_CAST pbuffer32_size);
8743       yield = 1;
8744       goto EXIT;
8745       }
8746     }
8747 #endif
8748 
8749   /* Loop along a list of error numbers. */
8750 
8751   for (;;)
8752     {
8753     errcode = strtol(arg_error, &endptr, 10);
8754     if (*endptr != 0 && *endptr != CHAR_COMMA)
8755       {
8756       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8757       yield = 1;
8758       goto EXIT;
8759       }
8760     printf("Error %d: ", errcode);
8761     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8762     if (len < 0)
8763       {
8764       switch (len)
8765         {
8766         case PCRE2_ERROR_BADDATA:
8767         printf("PCRE2_ERROR_BADDATA (unknown error number)");
8768         break;
8769 
8770         case PCRE2_ERROR_NOMEMORY:
8771         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8772         break;
8773 
8774         default:
8775         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8776         break;
8777         }
8778       }
8779     else
8780       {
8781       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8782       }
8783     printf("\n");
8784     if (*endptr == 0) goto EXIT;
8785     arg_error = endptr + 1;
8786     }
8787   /* Control never reaches here */
8788   }  /* End of -error handling */
8789 
8790 /* Initialize things that cannot be done until we know which test mode we are
8791 running in. Exercise the general context copying function, which is not
8792 otherwise used. */
8793 
8794 code_unit_size = test_mode/8;
8795 max_oveccount = DEFAULT_OVECCOUNT;
8796 
8797 /* Use macros to save a lot of duplication. */
8798 
8799 #define CREATECONTEXTS \
8800   G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8801   G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8802   G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8803   G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8804   G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8805   G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8806   G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8807   G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8808   G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8809 
8810 #define CONTEXTTESTS \
8811   (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8812   (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8813   (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8814   (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
8815 
8816 /* Call the appropriate functions for the current mode, and exercise some
8817 functions that are not otherwise called. */
8818 
8819 #ifdef SUPPORT_PCRE2_8
8820 #undef BITS
8821 #define BITS 8
8822 if (test_mode == PCRE8_MODE)
8823   {
8824   CREATECONTEXTS;
8825   CONTEXTTESTS;
8826   }
8827 #endif
8828 
8829 #ifdef SUPPORT_PCRE2_16
8830 #undef BITS
8831 #define BITS 16
8832 if (test_mode == PCRE16_MODE)
8833   {
8834   CREATECONTEXTS;
8835   CONTEXTTESTS;
8836   }
8837 #endif
8838 
8839 #ifdef SUPPORT_PCRE2_32
8840 #undef BITS
8841 #define BITS 32
8842 if (test_mode == PCRE32_MODE)
8843   {
8844   CREATECONTEXTS;
8845   CONTEXTTESTS;
8846   }
8847 #endif
8848 
8849 /* Set a default parentheses nest limit that is large enough to run the
8850 standard tests (this also exercises the function). */
8851 
8852 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8853 
8854 /* Handle command line modifier settings, sending any error messages to
8855 stderr. We need to know the mode before modifying the context, and it is tidier
8856 to do them all in the same way. */
8857 
8858 outfile = stderr;
8859 if ((arg_pattern != NULL &&
8860     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8861     (arg_subject != NULL &&
8862     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8863   {
8864   yield = 1;
8865   goto EXIT;
8866   }
8867 
8868 /* Sort out the input and output files, defaulting to stdin/stdout. */
8869 
8870 infile = stdin;
8871 outfile = stdout;
8872 
8873 if (argc > 1 && strcmp(argv[op], "-") != 0)
8874   {
8875   infile = fopen(argv[op], INPUT_MODE);
8876   if (infile == NULL)
8877     {
8878     printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
8879     yield = 1;
8880     goto EXIT;
8881     }
8882   }
8883 
8884 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
8885 if (INTERACTIVE(infile)) using_history();
8886 #endif
8887 
8888 if (argc > 2)
8889   {
8890   outfile = fopen(argv[op+1], OUTPUT_MODE);
8891   if (outfile == NULL)
8892     {
8893     printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
8894     yield = 1;
8895     goto EXIT;
8896     }
8897   }
8898 
8899 /* Output a heading line unless quiet, then process input lines. */
8900 
8901 if (!quiet) print_version(outfile);
8902 
8903 SET(compiled_code, NULL);
8904 
8905 #ifdef SUPPORT_PCRE2_8
8906 preg.re_pcre2_code = NULL;
8907 preg.re_match_data = NULL;
8908 #endif
8909 
8910 while (notdone)
8911   {
8912   uint8_t *p;
8913   int rc = PR_OK;
8914   BOOL expectdata = TEST(compiled_code, !=, NULL);
8915 #ifdef SUPPORT_PCRE2_8
8916   expectdata |= preg.re_pcre2_code != NULL;
8917 #endif
8918 
8919   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
8920     break;
8921   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
8922   fflush(outfile);
8923   p = buffer;
8924 
8925   /* If we have a pattern set up for testing, or we are skipping after a
8926   compile failure, a blank line terminates this test. */
8927 
8928   if (expectdata || skipping)
8929     {
8930     while (isspace(*p)) p++;
8931     if (*p == 0)
8932       {
8933 #ifdef SUPPORT_PCRE2_8
8934       if (preg.re_pcre2_code != NULL)
8935         {
8936         regfree(&preg);
8937         preg.re_pcre2_code = NULL;
8938         preg.re_match_data = NULL;
8939         }
8940 #endif  /* SUPPORT_PCRE2_8 */
8941       if (TEST(compiled_code, !=, NULL))
8942         {
8943         SUB1(pcre2_code_free, compiled_code);
8944         SET(compiled_code, NULL);
8945         }
8946       skipping = FALSE;
8947       setlocale(LC_CTYPE, "C");
8948       }
8949 
8950     /* Otherwise, if we are not skipping, and the line is not a data comment
8951     line starting with "\=", process a data line. */
8952 
8953     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
8954       {
8955       rc = process_data();
8956       }
8957     }
8958 
8959   /* We do not have a pattern set up for testing. Lines starting with # are
8960   either comments or special commands. Blank lines are ignored. Otherwise, the
8961   line must start with a valid delimiter. It is then processed as a pattern
8962   line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
8963   valgrind, make the unused part of the buffer undefined, to catch overruns. */
8964 
8965   else if (*p == '#')
8966     {
8967     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
8968     rc = process_command();
8969     }
8970 
8971   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
8972     {
8973     rc = process_pattern();
8974     dfa_matched = 0;
8975     }
8976 
8977   else
8978     {
8979     while (isspace(*p)) p++;
8980     if (*p != 0)
8981       {
8982       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
8983         *buffer);
8984       rc = PR_SKIP;
8985       }
8986     }
8987 
8988   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
8989   else if (rc == PR_ABEND)
8990     {
8991     fprintf(outfile, "** pcre2test run abandoned\n");
8992     yield = 1;
8993     goto EXIT;
8994     }
8995   }
8996 
8997 /* Finish off a normal run. */
8998 
8999 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9000 
9001 if (showtotaltimes)
9002   {
9003   const char *pad = "";
9004   fprintf(outfile, "--------------------------------------\n");
9005   if (timeit > 0)
9006     {
9007     fprintf(outfile, "Total compile time %.4f milliseconds\n",
9008       (((double)total_compile_time * 1000.0) / (double)timeit) /
9009         (double)CLOCKS_PER_SEC);
9010     if (total_jit_compile_time > 0)
9011       fprintf(outfile, "Total JIT compile  %.4f milliseconds\n",
9012         (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9013           (double)CLOCKS_PER_SEC);
9014     pad = "  ";
9015     }
9016   fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9017     (((double)total_match_time * 1000.0) / (double)timeitm) /
9018       (double)CLOCKS_PER_SEC);
9019   }
9020 
9021 
9022 EXIT:
9023 
9024 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9025 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9026 #endif
9027 
9028 if (infile != NULL && infile != stdin) fclose(infile);
9029 if (outfile != NULL && outfile != stdout) fclose(outfile);
9030 
9031 free(buffer);
9032 free(dbuffer);
9033 free(pbuffer8);
9034 free(dfa_workspace);
9035 free((void *)locale_tables);
9036 PCRE2_MATCH_DATA_FREE(match_data);
9037 SUB1(pcre2_code_free, compiled_code);
9038 
9039 while(patstacknext-- > 0)
9040   {
9041   SET(compiled_code, patstack[patstacknext]);
9042   SUB1(pcre2_code_free, compiled_code);
9043   }
9044 
9045 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9046 if (jit_stack != NULL)
9047   {
9048   PCRE2_JIT_STACK_FREE(jit_stack);
9049   }
9050 
9051 #define FREECONTEXTS \
9052   G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9053   G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9054   G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9055   G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9056   G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9057   G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9058   G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9059   G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9060 
9061 #ifdef SUPPORT_PCRE2_8
9062 #undef BITS
9063 #define BITS 8
9064 if (preg.re_pcre2_code != NULL) regfree(&preg);
9065 FREECONTEXTS;
9066 #endif
9067 
9068 #ifdef SUPPORT_PCRE2_16
9069 #undef BITS
9070 #define BITS 16
9071 free(pbuffer16);
9072 FREECONTEXTS;
9073 #endif
9074 
9075 #ifdef SUPPORT_PCRE2_32
9076 #undef BITS
9077 #define BITS 32
9078 free(pbuffer32);
9079 FREECONTEXTS;
9080 #endif
9081 
9082 #if defined(__VMS)
9083   yield = SS$_NORMAL;  /* Return values via DCL symbols */
9084 #endif
9085 
9086 return yield;
9087 }
9088 
9089 /* End of pcre2test.c */
9090