1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2022 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Debugging code enabler */
82
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
85 /* Both libreadline and libedit are optionally supported */
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #elif defined(HAVE_EDIT_READLINE_READLINE_H)
94 #include <edit/readline/readline.h>
95 #else
96 #include <readline.h>
97 /* GNU readline defines this macro but libedit doesn't, if that ever changes
98 this needs to be updated or the build could break */
99 #ifdef RL_VERSION_MAJOR
100 #include <history.h>
101 #endif
102 #endif
103 #endif
104 #endif
105
/* The check for interactive input is wrapped in a macro so that it can be
changed if required for different environments. */

#define INTERACTIVE(stream) isatty(fileno(stream))
110
111
112 /* ---------------------- System-specific definitions ---------------------- */
113
/* File-open modes vary between platforms. Originally pcretest opened its input
and output without "b"; "b" was then added to both for release 5.0 because some
environments were said to need it (it makes no difference on Unix-like
systems), and later it was removed again from the input mode under Windows,
where it was reported to be wrong. The modes are therefore abstracted into
macros that are set here, to make it easier to fiddle with them. The BINARY
versions are used when saving/restoring compiled patterns; they are the same
for every platform, so they are defined once at the end. */

#if defined(_WIN32) || defined(WIN32)

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

/* Windows calls these functions by underscore-prefixed names, though in some
environments the plain names seem to be already defined, hence the #ifndefs. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "w"
#else
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#endif

#endif  /* Windows or not */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
162
163 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
164 user [2] provided alternative code which worked better for him. I have
165 commented out the original, but kept it around just in case. */
166
167 #ifdef __VMS
168 #include <ssdef.h>
169 /* These two includes came from [2]. */
170 #include descrip
171 #include lib$routines
172 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
173 #endif
174
/* Format letters for printing ptrdiff_t and size_t values. %td and %zu are
used when the compiler can be trusted with them: that is, when
DISABLE_PERCENT_ZT is not defined and either this is Visual C with _MSC_VER of
at least 1800, or it is some other compiler that announces C99 or later. Old VC
and older compilers don't support %td or %zu, and even some that claim to be
C99 don't (hence DISABLE_PERCENT_ZT); for those, long (or, under _WIN64, long
long) formats are used instead. */

#if !defined(DISABLE_PERCENT_ZT) && \
    ((defined(_MSC_VER) && (_MSC_VER >= 1800)) || \
     (!defined(_MSC_VER) && defined(__STDC_VERSION__) && \
      (__STDC_VERSION__ >= 199901L)))
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#elif defined(_WIN64)
#define PTR_FORM "lld"
#define SIZ_FORM "llu"
#else
#define PTR_FORM "ld"
#define SIZ_FORM "lu"
#endif
191
192 /* ------------------End of system-specific definitions -------------------- */
193
/* Token-pasting macros that are used in several places below. G() goes through
glue() so that its arguments are macro-expanded before they are joined. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
198
/* Miscellaneous parameters and manifests */

/* If CLOCKS_PER_SEC is not already defined, use CLK_TCK when that exists, and
otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC) && defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#elif !defined(CLOCKS_PER_SEC)
#define CLOCKS_PER_SEC 100
#endif

/* Values that mark a field as unset */

#define CFORE_UNSET   UINT32_MAX    /* For startend/cfail/cerror fields */
#define CONVERT_UNSET UINT32_MAX    /* For convert_type field */

/* Sizes, counts, and other defaults */

#define DFA_WS_DIMENSION     1000   /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT      15   /* Default ovector count */
#define JUNK_OFFSET    0xdeadbeef   /* For initializing ovector */
#define LOCALESIZE             32   /* Size of locale name */
#define LOOPREPEAT         500000   /* Default loop count for timing */
#define MALLOCLISTSIZE         20   /* For remembering mallocs */
#define PARENS_NEST_DEFAULT   220   /* Default parentheses nest limit */
#define PATSTACKSIZE           20   /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE       100   /* Field for reading 8-bit replacement */
#define VERSION_SIZE           64   /* Size of buffer for the version strings */
221
/* Default JIT compile options: the complete, partial-soft, and partial-hard
options combined. */

#define JIT_DEFAULT \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD)

/* Size, in bytes, of the buffer into which replacement strings are copied. It
must be big enough to hold them as 32-bit code units. */

#define REPLACE_BUFFSIZE 1024
232
/* Execution modes; each value is the code unit width in bits. */

#define PCRE8_MODE    8
#define PCRE16_MODE  16
#define PCRE32_MODE  32

/* Processing returns */

enum {
  PR_OK,
  PR_SKIP,
  PR_ABEND
};
242
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK defers to isprint() only when use_tables is not NULL and the
character is below 256; otherwise it falls back to PRINTABLE. The argument is
parenthesized wherever it appears inside an expression, so that an argument
containing operators is compared against 256 as a whole. The argument may be
evaluated more than once, so it must not have side effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
255
256 /* We have to include some of the library source files because we need
257 to use some of the macros, internal structure definitions, and other internal
258 values - pcre2test has "inside information" compared to an application program
259 that strictly follows the PCRE2 API.
260
261 Before including pcre2_internal.h we define PRIV so that it does not get
262 defined therein. This ensures that PRIV names in the included files do not
263 clash with those in the libraries. Also, although pcre2_internal.h does itself
264 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
265 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
266 for building the library. */
267
268 #define PRIV(name) name
269 #define PCRE2_CODE_UNIT_WIDTH 0
270 #include "pcre2.h"
271 #include "pcre2posix.h"
272 #include "pcre2_internal.h"
273
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
277
278 #define PCRE2_PCRE2TEST
279 #include "pcre2_tables.c"
280 #include "pcre2_ucd.c"
281
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter. When long is wider than 32 bits, a
value is out of range if it lies outside the 32-bit limits; when it is not, a
value equal to a limit is taken to indicate overflow.

The argument is parenthesized in every expansion so that any expression can be
passed safely. S32OVERFLOW evaluates its argument twice, so the argument must
not have side effects. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
297
298 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
299 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
300 defined. We can now include it for each supported code unit width. Because
301 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
302 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
303 while including these files, and then restore it to a no-op. Because LINK_SIZE
304 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
305 these inclusions should not be changed. */
306
307 #undef PCRE2_SUFFIX
308 #undef PCRE2_CODE_UNIT_WIDTH
309
310 #ifdef SUPPORT_PCRE2_8
311 #define PCRE2_CODE_UNIT_WIDTH 8
312 #define PCRE2_SUFFIX(a) G(a,8)
313 #include "pcre2_intmodedep.h"
314 #include "pcre2_printint.c"
315 #undef PCRE2_CODE_UNIT_WIDTH
316 #undef PCRE2_SUFFIX
317 #endif /* SUPPORT_PCRE2_8 */
318
319 #ifdef SUPPORT_PCRE2_16
320 #define PCRE2_CODE_UNIT_WIDTH 16
321 #define PCRE2_SUFFIX(a) G(a,16)
322 #include "pcre2_intmodedep.h"
323 #include "pcre2_printint.c"
324 #undef PCRE2_CODE_UNIT_WIDTH
325 #undef PCRE2_SUFFIX
326 #endif /* SUPPORT_PCRE2_16 */
327
328 #ifdef SUPPORT_PCRE2_32
329 #define PCRE2_CODE_UNIT_WIDTH 32
330 #define PCRE2_SUFFIX(a) G(a,32)
331 #include "pcre2_intmodedep.h"
332 #include "pcre2_printint.c"
333 #undef PCRE2_CODE_UNIT_WIDTH
334 #undef PCRE2_SUFFIX
335 #endif /* SUPPORT_PCRE2_32 */
336
337 #define PCRE2_SUFFIX(a) a
338
339 /* We need to be able to check input text for UTF-8 validity, whatever code
340 widths are actually available, because the input to pcre2test is always in
341 8-bit code units. So we include the UTF validity checking function for 8-bit
342 code units. */
343
344 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
345
346 #define PCRE2_CODE_UNIT_WIDTH 8
347 #undef PCRE2_SPTR
348 #define PCRE2_SPTR PCRE2_SPTR8
349 #include "pcre2_valid_utf.c"
350 #undef PCRE2_CODE_UNIT_WIDTH
351 #undef PCRE2_SPTR
352
/* Choose the default test mode. If there is 8-bit support, that is the
default; if there is also 16-bit or 32-bit support, it can be selected by a
command-line option. If there is no 8-bit support, there must be 16-bit or
32-bit support, so the default is the first of those that is available. The
config function, JIT stack, contexts, and version string are the same in all
modes, so the form belonging to the first available width is used for them. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE           PCRE8_MODE
#define VERSION_TYPE                PCRE2_UCHAR8
#define PCRE2_CONFIG                pcre2_config_8
#define PCRE2_JIT_STACK             pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE           PCRE16_MODE
#define VERSION_TYPE                PCRE2_UCHAR16
#define PCRE2_CONFIG                pcre2_config_16
#define PCRE2_JIT_STACK             pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE           PCRE32_MODE
#define VERSION_TYPE                PCRE2_UCHAR32
#define PCRE2_CONFIG                pcre2_config_32
#define PCRE2_JIT_STACK             pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_32

#endif
389
/* ------------- Structure and table for handling #-commands ------------- */

/* Each table entry pairs a #-command name with its CMD_xxx identifier. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Command identifiers. CMD_UNKNOWN comes last and has no entry in cmdlist. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_LOADTABLES,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "loadtables",      CMD_LOADTABLES },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

/* Number of entries in cmdlist */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
414
/* ------------- Structures and tables for handling modifiers -------------- */

/* Names of the newline types, indexed by newline value. This table must be
kept in step with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",    /* 0 */
  "CR",         /* 1 */
  "LF",         /* 2 */
  "CRLF",       /* 3 */
  "ANY",        /* 4 */
  "ANYCRLF",    /* 5 */
  "NUL"         /* 6 */
};
422
423 /* Structure and table for handling pattern conversion types. */
424
425 typedef struct convertstruct {
426 const char *name;
427 uint32_t option;
428 } convertstruct;
429
430 static convertstruct convertlist[] = {
431 { "glob", PCRE2_CONVERT_GLOB },
432 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
433 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
434 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
435 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
436 { "unset", CONVERT_UNSET }};
437
438 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
439
/* Modifier types and applicability. The first group of values says what a
modifier applies to; the second group says what kind of value it takes. */

enum {
  /* Applicability */

  MOD_CTC,    /* Applies to a compile context */
  MOD_CTM,    /* Applies to a match context */
  MOD_PAT,    /* Applies to a pattern */
  MOD_PATP,   /* Ditto, OK for Perl test */
  MOD_DAT,    /* Applies to a data line */
  MOD_DATP,   /* Ditto, OK for Perl test */
  MOD_PD,     /* Applies to a pattern or a data line */
  MOD_PDP,    /* As MOD_PD, OK for Perl test */
  MOD_PND,    /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,   /* As MOD_PND, OK for Perl test */

  /* Value types */

  MOD_CHR,    /* Is a single character */
  MOD_CON,    /* Is a "convert" type/options list */
  MOD_CTL,    /* Is a control bit */
  MOD_BSR,    /* Is a BSR value */
  MOD_IN2,    /* Is one or two unsigned integers */
  MOD_INS,    /* Is a signed integer */
  MOD_INT,    /* Is an unsigned integer */
  MOD_IND,    /* Is an unsigned integer, but no value => default */
  MOD_NL,     /* Is a newline value */
  MOD_NN,     /* Is a number or a name; more than one may occur */
  MOD_OPT,    /* Is an option bit */
  MOD_SIZ,    /* Is a PCRE2_SIZE value */
  MOD_STR     /* Is a string */
};
465
/* Control bits. Some apply to compiling, some to matching, but some can be set
either on a pattern or a data line, so they must all be distinct. There are now
so many of them that they are split into two fields. Each bit of this first
32-bit word is used exactly once. */

#define CTL_AFTERTEXT          (1u <<  0)
#define CTL_ALLAFTERTEXT       (1u <<  1)
#define CTL_ALLCAPTURES        (1u <<  2)
#define CTL_ALLUSEDTEXT        (1u <<  3)
#define CTL_ALTGLOBAL          (1u <<  4)
#define CTL_BINCODE            (1u <<  5)
#define CTL_CALLOUT_CAPTURE    (1u <<  6)
#define CTL_CALLOUT_INFO       (1u <<  7)
#define CTL_CALLOUT_NONE       (1u <<  8)
#define CTL_DFA                (1u <<  9)
#define CTL_EXPAND             (1u << 10)
#define CTL_FINDLIMITS         (1u << 11)
#define CTL_FINDLIMITS_NOHEAP  (1u << 12)
#define CTL_FULLBINCODE        (1u << 13)
#define CTL_GETALL             (1u << 14)
#define CTL_GLOBAL             (1u << 15)
#define CTL_HEXPAT             (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO               (1u << 17)
#define CTL_JITFAST            (1u << 18)
#define CTL_JITVERIFY          (1u << 19)
#define CTL_MARK               (1u << 20)
#define CTL_MEMORY             (1u << 21)
#define CTL_NULLCONTEXT        (1u << 22)
#define CTL_POSIX              (1u << 23)
#define CTL_POSIX_NOSUB        (1u << 24)
#define CTL_PUSH               (1u << 25)  /* These three must be */
#define CTL_PUSHCOPY           (1u << 26)  /*   all in the same */
#define CTL_PUSHTABLESCOPY     (1u << 27)  /*     word. */
#define CTL_STARTCHAR          (1u << 28)
#define CTL_USE_LENGTH         (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT         (1u << 30)
#define CTL_ZERO_TERMINATE     (1u << 31)

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)  /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)
508
/* Second control word */

#define CTL2_SUBSTITUTE_CALLOUT           (1u <<  0)
#define CTL2_SUBSTITUTE_EXTENDED          (1u <<  1)
#define CTL2_SUBSTITUTE_LITERAL           (1u <<  2)
#define CTL2_SUBSTITUTE_MATCHED           (1u <<  3)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH   (1u <<  4)
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY  (1u <<  5)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET     (1u <<  6)
#define CTL2_SUBSTITUTE_UNSET_EMPTY       (1u <<  7)
#define CTL2_SUBJECT_LITERAL              (1u <<  8)
#define CTL2_CALLOUT_NO_WHERE             (1u <<  9)
#define CTL2_CALLOUT_EXTRA                (1u << 10)
#define CTL2_ALLVECTOR                    (1u << 11)
#define CTL2_NULL_SUBJECT                 (1u << 12)
#define CTL2_NULL_REPLACEMENT             (1u << 13)
#define CTL2_FRAMESIZE                    (1u << 14)

/* The two highest bits are informational. */

#define CTL2_NL_SET                       (1u << 30)  /* Informational */
#define CTL2_BSR_SET                      (1u << 31)  /* Informational */
529
/* The matching controls that may be set either on a pattern or on a data line.
They are copied from the pattern controls as initial settings for data line
controls. Note that CTL_MEMORY is not included here, because it does different
things in the two cases. */

/* From the first control word */

#define CTL_ALLPD \
  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | CTL_ALLUSEDTEXT | \
   CTL_ALTGLOBAL | CTL_GLOBAL | CTL_MARK | CTL_STARTCHAR | CTL_UTF8_INPUT)

/* From the second control word */

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_CALLOUT | CTL2_SUBSTITUTE_EXTENDED | \
   CTL2_SUBSTITUTE_LITERAL | CTL2_SUBSTITUTE_MATCHED | \
   CTL2_SUBSTITUTE_OVERFLOW_LENGTH | CTL2_SUBSTITUTE_REPLACEMENT_ONLY | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET | CTL2_SUBSTITUTE_UNSET_EMPTY | \
   CTL2_ALLVECTOR)
554
555 /* Structures for holding modifier information for patterns and subject strings
556 (data). Fields containing modifiers that can be set either for a pattern or a
557 subject must be at the start and in the same order in both cases so that the
558 same offset in the big table below works for both. */
559
560 typedef struct patctl { /* Structure for pattern modifiers. */
561 uint32_t options; /* Must be in same position as datctl */
562 uint32_t control; /* Must be in same position as datctl */
563 uint32_t control2; /* Must be in same position as datctl */
564 uint32_t jitstack; /* Must be in same position as datctl */
565 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
566 uint32_t substitute_skip; /* Must be in same position as patctl */
567 uint32_t substitute_stop; /* Must be in same position as patctl */
568 uint32_t jit;
569 uint32_t stackguard_test;
570 uint32_t tables_id;
571 uint32_t convert_type;
572 uint32_t convert_length;
573 uint32_t convert_glob_escape;
574 uint32_t convert_glob_separator;
575 uint32_t regerror_buffsize;
576 uint8_t locale[LOCALESIZE];
577 } patctl;
578
579 #define MAXCPYGET 10
580 #define LENCPYGET 64
581
582 typedef struct datctl { /* Structure for data line modifiers. */
583 uint32_t options; /* Must be in same position as patctl */
584 uint32_t control; /* Must be in same position as patctl */
585 uint32_t control2; /* Must be in same position as patctl */
586 uint32_t jitstack; /* Must be in same position as patctl */
587 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
588 uint32_t substitute_skip; /* Must be in same position as patctl */
589 uint32_t substitute_stop; /* Must be in same position as patctl */
590 uint32_t startend[2];
591 uint32_t cerror[2];
592 uint32_t cfail[2];
593 int32_t callout_data;
594 int32_t copy_numbers[MAXCPYGET];
595 int32_t get_numbers[MAXCPYGET];
596 uint32_t oveccount;
597 uint32_t offset;
598 uint8_t copy_names[LENCPYGET];
599 uint8_t get_names[LENCPYGET];
600 } datctl;
601
/* Identifiers that select which context is to be modified. */

enum {
  CTX_PAT,       /* Active pattern context */
  CTX_POPPAT,    /* Ditto, for a popped pattern */
  CTX_DEFPAT,    /* Default pattern context */
  CTX_DAT,       /* Active data (match) context */
  CTX_DEFDAT     /* Default data (match) context */
};
609
/* Shorthand for field offsets, to simplify the big table below: CO and MO give
offsets within the real compile and match context structures, PO within patctl,
and DO within datctl. PD is the same as PO. */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)
#define PO(field) offsetof(patctl, field)
#define PD(field) PO(field)
#define DO(field) offsetof(datctl, field)
617
618 /* Table of all long-form modifiers. Must be in collating sequence of modifier
619 name because it is searched by binary chop. */
620
621 typedef struct modstruct {
622 const char *name;
623 uint16_t which;
624 uint16_t type;
625 uint32_t value;
626 PCRE2_SIZE offset;
627 } modstruct;
628
629 static modstruct modlist[] = {
630 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
631 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
632 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
633 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
634 { "allow_lookaround_bsk", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
635 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
636 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
637 { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
638 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
639 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
640 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
641 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
642 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
643 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
644 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
645 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
646 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
647 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
648 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
649 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
650 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
651 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
652 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
653 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
654 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
655 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
656 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
657 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
658 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
659 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
660 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
661 { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
662 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
663 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
664 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
665 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
666 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
667 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
668 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
669 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
670 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
671 { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
672 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
673 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
674 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
675 { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
676 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
677 { "find_limits_noheap", MOD_DAT, MOD_CTL, CTL_FINDLIMITS_NOHEAP, DO(control) },
678 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
679 { "framesize", MOD_PAT, MOD_CTL, CTL2_FRAMESIZE, PO(control2) },
680 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
681 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
682 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
683 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
684 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
685 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
686 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
687 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
688 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
689 { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
690 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
691 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
692 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
693 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
694 { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
695 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
696 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
697 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
698 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
699 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
700 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
701 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
702 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
703 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
704 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
705 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
706 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
707 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
708 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
709 { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
710 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
711 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
712 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
713 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
714 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
715 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
716 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
717 { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) },
718 { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) },
719 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
720 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
721 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
722 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
723 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
724 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
725 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
726 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
727 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
728 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
729 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
730 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
731 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
732 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
733 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
734 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
735 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
736 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
737 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
738 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
739 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
740 { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
741 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
742 { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) },
743 { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) },
744 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
745 { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
746 { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
747 { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
748 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
749 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
750 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
751 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
752 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
753 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
754 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
755 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
756 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
757 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
758 };
759
/* Number of entries in modlist. The expansion is fully parenthesized so that
the macro stays a single operand when it is used inside a larger expression
(after a cast, or next to %, / or *). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
761
/* Masks of the options and control bits that are supported for use with the
POSIX interface. There is one mask per option/control word, for compile time
and for match time. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS \
  (PCRE2_CASELESS | PCRE2_DOTALL | PCRE2_LITERAL | PCRE2_MULTILINE | \
   PCRE2_UCP | PCRE2_UTF | PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS \
  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_EXPAND | CTL_HEXPAT | CTL_POSIX | \
   CTL_POSIX_NOSUB | CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS \
  (PCRE2_NOTBOL | PCRE2_NOTEMPTY | PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT)
781
/* Control bits that are not ignored with 'push'; one mask for each of the two
control words. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS \
  (CTL_BINCODE | CTL_CALLOUT_INFO | CTL_FULLBINCODE | CTL_HEXPAT | CTL_INFO | \
   CTL_JITVERIFY | CTL_MEMORY | CTL_PUSH | CTL_PUSHCOPY | \
   CTL_PUSHTABLESCOPY | CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 \
  (CTL2_BSR_SET | CTL2_FRAMESIZE | CTL2_NL_SET)

/* Controls that apply only at compile time with 'push'. */

#define PUSH_COMPILE_ONLY_CONTROLS   CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)

/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS \
  (CTL_HEXPAT | CTL_POSIX | CTL_POSIX_NOSUB | CTL_PUSH | \
   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY | CTL_USE_LENGTH)
801
802 /* Pattern controls that are mutually exclusive. At present these are all in
803 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
804 CTL_POSIX, so it doesn't need its own entries. */
805
806 static uint32_t exclusive_pat_controls[] = {
807 CTL_POSIX | CTL_PUSH,
808 CTL_POSIX | CTL_PUSHCOPY,
809 CTL_POSIX | CTL_PUSHTABLESCOPY,
810 CTL_PUSH | CTL_PUSHCOPY,
811 CTL_PUSH | CTL_PUSHTABLESCOPY,
812 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
813 CTL_EXPAND | CTL_HEXPAT };
814
815 /* Data controls that are mutually exclusive. At present these are all in the
816 first control word. */
817
818 static uint32_t exclusive_dat_controls[] = {
819 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
820 CTL_FINDLIMITS | CTL_NULLCONTEXT,
821 CTL_FINDLIMITS_NOHEAP | CTL_NULLCONTEXT };
822
/* Table of single-character abbreviated modifiers. The index field is
initialized to -1, but the first time the modifier is encountered, it is filled
in with the index of the full entry in modlist, to save repeated searching when
processing multiple test items. This short list is searched serially, so its
order does not matter. */

typedef struct c1modstruct {
  const char *fullname;   /* Name of the corresponding long modifier */
  uint32_t    onechar;    /* The single-character abbreviation */
  int         index;      /* Cached modlist index, or -1 if not yet found */
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B', -1 },
  { "info",            'I', -1 },
  { "global",          'g', -1 },
  { "caseless",        'i', -1 },
  { "multiline",       'm', -1 },
  { "no_auto_capture", 'n', -1 },
  { "dotall",          's', -1 },
  { "extended",        'x', -1 }
};

/* Number of entries in c1modlist. The expansion is parenthesized so that it
behaves as one operand inside any surrounding expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
847
/* Table of arguments for the -C command line option. The helper macros below
give every build-time feature a 0/1 value, which makes the table itself easier
to read. */

/* SUPPORT_<n> is 1 when the corresponding SUPPORT_PCRE2_<n> is defined, and
otherwise defaults to 0. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#endif
#ifndef SUPPORT_8
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#endif
#ifndef SUPPORT_16
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#endif
#ifndef SUPPORT_32
#define SUPPORT_32 0
#endif

/* EBCDIC support flag and the EBCDIC newline value (0 in non-EBCDIC builds). */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif

/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
884
885 typedef struct coptstruct {
886 const char *name;
887 uint32_t type;
888 uint32_t value;
889 } coptstruct;
890
891 enum { CONF_BSR,
892 CONF_FIX,
893 CONF_FIZ,
894 CONF_INT,
895 CONF_NL
896 };
897
898 static coptstruct coptlist[] = {
899 { "backslash-C", CONF_FIX, BACKSLASH_C },
900 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
901 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
902 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
903 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
904 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
905 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
906 { "pcre2-16", CONF_FIX, SUPPORT_16 },
907 { "pcre2-32", CONF_FIX, SUPPORT_32 },
908 { "pcre2-8", CONF_FIX, SUPPORT_8 },
909 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
910 };
911
912 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
913
914 #undef SUPPORT_8
915 #undef SUPPORT_16
916 #undef SUPPORT_32
917 #undef SUPPORT_EBCDIC
918
919
920 /* ----------------------- Static variables ------------------------ */
921
922 static FILE *infile;
923 static FILE *outfile;
924
925 static const void *last_callout_mark;
926 static PCRE2_JIT_STACK *jit_stack = NULL;
927 static size_t jit_stack_size = 0;
928
929 static BOOL first_callout;
930 static BOOL jit_was_used;
931 static BOOL restrict_for_perl_test = FALSE;
932 static BOOL show_memory = FALSE;
933
934 static int jitrc; /* Return from JIT compile */
935 static int test_mode = DEFAULT_TEST_MODE;
936 static int timeit = 0;
937 static int timeitm = 0;
938
939 clock_t total_compile_time = 0;
940 clock_t total_jit_compile_time = 0;
941 clock_t total_match_time = 0;
942
943 static uint32_t code_unit_size; /* Bytes */
944 static uint32_t dfa_matched;
945 static uint32_t forbid_utf = 0;
946 static uint32_t maxlookbehind;
947 static uint32_t max_oveccount;
948 static uint32_t callout_count;
949 static uint32_t maxcapcount;
950
951 static uint16_t local_newline_default = 0;
952
953 static VERSION_TYPE jittarget[VERSION_SIZE];
954 static VERSION_TYPE version[VERSION_SIZE];
955 static VERSION_TYPE uversion[VERSION_SIZE];
956
957 static patctl def_patctl;
958 static patctl pat_patctl;
959 static datctl def_datctl;
960 static datctl dat_datctl;
961
962 static void *patstack[PATSTACKSIZE];
963 static int patstacknext = 0;
964
965 static void *malloclist[MALLOCLISTSIZE];
966 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
967 static uint32_t malloclistptr = 0;
968
969 #ifdef SUPPORT_PCRE2_8
970 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
971 #endif
972
973 static int *dfa_workspace = NULL;
974 static const uint8_t *locale_tables = NULL;
975 static const uint8_t *use_tables = NULL;
976 static uint8_t locale_name[32];
977 static uint8_t *tables3 = NULL; /* For binary-loaded tables */
978 static uint32_t loadtables_length = 0;
979
/* Buffers are needed for building 16/32-bit strings. 8-bit strings need no
rebuilding, but the same naming scheme is set up so that macros can refer to
any width. All input lines are read into "buffer", whose size is the same as
that of pbuffer8. Pattern lines are always copied to pbuffer8 for use in
callouts, even when they are actually compiled from pbuffer16 or pbuffer32. */

static size_t   pbuffer8_size = 50000;   /* Initial size, bytes */
static uint8_t *pbuffer8 = NULL;
static uint8_t *buffer = NULL;

/* All processed data lines are put into dbuffer. In non-8-bit modes it is cast
as needed, and it grows as necessary for long data lines. */

static size_t   dbuffer_size = 1u << 14; /* Initial size, bytes */
static uint8_t *dbuffer = NULL;
995
996
/* ---------------- Mode-dependent variables -------------------*/

/* One set of variables exists for each supported code unit width. The names
differ only in their 8/16/32 suffix, so that the mode-selecting macros can
build them by token pasting. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
static uint16_t *pbuffer16 = NULL;
#endif

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
static uint32_t *pbuffer32 = NULL;
#endif
1029
1030
/* ---------------- Macros that work in all modes ----------------- */

/* These are thin wrappers: CAST8VAR is CASTVAR with a uint8_t pointer type,
SET and SETPLUS are SETOP with the = and += operators, and strlen8 is strlen
applied to its argument cast to char pointer. */

#define CAST8VAR(x)   CASTVAR(uint8_t *, x)
#define SET(x,y)      SETOP(x,y,=)
#define SETPLUS(x,y)  SETOP(x,y,+=)
#define strlen8(x)    strlen((char *)x)
1037
1038
1039 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1040
1041 /* Define macros for variables and functions that must be selected dynamically
1042 depending on the mode setting (8, 16, 32). These are dependent on which modes
1043 are supported. */
1044
1045 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1046 defined (SUPPORT_PCRE2_32)) >= 2
1047
1048 /* ----- All three modes supported ----- */
1049
1050 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1051
/* Expression macros that select the 8-, 16- or 32-bit object according to the
current test_mode. G() joins a name and a width suffix into one identifier. */

/* Field b of the width-specific pointer a, cast to type t. */

#define CASTFLD(t,a,b) \
  ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
   (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
   (t)(G(a,32)->b))

/* The width-specific variable x, cast to type t. */

#define CASTVAR(t,x) \
  ((test_mode == PCRE8_MODE)? (t)G(x,8) : \
   (test_mode == PCRE16_MODE)? (t)G(x,16) : \
   (t)G(x,32))

/* Code unit b of the string at a, read at the current width and converted to
uint32_t. */

#define CODE_UNIT(a,b) \
  ((test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
   (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
   (uint32_t)(((PCRE2_SPTR32)(a))[b]))
1063
/* Copy the convert context b over the convert context a, using the object
size that belongs to the current mode. */

#define CONCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
1070
/* Copy c code units from b into the width-specific buffer a. The byte count
is c, c*2 or c*4 according to the current mode; nothing is copied if test_mode
is none of the three known modes.

The if-chain has no final plain "else", so it is wrapped in do { } while (0).
Without the wrapper, an "else" written by the caller after the macro would
silently attach to the last "else if" inside the expansion. */

#define CONVERT_COPY(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),(char *)b,c); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),(char *)b,(c)*2); \
    else if (test_mode == PCRE32_MODE) \
      memcpy(G(a,32),(char *)b,(c)*4); \
  } while (0)
1078
/* Copy the match context b over the match context a, using the object size
that belongs to the current mode. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))

/* Field b of the width-specific pointer a, as an expression. */

#define FLD(a,b) \
  ((test_mode == PCRE8_MODE)? G(a,8)->b : \
   (test_mode == PCRE16_MODE)? G(a,16)->b : \
   G(a,32)->b)

/* Copy the compile context b over the compile context a, using the object
size that belongs to the current mode. */

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
1095
/* Call the pchars function for the current mode on the string p, starting
offset code units in. PCHARS stores the function's result in lv. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

/* The same call as PCHARS, with the result explicitly discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1111
/* Statement macros that forward to the _8, _16 or _32 library function chosen
by test_mode. Where a result is produced it is assigned to the first macro
argument. */

/* Enumerate the callouts of the current compiled pattern; b is the callback
(cast to the width-specific signature) and c its user data. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else \
    a = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)

/* Copy the code at the untyped pointer b into the width-specific variable a. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_code_copy_8(b); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_code_copy_16(b); \
  else \
    G(a,32) = pcre2_code_copy_32(b)

/* Copy the width-specific code b and store the copy in a as a void pointer. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_32(G(b,32))

/* As PCRE2_CODE_COPY_TO_VOID, but using the copy-with-tables function. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_with_tables_32(G(b,32))

/* Compile the width-specific pattern b into the width-specific variable a. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else \
    G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)

/* Free a converted pattern. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
1159
/* Run a DFA match; a receives the return code, b is the compiled code, c the
subject and g the match data. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else \
    a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)

/* Fetch the message for error code a into the width-specific buffer b. The
length passed is the variable <b><width>_size, divided by 2 or 4 in the 16-bit
and 32-bit modes. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
  else \
    r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))

/* Set a to the ovector count of the match data b. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_ovector_count_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_ovector_count_16(G(b,16)); \
  else \
    a = pcre2_get_ovector_count_32(G(b,32))

/* Set a to the start character offset recorded in the match data b. */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_startchar_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_startchar_16(G(b,16)); \
  else \
    a = pcre2_get_startchar_32(G(b,32))

/* JIT-compile the code a with options b; r receives the return code. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_jit_compile_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_jit_compile_16(G(a,16),b); \
  else \
    r = pcre2_jit_compile_32(G(a,32),b)

/* Release unused JIT memory for the general context a. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(a,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(a,32))

/* Run a match via the JIT fast path; arguments as for PCRE2_MATCH. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1209
/* JIT stack handling. These three macros used to end their expansion with a
semicolon, unlike the other mode-selecting macros. The caller's own semicolon
then produced an extra empty statement, so "if (x) MACRO(...); else ..." did
not compile. The expansions now end without a semicolon; every use must supply
its own, as for the other statement macros. */

/* Create a JIT stack and store it, cast to PCRE2_JIT_STACK *, in a. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)

/* Assign the JIT stack callback b, with data c, to the match context a. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)

/* Free the JIT stack a. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1233
/* Build character tables using the general context c; a receives the result. */

#define PCRE2_MAKETABLES(a,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_maketables_8(G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_maketables_16(G(c,16)); \
  else \
    a = pcre2_maketables_32(G(c,32))

/* Free the tables a using the general context c. */

#define PCRE2_MAKETABLES_FREE(c,a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_maketables_free_8(G(c,8),a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_maketables_free_16(G(c,16),a); \
  else \
    pcre2_maketables_free_32(G(c,32),a)

/* Run a match; a receives the return code, b is the compiled code, c the
subject and g the match data. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* Create match data with b ovector pairs, using the general context c. */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_8(b,G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_16(b,G(c,16)); \
  else \
    G(a,32) = pcre2_match_data_create_32(b,G(c,32))

/* Create match data sized from the compiled pattern b. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),G(c,16)); \
  else \
    G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32))

/* Free the match data a. */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(a,16)); \
  else \
    pcre2_match_data_free_32(G(a,32))

/* Convert the pattern b; e is the address of the output pointer and g the
convert context. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
  else \
    a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))

/* Query item c of the compiled pattern b, writing the answer through d. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_info_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_info_16(G(b,16),c,d); \
  else \
    a = pcre2_pattern_info_32(G(b,32),c,d)

/* Print the current compiled code to outfile. */

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8,outfile,a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16,outfile,a); \
  else \
    pcre2_printint_32(compiled_code32,outfile,a)
1299
/* Decode serialized patterns into the array a; r receives the return code and
d is the general context. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else \
    r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))

/* Serialize the patterns in the array a; r receives the return code and e is
the general context. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else \
    r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))

/* Free serialized data. */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(a); \
  else \
    pcre2_serialize_free_32(a)
1323
/* Set r to the number of codes held in the serialized data a. The expansion
used to end with a semicolon followed by a line continuation, which both added
an empty statement after the caller's own semicolon and spliced the following
line into the macro. It now ends cleanly, like the other statement macros, so
it can be followed by "else". */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1331
/* Install the callout function b, with user data c, in the match context a.
The function pointer is cast to the width-specific callout signature. The
expansion no longer ends with its own semicolon (which produced an extra empty
statement and broke "if (x) PCRE2_SET_CALLOUT(...); else ..."); the caller
supplies it, as for the other statement macros. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1339
/* Context setters. In each macro a names the width-specific context and the
remaining arguments are passed straight through; the two glob setters also
store the library's return code in r. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(a,16),b); \
  else \
    pcre2_set_character_tables_32(G(a,32),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_depth_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_depth_limit_16(G(a,16),b); \
  else \
    pcre2_set_depth_limit_32(G(a,32),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_separator_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_separator_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_separator_32(G(a,32),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_escape_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_escape_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_escape_32(G(a,32),b)

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_heap_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_heap_limit_16(G(a,16),b); \
  else \
    pcre2_set_heap_limit_32(G(a,32),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(a,16),b); \
  else \
    pcre2_set_match_limit_32(G(a,32),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(a,16),b); \
  else \
    pcre2_set_max_pattern_length_32(G(a,32),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(a,16),b); \
  else \
    pcre2_set_offset_limit_32(G(a,32),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else \
    pcre2_set_parens_nest_limit_32(G(a,32),b)

/* Install the substitute callout b, cast to the width-specific signature,
with user data c. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(a,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(a,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_substitute_callout_32(G(a,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
1430
/* Substitution and substring extraction. In each macro a receives the return
code; string and buffer arguments are cast to the pointer type of the current
width. */

/* Substitute: b is the compiled code, c the subject, g the match data, i the
replacement and k the output buffer. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)

/* Copy a substring, selected by the width-specific name c, into buffer d. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)

/* Copy a substring, selected by number c, into buffer d. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)

/* Free a substring obtained from one of the GET macros. */

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)a)

/* Get a substring, selected by the width-specific name c; d is the address of
the result pointer. */

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)

/* Get a substring, selected by number c; d is the address of the result
pointer. */

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)

/* Get the length of a substring, selected by the width-specific name c. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
  else \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)

/* Get the length of a substring, selected by number c. */

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)

/* Get the list of all substrings; c is the address of the list pointer. */

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else \
    a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)

/* Free a substring list. */

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

/* Translate the width-specific group name c into a group number. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
  else \
    a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1519
1520 #define PTR(x) ( \
1521 (test_mode == PCRE8_MODE)? (void *)G(x,8) : \
1522 (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
1523 (void *)G(x,32))
1524
1525 #define SETFLD(x,y,z) \
1526 if (test_mode == PCRE8_MODE) G(x,8)->y = z; \
1527 else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \
1528 else G(x,32)->y = z
1529
1530 #define SETFLDVEC(x,y,v,z) \
1531 if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \
1532 else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \
1533 else G(x,32)->y[v] = z
1534
1535 #define SETOP(x,y,z) \
1536 if (test_mode == PCRE8_MODE) G(x,8) z y; \
1537 else if (test_mode == PCRE16_MODE) G(x,16) z y; \
1538 else G(x,32) z y
1539
1540 #define SETCASTPTR(x,y) \
1541 if (test_mode == PCRE8_MODE) \
1542 G(x,8) = (uint8_t *)(y); \
1543 else if (test_mode == PCRE16_MODE) \
1544 G(x,16) = (uint16_t *)(y); \
1545 else \
1546 G(x,32) = (uint32_t *)(y)
1547
1548 #define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
1549 (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
1550 ((int)strlen32((PCRE2_SPTR32)p)))
1551
/* Call the mode-selected variant of function "a" with the mode-selected
variant of "b" as its only argument. */

#define SUB1(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a, 8)(G(b, 8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a, 16)(G(b, 16)); \
  else \
    G(a, 32)(G(b, 32))

/* As SUB1, with a second mode-selected argument "c". */

#define SUB2(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a, 8)(G(b, 8), G(c, 8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a, 16)(G(b, 16), G(c, 16)); \
  else \
    G(a, 32)(G(b, 32), G(c, 32))
1561
/* True when the variant of "x" belonging to the current test_mode satisfies
"<variant> r (y)", where "r" is an operator. Only the clause for the active
mode can succeed. */

#define TEST(x,r,y) \
  ((test_mode == PCRE8_MODE && G(x, 8) r (y)) || \
   (test_mode == PCRE16_MODE && G(x, 16) r (y)) || \
   (test_mode == PCRE32_MODE && G(x, 32) r (y)))

/* As TEST, but the left-hand operand is field "f" of the structure that the
mode-selected variant of "x" points to. */

#define TESTFLD(x,f,r,y) \
  ((test_mode == PCRE8_MODE && G(x, 8)->f r (y)) || \
   (test_mode == PCRE16_MODE && G(x, 16)->f r (y)) || \
   (test_mode == PCRE32_MODE && G(x, 32)->f r (y)))
1571
1572
1573 /* ----- Two out of three modes are supported ----- */
1574
1575 #else
1576
/* The BITONE and BITTWO macros chosen below let a single set of definitions
work for each of the three possible pairs of supported modes. */
1579
/* Select the two widths that the two-mode macros are built from. In every
case BITONE is the larger width and BITTWO the smaller one. */

#if defined SUPPORT_PCRE2_32 && defined SUPPORT_PCRE2_16

/* ----- 32-bit and 16-bit but not 8-bit supported ----- */
#define BITONE 32
#define BITTWO 16

#elif defined SUPPORT_PCRE2_32 && defined SUPPORT_PCRE2_8

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */
#define BITONE 32
#define BITTWO 8

#else

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */
/* Neither test above matched, so this is taken to be the remaining pair. */
#define BITONE 16
#define BITTWO 8

#endif
1598
1599
1600 /* ----- Common macros for two-mode cases ----- */
1601
/* BITONE and BITTWO expressed in bytes rather than bits. */

#define BYTEONE (BITONE / 8)
#define BYTETWO (BITTWO / 8)
1604
/* Field "b" of the structure pointed to by the mode-selected variant of "a",
cast to type "t". */

#define CASTFLD(t,a,b) \
  ((test_mode == G(G(PCRE, BITONE), _MODE))? \
    (t)(G(a, BITONE)->b) : (t)(G(a, BITTWO)->b))

/* The mode-selected variant of "x", cast to type "t". */

#define CASTVAR(t,x) \
  ((test_mode == G(G(PCRE, BITONE), _MODE))? \
    (t)G(x, BITONE) : (t)G(x, BITTWO))

/* Element "b" of "a", read through the PCRE2_SPTR type of the current mode and
widened to uint32_t. */

#define CODE_UNIT(a,b) \
  ((test_mode == G(G(PCRE, BITONE), _MODE))? \
    (uint32_t)(((G(PCRE2_SPTR, BITONE))(a))[b]) : \
    (uint32_t)(((G(PCRE2_SPTR, BITTWO))(a))[b]))
1617
/* memcpy() one convert context over another: the mode-selected variant of "b"
is the source, that of "a" the destination, and the size is that of the
pcre2_convert_context_ type of the same width. */

#define CONCTXCPY(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    memcpy(G(a, BITONE), G(b, BITONE), \
      sizeof(G(pcre2_convert_context_, BITONE))); \
  else \
    memcpy(G(a, BITTWO), G(b, BITTWO), \
      sizeof(G(pcre2_convert_context_, BITTWO)))
1623
/* Copy "c" units from "b" into the mode-selected variant of "a". The byte
count given to memcpy() is "c" multiplied by BYTEONE or BYTETWO. The expansion
is one parenthesized expression (its value is memcpy()'s return value), so an
adjacent operator applies to the whole result rather than to the mode test, and
"b" is parenthesized before it is cast. */

#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    memcpy(G(a,BITONE),(char *)(b),(c)*BYTEONE) : \
    memcpy(G(a,BITTWO),(char *)(b),(c)*BYTETWO))
1628
/* memcpy() one match context over another; "a" names the destination and "b"
the source, both taken in the variant for the current mode. */

#define DATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    memcpy(G(a, BITONE), G(b, BITONE), \
      sizeof(G(pcre2_match_context_, BITONE))); \
  else \
    memcpy(G(a, BITTWO), G(b, BITTWO), \
      sizeof(G(pcre2_match_context_, BITTWO)))

/* Field "b" of the structure pointed to by the mode-selected variant of "a". */

#define FLD(a,b) \
  ((test_mode == G(G(PCRE, BITONE), _MODE))? \
    G(a, BITONE)->b : G(a, BITTWO)->b)

/* memcpy() one compile context over another, in the same way as DATCTXCPY. */

#define PATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    memcpy(G(a, BITONE), G(b, BITONE), \
      sizeof(G(pcre2_compile_context_, BITONE))); \
  else \
    memcpy(G(a, BITTWO), G(b, BITTWO), \
      sizeof(G(pcre2_compile_context_, BITTWO)))
1643
/* Call the pchars function for the current mode on "p" (cast to that mode's
PCRE2_SPTR type) advanced by "offset", and store its result in "lv". */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    lv = G(pchars, BITONE)((G(PCRE2_SPTR, BITONE))(p) + offset, \
      len, utf, f); \
  else \
    lv = G(pchars, BITTWO)((G(PCRE2_SPTR, BITTWO))(p) + offset, \
      len, utf, f)

/* The same call as PCHARS with the result explicitly discarded. */

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    (void)G(pchars, BITONE)((G(PCRE2_SPTR, BITONE))(p) + offset, \
      len, utf, f); \
  else \
    (void)G(pchars, BITTWO)((G(PCRE2_SPTR, BITTWO))(p) + offset, \
      len, utf, f)
1655
/* Call the width-specific pcre2_callout_enumerate_ function on the compiled
code for the current mode and store its result in "a". "b" is cast to the
callback type that takes the matching pcre2_callout_enumerate_block_ structure;
"c" is passed through. The function name stem carries its trailing underscore,
like every other stem in this file, so that the pasted name is
pcre2_callout_enumerate_<width> without relying on any further macro
expansion of the stem. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1663
/* Set the mode-selected variant of "a" to the result of pcre2_code_copy_
applied to "b", which is passed as given. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = G(pcre2_code_copy_, BITONE)(b); \
  else \
    G(a, BITTWO) = G(pcre2_code_copy_, BITTWO)(b)

/* Set "a" to the result of pcre2_code_copy_ applied to the mode-selected
variant of "b", converted to a void pointer. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = (void *)G(pcre2_code_copy_, BITONE)(G(b, BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_, BITTWO)(G(b, BITTWO))

/* As PCRE2_CODE_COPY_TO_VOID, but calling pcre2_code_copy_with_tables_. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = (void *)G(pcre2_code_copy_with_tables_, BITONE)(G(b, BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_with_tables_, BITTWO)(G(b, BITTWO))
1681
/* Compile the mode-selected variant of "b" with pcre2_compile_ and store the
result in the mode-selected variant of "a"; "c" to "g" are passed through. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = G(pcre2_compile_, BITONE)(G(b, BITONE), c, d, e, f, g); \
  else \
    G(a, BITTWO) = G(pcre2_compile_, BITTWO)(G(b, BITTWO), c, d, e, f, g)

/* Hand "a", cast to a PCRE2_UCHAR pointer of the current width, to
pcre2_converted_pattern_free_. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_converted_pattern_free_, BITONE)((G(PCRE2_UCHAR, BITONE) *)a); \
  else \
    G(pcre2_converted_pattern_free_, BITTWO)((G(PCRE2_UCHAR, BITTWO) *)a)

/* Call pcre2_dfa_match_ and store its result in "a". "b" and "g" are taken in
their mode-selected variants, "c" is cast to the PCRE2_SPTR type of the current
width, and the remaining arguments are passed through. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_dfa_match_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h, i, j); \
  else \
    a = G(pcre2_dfa_match_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h, i, j)
1701
/* Fetch the message for error "a" into the mode-selected variant of buffer
"b" and store the function result in "r". The length argument is that
variant's companion "_size" variable divided by BYTEONE or BYTETWO. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_get_error_message_, BITONE)(a, G(b, BITONE), \
      G(G(b, BITONE), _size/BYTEONE)); \
  else \
    r = G(pcre2_get_error_message_, BITTWO)(a, G(b, BITTWO), \
      G(G(b, BITTWO), _size/BYTETWO))

/* Store in "a" the result of pcre2_get_ovector_count_ for the mode-selected
variant of "b". */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_get_ovector_count_, BITONE)(G(b, BITONE)); \
  else \
    a = G(pcre2_get_ovector_count_, BITTWO)(G(b, BITTWO))

/* Store in "a" the result of pcre2_get_startchar_ for the mode-selected
variant of "b". */

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_get_startchar_, BITONE)(G(b, BITONE)); \
  else \
    a = G(pcre2_get_startchar_, BITTWO)(G(b, BITTWO))
1719
/* Call pcre2_jit_compile_ on the mode-selected variant of "a" with "b" passed
through; the result goes into "r". */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_jit_compile_, BITONE)(G(a, BITONE), b); \
  else \
    r = G(pcre2_jit_compile_, BITTWO)(G(a, BITTWO), b)

/* Call pcre2_jit_free_unused_memory_ with the mode-selected variant of "a". */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_jit_free_unused_memory_, BITONE)(G(a, BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_, BITTWO)(G(a, BITTWO))

/* Call pcre2_jit_match_ and store its result in "a". "b" and "g" are taken in
their mode-selected variants and "c" is cast to the PCRE2_SPTR type of the
current width; the other arguments are passed through. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_jit_match_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h); \
  else \
    a = G(pcre2_jit_match_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h)
1739
/* Call the pcre2_jit_stack_create_ function for the current mode with "b",
"c" and "d", and store the result in "a" after casting it to
PCRE2_JIT_STACK *. The expansion ends without a semicolon and without a line
continuation: the caller writes the terminating semicolon, which keeps the
macro usable as the body of an if that has an else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)
1745
/* Call pcre2_jit_stack_assign_ with the mode-selected variant of "a", with "b"
cast to the pcre2_jit_callback_ type of the current width, and with "c" passed
through. The expansion has no trailing semicolon; the caller supplies it. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c)

/* Call pcre2_jit_stack_free_ with "a" cast to a pointer to the
pcre2_jit_stack_ type of the current width. The expansion has no trailing
semicolon; the caller supplies it. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1757
/* Store in "a" the result of pcre2_maketables_ called with the mode-selected
variant of "c". */

#define PCRE2_MAKETABLES(a,c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_maketables_, BITONE)(G(c, BITONE)); \
  else \
    a = G(pcre2_maketables_, BITTWO)(G(c, BITTWO))

/* Call pcre2_maketables_free_ with the mode-selected variant of "c" and with
"a" passed through. Note that the argument order is (c, a). */

#define PCRE2_MAKETABLES_FREE(c,a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_maketables_free_, BITONE)(G(c, BITONE), a); \
  else \
    G(pcre2_maketables_free_, BITTWO)(G(c, BITTWO), a)
1769
/* Call pcre2_match_ and store its result in "a". "b" and "g" are taken in
their mode-selected variants and "c" is cast to the PCRE2_SPTR type of the
current width; the other arguments are passed through. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_match_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h); \
  else \
    a = G(pcre2_match_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h)

/* Set the mode-selected variant of "a" to the result of
pcre2_match_data_create_ called with "b" and the mode-selected variant of
"c". */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = G(pcre2_match_data_create_, BITONE)(b, G(c, BITONE)); \
  else \
    G(a, BITTWO) = G(pcre2_match_data_create_, BITTWO)(b, G(c, BITTWO))

/* Set the mode-selected variant of "a" to the result of
pcre2_match_data_create_from_pattern_ called with the mode-selected variants
of "b" and "c". */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(a, BITONE) = G(pcre2_match_data_create_from_pattern_, BITONE)( \
      G(b, BITONE), G(c, BITONE)); \
  else \
    G(a, BITTWO) = G(pcre2_match_data_create_from_pattern_, BITTWO)( \
      G(b, BITTWO), G(c, BITTWO))

/* Call pcre2_match_data_free_ with the mode-selected variant of "a". */

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_match_data_free_, BITONE)(G(a, BITONE)); \
  else \
    G(pcre2_match_data_free_, BITTWO)(G(a, BITTWO))
1795
/* Call pcre2_pattern_convert_ and store its result in "a". "b" and "g" are
taken in their mode-selected variants and "e" is cast to a pointer to a
PCRE2_UCHAR pointer of the current width; "c", "d" and "f" are passed
through. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_pattern_convert_, BITONE)(G(b, BITONE), c, d, \
      (G(PCRE2_UCHAR, BITONE) **)e, f, G(g, BITONE)); \
  else \
    a = G(pcre2_pattern_convert_, BITTWO)(G(b, BITTWO), c, d, \
      (G(PCRE2_UCHAR, BITTWO) **)e, f, G(g, BITTWO))

/* Store in "a" the result of pcre2_pattern_info_ for the mode-selected variant
of "b", with "c" and "d" passed through. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_pattern_info_, BITONE)(G(b, BITONE), c, d); \
  else \
    a = G(pcre2_pattern_info_, BITTWO)(G(b, BITTWO), c, d)

/* Call pcre2_printint_ with the compiled code for the current mode, outfile,
and "a". */

#define PCRE2_PRINTINT(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_printint_, BITONE)(G(compiled_code, BITONE), outfile, a); \
  else \
    G(pcre2_printint_, BITTWO)(G(compiled_code, BITTWO), outfile, a)
1813
/* Call pcre2_serialize_decode_ and store its result in "r". "a" is cast to a
pointer to a pcre2_code_ pointer of the current width and "d" is taken in its
mode-selected variant; "b" and "c" are passed through. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_serialize_decode_, BITONE)( \
      (G(pcre2_code_, BITONE) **)a, b, c, G(d, BITONE)); \
  else \
    r = G(pcre2_serialize_decode_, BITTWO)( \
      (G(pcre2_code_, BITTWO) **)a, b, c, G(d, BITTWO))

/* Call pcre2_serialize_encode_ and store its result in "r". "a" is cast to a
pointer to a const pcre2_code_ pointer of the current width and "e" is taken
in its mode-selected variant; "b", "c" and "d" are passed through. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_serialize_encode_, BITONE)( \
      (G(const pcre2_code_, BITONE) **)a, b, c, d, G(e, BITONE)); \
  else \
    r = G(pcre2_serialize_encode_, BITTWO)( \
      (G(const pcre2_code_, BITTWO) **)a, b, c, d, G(e, BITTWO))

/* Call pcre2_serialize_free_ with "a" passed as given. */

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_serialize_free_, BITONE)(a); \
  else \
    G(pcre2_serialize_free_, BITTWO)(a)

/* Store in "r" the result of pcre2_serialize_get_number_of_codes_ for "a". */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_serialize_get_number_of_codes_, BITONE)(a); \
  else \
    r = G(pcre2_serialize_get_number_of_codes_, BITTWO)(a)
1837
/* Call pcre2_set_callout_ with the mode-selected variant of "a", with "b" cast
to the callback type that takes the pcre2_callout_block_ structure of the
current width, and with "c" passed through. The expansion has no trailing
semicolon, matching the 8-bit-only definition and
PCRE2_SET_SUBSTITUTE_CALLOUT; the caller supplies it. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1845
/* The setters below share one shape: the pcre2_set_..._ function for the
current mode is called with the mode-selected variant of "a" followed by the
remaining arguments unchanged. The two glob setters also store the function
result in "r". */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_character_tables_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_character_tables_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_compile_recursion_guard_, BITONE)(G(a, BITONE), b, c); \
  else \
    G(pcre2_set_compile_recursion_guard_, BITTWO)(G(a, BITTWO), b, c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_depth_limit_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_depth_limit_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_set_glob_escape_, BITONE)(G(a, BITONE), b); \
  else \
    r = G(pcre2_set_glob_escape_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    r = G(pcre2_set_glob_separator_, BITONE)(G(a, BITONE), b); \
  else \
    r = G(pcre2_set_glob_separator_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_heap_limit_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_heap_limit_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_match_limit_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_match_limit_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_max_pattern_length_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_max_pattern_length_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_offset_limit_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_offset_limit_, BITTWO)(G(a, BITTWO), b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_parens_nest_limit_, BITONE)(G(a, BITONE), b); \
  else \
    G(pcre2_set_parens_nest_limit_, BITTWO)(G(a, BITTWO), b)
1905
/* Call pcre2_set_substitute_callout_ with the mode-selected variant of "a",
with "b" cast to the callback type that takes the
pcre2_substitute_callout_block_ structure of the current width, and with "c"
passed through. */

#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_set_substitute_callout_, BITONE)(G(a, BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_, BITONE) *, void *))b, c); \
  else \
    G(pcre2_set_substitute_callout_, BITTWO)(G(a, BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_, BITTWO) *, void *))b, c)

/* Call pcre2_substitute_ and store its result in "a". "b" and "g" are taken in
their mode-selected variants; "c" and "i" are cast to the PCRE2_SPTR type and
"k" to a PCRE2_UCHAR pointer of the current width; the other arguments are
passed through. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substitute_, BITONE)(G(b, BITONE), \
      (G(PCRE2_SPTR, BITONE))c, d, e, f, G(g, BITONE), h, \
      (G(PCRE2_SPTR, BITONE))i, j, (G(PCRE2_UCHAR, BITONE) *)k, l); \
  else \
    a = G(pcre2_substitute_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_SPTR, BITTWO))c, d, e, f, G(g, BITTWO), h, \
      (G(PCRE2_SPTR, BITTWO))i, j, (G(PCRE2_UCHAR, BITTWO) *)k, l)
1923
/* In the substring macros below, "a" receives the function result, "b" (and
"c" in the by-name forms) is taken in its mode-selected variant, and the buffer
argument "d" is cast to the PCRE2_UCHAR pointer type that the function of the
current width expects. */

/* Copy by name: "d" is cast to PCRE2_UCHAR *. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_copy_byname_, BITONE)(G(b, BITONE), \
      G(c, BITONE), (G(PCRE2_UCHAR, BITONE) *)d, e); \
  else \
    a = G(pcre2_substring_copy_byname_, BITTWO)(G(b, BITTWO), \
      G(c, BITTWO), (G(PCRE2_UCHAR, BITTWO) *)d, e)

/* Copy by number: "c" is passed through and "d" is cast to PCRE2_UCHAR *. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_copy_bynumber_, BITONE)(G(b, BITONE), c, \
      (G(PCRE2_UCHAR, BITONE) *)d, e); \
  else \
    a = G(pcre2_substring_copy_bynumber_, BITTWO)(G(b, BITTWO), c, \
      (G(PCRE2_UCHAR, BITTWO) *)d, e)

/* Call pcre2_substring_free_ with "a" cast to PCRE2_UCHAR *. */

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_substring_free_, BITONE)((G(PCRE2_UCHAR, BITONE) *)a); \
  else \
    G(pcre2_substring_free_, BITTWO)((G(PCRE2_UCHAR, BITTWO) *)a)

/* Get by name: "d" is cast to PCRE2_UCHAR **. */

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_get_byname_, BITONE)(G(b, BITONE), \
      G(c, BITONE), (G(PCRE2_UCHAR, BITONE) **)d, e); \
  else \
    a = G(pcre2_substring_get_byname_, BITTWO)(G(b, BITTWO), \
      G(c, BITTWO), (G(PCRE2_UCHAR, BITTWO) **)d, e)

/* Get by number: "c" is passed through and "d" is cast to PCRE2_UCHAR **. */

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_get_bynumber_, BITONE)(G(b, BITONE), c, \
      (G(PCRE2_UCHAR, BITONE) **)d, e); \
  else \
    a = G(pcre2_substring_get_bynumber_, BITTWO)(G(b, BITTWO), c, \
      (G(PCRE2_UCHAR, BITTWO) **)d, e)
1960
/* Store in "a" the result of pcre2_substring_length_byname_ for the
mode-selected variants of "b" and "c", with "d" passed through. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_length_byname_, BITONE)(G(b, BITONE), \
      G(c, BITONE), d); \
  else \
    a = G(pcre2_substring_length_byname_, BITTWO)(G(b, BITTWO), \
      G(c, BITTWO), d)

/* As above for pcre2_substring_length_bynumber_; "c" is passed through. */

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_length_bynumber_, BITONE)(G(b, BITONE), c, d); \
  else \
    a = G(pcre2_substring_length_bynumber_, BITTWO)(G(b, BITTWO), c, d)

/* Store in "a" the result of pcre2_substring_list_get_ for the mode-selected
variant of "b"; "c" is cast to PCRE2_UCHAR *** of the current width and "d" is
passed through. */

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_list_get_, BITONE)(G(b, BITONE), \
      (G(PCRE2_UCHAR, BITONE) ***)c, d); \
  else \
    a = G(pcre2_substring_list_get_, BITTWO)(G(b, BITTWO), \
      (G(PCRE2_UCHAR, BITTWO) ***)c, d)

/* Call pcre2_substring_list_free_ with "a" cast to a pointer to the PCRE2_SPTR
type of the current width. */

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(pcre2_substring_list_free_, BITONE)((G(PCRE2_SPTR, BITONE) *)a); \
  else \
    G(pcre2_substring_list_free_, BITTWO)((G(PCRE2_SPTR, BITTWO) *)a)

/* Store in "a" the result of pcre2_substring_number_from_name_ for the
mode-selected variants of "b" and "c". */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    a = G(pcre2_substring_number_from_name_, BITONE)(G(b, BITONE), \
      G(c, BITONE)); \
  else \
    a = G(pcre2_substring_number_from_name_, BITTWO)(G(b, BITTWO), \
      G(c, BITTWO))
1992
/* The mode-selected variant of "x" as a void pointer. */

#define PTR(x) \
  ((test_mode == G(G(PCRE, BITONE), _MODE))? \
    (void *)G(x, BITONE) : (void *)G(x, BITTWO))

/* Assign "z" to field "y" of the structure that the mode-selected variant of
"x" points to. */

#define SETFLD(x,y,z) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE)->y = z; \
  else \
    G(x, BITTWO)->y = z

/* As SETFLD, but the target is element "v" of the vector field "y". */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE)->y[v] = z; \
  else \
    G(x, BITTWO)->y[v] = z

/* Apply operator "z" with right-hand operand "y" to the mode-selected variant
of "x". Note the argument order: variable, operand, operator. */

#define SETOP(x,y,z) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE) z y; \
  else \
    G(x, BITTWO) z y

/* Point the mode-selected variant of "x" at "y", cast to a pointer to the
uint<width>_t type whose name is built from BITONE or BITTWO. */

#define SETCASTPTR(x,y) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) \
    G(x, BITONE) = (G(G(uint, BITONE), _t) *)(y); \
  else \
    G(x, BITTWO) = (G(G(uint, BITTWO), _t) *)(y)
2014
/* Length of the string at "p", measured by the strlen<width> function for the
current mode after casting "p" to that width's PCRE2_SPTR type. The result is
converted to int, as in the three-mode and single-mode definitions of STRLEN,
so the macro has the same type in every build configuration. */

#define STRLEN(p) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    ((int)G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p)) : \
    ((int)G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)))
2018
/* Call the mode-selected variant of function "a" with the mode-selected
variant of "b" as its only argument. */

#define SUB1(a,b) \
  if (test_mode == G(G(PCRE, BITONE), _MODE)) G(a, BITONE)(G(b, BITONE)); \
  else G(a, BITTWO)(G(b, BITTWO))
2024
/* Call the mode-selected variant of function "a" with the mode-selected
variants of "b" and "c" as its two arguments. The parentheses are balanced so
that the macro actually expands to a valid call in both branches. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2030
/* True when the variant of "x" belonging to the current test_mode satisfies
"<variant> r (y)", where "r" is an operator. */

#define TEST(x,r,y) \
  ((test_mode == G(G(PCRE, BITONE), _MODE) && G(x, BITONE) r (y)) || \
   (test_mode == G(G(PCRE, BITTWO), _MODE) && G(x, BITTWO) r (y)))

/* As TEST, but the left-hand operand is field "f" of the structure that the
mode-selected variant of "x" points to. */

#define TESTFLD(x,f,r,y) \
  ((test_mode == G(G(PCRE, BITONE), _MODE) && G(x, BITONE)->f r (y)) || \
   (test_mode == G(G(PCRE, BITTWO), _MODE) && G(x, BITTWO)->f r (y)))
2038
2039
2040 #endif /* Two out of three modes */
2041
2042 /* ----- End of cases where more than one mode is supported ----- */
2043
2044
2045 /* ----- Only 8-bit mode is supported ----- */
2046
2047 #elif defined SUPPORT_PCRE2_8
/* In this configuration nothing is selected at run time: each macro expands
straight to the _8 function, the 8-bit type, or the 8-suffixed variable. */

/* Casts, field access and context copies */

#define CASTFLD(t,a,b) (t)(G(a, 8)->b)
#define CASTVAR(t,x) (t)G(x, 8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
#define CONCTXCPY(a,b) \
  memcpy(G(a, 8), G(b, 8), sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(a,b,c) memcpy(G(a, 8), (char *)b, c)
#define DATCTXCPY(a,b) \
  memcpy(G(a, 8), G(b, 8), sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a, 8)->b
#define PATCTXCPY(a,b) \
  memcpy(G(a, 8), G(b, 8), sizeof(pcre2_compile_context_8))

/* Calls to pchars8(): PCHARS keeps the result in "lv", PCHARSV discards it */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p) + offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p) + offset, len, utf, f)

/* Direct calls to the 8-bit library functions */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b, c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a, 8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b, 8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  a = (void *)pcre2_code_copy_with_tables_8(G(b, 8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a, 8) = pcre2_compile_8(G(b, 8), c, d, e, f, g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, G(g, 8), h, i, j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a, G(b, 8), G(G(b, 8), _size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b, 8))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b, 8))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a, 8), b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  pcre2_jit_free_unused_memory_8(G(a, 8))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, G(g, 8), h)
/* JIT stack handling through the 8-bit functions. CREATE stores the new stack
in "a" as a PCRE2_JIT_STACK *, ASSIGN passes the 8-suffixed variant of "a" with
"b" cast to pcre2_jit_callback_8, and FREE casts "a" to pcre2_jit_stack_8 *.
None of the expansions ends in a semicolon; the caller supplies it, so the
macros can be used as the body of an if that has an else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a)
/* Tables, matching and match data (8-bit functions only) */

#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_8(G(c, 8))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_8(G(c, 8), a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, G(g, 8), h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  G(a, 8) = pcre2_match_data_create_8(b, G(c, 8))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a, 8) = pcre2_match_data_create_from_pattern_8(G(b, 8), G(c, 8))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a, 8))

/* Pattern conversion, information and printing */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  a = pcre2_pattern_convert_8(G(b, 8), c, d, (PCRE2_UCHAR8 **)e, f, G(g, 8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b, 8), c, d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8, outfile, a)

/* Serialization */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8((pcre2_code_8 **)a, b, c, G(d, 8))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8((const pcre2_code_8 **)a, b, c, d, G(e, 8))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_8(a)

/* Context setters: the 8-suffixed variant of "a" is the context argument */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a, 8), \
    (int (*)(pcre2_callout_block_8 *, void *))b, c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  pcre2_set_character_tables_8(G(a, 8), b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_8(G(a, 8), b, c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a, 8), b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  r = pcre2_set_glob_escape_8(G(a, 8), b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  r = pcre2_set_glob_separator_8(G(a, 8), b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a, 8), b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a, 8), b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  pcre2_set_max_pattern_length_8(G(a, 8), b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a, 8), b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  pcre2_set_parens_nest_limit_8(G(a, 8), b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_8(G(a, 8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))b, c)

/* Substitution and substring extraction */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_8(G(b, 8), (PCRE2_SPTR8)c, d, e, f, G(g, 8), h, \
    (PCRE2_SPTR8)i, j, (PCRE2_UCHAR8 *)k, l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_8(G(b, 8), G(c, 8), (PCRE2_UCHAR8 *)d, e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b, 8), c, (PCRE2_UCHAR8 *)d, e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_8(G(b, 8), G(c, 8), (PCRE2_UCHAR8 **)d, e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_8(G(b, 8), c, (PCRE2_UCHAR8 **)d, e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_8(G(b, 8), G(c, 8), d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_8(G(b, 8), c, d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_8(G(b, 8), (PCRE2_UCHAR8 ***)c, d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
/* Store in "a" the result of pcre2_substring_number_from_name_8 for the
8-suffixed variants of "b" and "c". As in the multi-mode definitions of this
macro, the expansion has no trailing semicolon; the caller supplies it. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* Generic helpers, all acting on the 8-suffixed variant of their first
argument */

#define PTR(x) (void *)G(x, 8)
#define SETFLD(x,y,z) G(x, 8)->y = z
#define SETFLDVEC(x,y,v,z) G(x, 8)->y[v] = z
#define SETOP(x,y,z) G(x, 8) z y
#define SETCASTPTR(x,y) G(x, 8) = (uint8_t *)(y)
#define STRLEN(p) (int)strlen((char *)p)
#define SUB1(a,b) G(a, 8)(G(b, 8))
#define SUB2(a,b,c) G(a, 8)(G(b, 8), G(c, 8))
#define TEST(x,r,y) (G(x, 8) r (y))
#define TESTFLD(x,f,r,y) (G(x, 8)->f r (y))
2151
2152
2153 /* ----- Only 16-bit mode is supported ----- */
2154
2155 #elif defined SUPPORT_PCRE2_16
/* In this configuration nothing is selected at run time: each macro expands
straight to the _16 function, the 16-bit type, or the 16-suffixed variable. */

/* Casts, field access and context copies */

#define CASTFLD(t,a,b) (t)(G(a, 16)->b)
#define CASTVAR(t,x) (t)G(x, 16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define CONCTXCPY(a,b) \
  memcpy(G(a, 16), G(b, 16), sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(a,b,c) memcpy(G(a, 16), (char *)b, (c)*2)
#define DATCTXCPY(a,b) \
  memcpy(G(a, 16), G(b, 16), sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a, 16)->b
#define PATCTXCPY(a,b) \
  memcpy(G(a, 16), G(b, 16), sizeof(pcre2_compile_context_16))

/* Calls to pchars16(): PCHARS keeps the result in "lv", PCHARSV discards it */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p) + offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p) + offset, len, utf, f)

/* Direct calls to the 16-bit library functions */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b, c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a, 16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  a = (void *)pcre2_code_copy_16(G(b, 16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  a = (void *)pcre2_code_copy_with_tables_16(G(b, 16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a, 16) = pcre2_compile_16(G(b, 16), c, d, e, f, g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, G(g, 16), \
    h, i, j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a, G(b, 16), G(G(b, 16), _size/2))
#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  a = pcre2_get_ovector_count_16(G(b, 16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b, 16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a, 16), b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  pcre2_jit_free_unused_memory_16(G(a, 16))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, G(g, 16), h)
/* JIT stack handling through the 16-bit functions. CREATE stores the new stack
in "a" as a PCRE2_JIT_STACK *, ASSIGN passes the 16-suffixed variant of "a"
with "b" cast to pcre2_jit_callback_16, and FREE casts "a" to
pcre2_jit_stack_16 *. None of the expansions ends in a semicolon; the caller
supplies it, so the macros can be used as the body of an if that has an
else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d)
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c)
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a)
/* Tables, matching and match data (16-bit functions only) */

#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_16(G(c, 16))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_16(G(c, 16), a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, G(g, 16), h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  G(a, 16) = pcre2_match_data_create_16(b, G(c, 16))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a, 16) = pcre2_match_data_create_from_pattern_16(G(b, 16), G(c, 16))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a, 16))

/* Pattern conversion, information and printing */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  a = pcre2_pattern_convert_16(G(b, 16), c, d, (PCRE2_UCHAR16 **)e, f, \
    G(g, 16))
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  a = pcre2_pattern_info_16(G(b, 16), c, d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16, outfile, a)

/* Serialization */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_16((pcre2_code_16 **)a, b, c, G(d, 16))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_16((const pcre2_code_16 **)a, b, c, d, \
    G(e, 16))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_16(a)
/* Call pcre2_set_callout_16 with the 16-suffixed variant of "a", with "b" cast
to the callback type that takes a pcre2_callout_block_16 pointer, and with "c"
passed through. Like the 8-bit-only definition, the expansion has no trailing
semicolon; the caller supplies it. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
/* Context setters: the 16-suffixed variant of "a" is the context argument */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  pcre2_set_character_tables_16(G(a, 16), b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_16(G(a, 16), b, c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a, 16), b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  r = pcre2_set_glob_escape_16(G(a, 16), b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  r = pcre2_set_glob_separator_16(G(a, 16), b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a, 16), b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a, 16), b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  pcre2_set_max_pattern_length_16(G(a, 16), b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a, 16), b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  pcre2_set_parens_nest_limit_16(G(a, 16), b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_16(G(a, 16), \
    (int (*)(pcre2_substitute_callout_block_16 *, void *))b, c)

/* Substitution and substring extraction */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_16(G(b, 16), (PCRE2_SPTR16)c, d, e, f, G(g, 16), h, \
    (PCRE2_SPTR16)i, j, (PCRE2_UCHAR16 *)k, l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_16(G(b, 16), G(c, 16), \
    (PCRE2_UCHAR16 *)d, e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b, 16), c, (PCRE2_UCHAR16 *)d, e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_16(G(b, 16), G(c, 16), \
    (PCRE2_UCHAR16 **)d, e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_16(G(b, 16), c, (PCRE2_UCHAR16 **)d, e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_16(G(b, 16), G(c, 16), d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_16(G(b, 16), c, d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_16(G(b, 16), (PCRE2_UCHAR16 ***)c, d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
/* Store in "a" the result of pcre2_substring_number_from_name_16 for the
16-suffixed variants of "b" and "c". As in the multi-mode definitions of this
macro, the expansion has no trailing semicolon; the caller supplies it. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
/* Generic helpers, all acting on the 16-suffixed variant of their first
argument */

#define PTR(x) (void *)G(x, 16)
#define SETFLD(x,y,z) G(x, 16)->y = z
#define SETFLDVEC(x,y,v,z) G(x, 16)->y[v] = z
#define SETOP(x,y,z) G(x, 16) z y
#define SETCASTPTR(x,y) G(x, 16) = (uint16_t *)(y)
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
#define SUB1(a,b) G(a, 16)(G(b, 16))
#define SUB2(a,b,c) G(a, 16)(G(b, 16), G(c, 16))
#define TEST(x,r,y) (G(x, 16) r (y))
#define TESTFLD(x,f,r,y) (G(x, 16)->f r (y))
2259
2260
2261 /* ----- Only 32-bit mode is supported ----- */
2262
2263 #elif defined SUPPORT_PCRE2_32
/* Macro set used when only the 32-bit library is supported. Each PCRE2_xxx
macro expands to a call of the pcre2_xxx_32() function of the same name,
casting pointer arguments to the 32-bit types. G() is defined outside this
excerpt; it appears to paste the width suffix onto a variable name - TODO
confirm against its definition. In the macros whose expansion starts with
"a =" or "r =", that argument is the variable receiving the return value. */

/* Field and variable access with a cast to type t. */
#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
/* Fetch code unit b of string a as a uint32_t. */
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
/* Whole-structure copies of the convert, match, and compile contexts. */
#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
/* Copies (c)*4 bytes; presumably c counts 32-bit code units - TODO confirm. */
#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
/* Print (or count) characters via pchars32(); PCHARS stores the count in lv,
PCHARSV discards it. */
#define PCHARS(lv, p, offset, len, utf, f) \
lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
(void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
/* This macro refers to the variable compiled_code32 by name rather than
taking the compiled pattern as an argument. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
a = pcre2_callout_enumerate_32(compiled_code32, \
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
/* The buffer length passed is the b..._size variable divided by 4; presumably
that variable holds a byte count - TODO confirm. */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
/* NOTE(review): the three JIT stack macros below end their expansions with a
semicolon, unlike most macros in this set; a use followed by ';' therefore
yields an extra empty statement. Confirm callers before changing. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
#define PCRE2_MAKETABLES(a,c) a = pcre2_maketables_32(G(c,32))
#define PCRE2_MAKETABLES_FREE(c,a) pcre2_maketables_free_32(G(c,32),a)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,G(c,32))
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),G(c,32))
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
/* Refers to the variables compiled_code32 and outfile by name. */
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
r = pcre2_serialize_get_number_of_codes_32(a)
/* Context setters: the first argument names the context variable; callback
arguments are cast to the 32-bit callback signatures. */
#define PCRE2_SET_CALLOUT(a,b,c) \
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
pcre2_set_substitute_callout_32(G(a,32), \
(int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* NOTE(review): this expansion ends with a semicolon, whereas the 16-bit
definition of the same macro does not. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
/* NOTE(review): expansion ends with a semicolon (see the notes above). */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
/* Generic accessors for the 32-bit variant of a variable: pointer value,
field store, indexed field store, operator application, cast-and-assign. */
#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
/* The length in 32-bit code units, converted to int. */
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
/* Call the 32-bit variant of function a with one or two 32-bit arguments. */
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
/* Comparisons: r is the relational operator, y the value to compare with. */
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2367
2368 #endif
2369
2370 /* ----- End of mode-specific function call macros ----- */
2371
2372
2373
2374
2375 /*************************************************
2376 * Alternate character tables *
2377 *************************************************/
2378
2379 /* By default, the "tables" pointer in the compile context when calling
2380 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2381 library. However, the tables modifier can be used to select alternate sets of
2382 tables, for different kinds of testing. Note that the locale modifier also
2383 adjusts the tables. */
2384
/* This is the set of tables distributed as default with PCRE2. It recognizes
only ASCII characters. The vector holds four consecutive sections: a 256-byte
lower casing table, a 256-byte case flipping table, ten 32-byte character class
bit maps (320 bytes), and a 256-byte character type table. */

static const uint8_t tables1[] = {

/* This table is a lower casing table. Entry n is the lower case form of
character n; only 'A'-'Z' (65-90) map to a different value. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table is a case flipping table. Entry n is character n with its case
swapped: 'A'-'Z' map to 'a'-'z' and 'a'-'z' map to 'A'-'Z'. */

0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. The
maps appear below in that order, one group of four lines per class. */

/* space */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print */
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl */
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
The trailing comment on each row gives the range of characters it covers.
*/

0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2556
/* This is a set of tables that came originally from a Windows user. It seems
to be at least an approximation of ISO 8859. In particular, there are
characters greater than 128 that are marked as spaces, letters, etc.

The vector has the same total size as tables1 (1088 bytes) and appears to use
the same layout: lower casing table, case flipping table, ten 32-byte class
bit maps, character type table. The section comments below rely on that
correspondence. */

static const uint8_t tables2[] = {
/* Lower casing table (256 bytes). Unlike tables1, entries 192-222 (other
than 215) also map to the value 32 higher. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* Case flipping table (256 bytes). */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,
/* Character class bit maps, 32 bytes (four rows) each; presumably in the
same order as in tables1: space, xdigit, digit, upper, lower, word, graph,
print, punct, cntrl. */
/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,
/* Character type table (256 bytes), one row per eight characters. The bit
meanings are presumably those listed for tables1 - TODO confirm. */
128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2699
2700
2701
2702 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2703 /*************************************************
2704 * Emulated memmove() for systems without it *
2705 *************************************************/
2706
/* Replacement for memmove() on systems that lack it. When bcopy() is
available (HAVE_BCOPY) the work is delegated to it. Otherwise the bytes are
copied one at a time, walking backwards when the destination lies above the
source so that overlapping regions are handled correctly.

Arguments:
  d           destination
  s           source
  n           number of bytes to copy

Returns:      d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
  bcopy(s, d, n);
  return d;
#else
  unsigned char *out = (unsigned char *)d;
  const unsigned char *in = (const unsigned char *)s;

  if (out > in)
    {
    /* Destination is above the source: copy from the last byte down. */
    size_t remaining = n;
    while (remaining > 0)
      {
      remaining--;
      out[remaining] = in[remaining];
      }
    }
  else
    {
    /* Destination is at or below the source: copy from the first byte up. */
    size_t k;
    for (k = 0; k < n; k++) out[k] = in[k];
    }

  return d;
#endif /* not HAVE_BCOPY */
}
2735 #undef memmove
2736 #define memmove(d,s,n) emulated_memmove(d,s,n)
2737 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
2738
2739
2740
2741 #ifndef HAVE_STRERROR
2742 /*************************************************
2743 * Provide strerror() for non-ANSI libraries *
2744 *************************************************/
2745
2746 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2747 libraries. They may no longer be around, but just in case, we can try to
2748 provide the same facility by this simple alternative function. */
2749
2750 extern int sys_nerr;
2751 extern char *sys_errlist[];
2752
2753 char *
strerror(int n)2754 strerror(int n)
2755 {
2756 if (n < 0 || n >= sys_nerr) return "unknown error number";
2757 return sys_errlist[n];
2758 }
2759 #endif /* HAVE_STRERROR */
2760
2761
2762
2763 /*************************************************
2764 * Local memory functions *
2765 *************************************************/
2766
2767 /* Alternative memory functions, to test functionality. */
2768
/* Allocate memory with malloc(). When show_memory is set, the request is
also logged to outfile: a failure is reported, and a success is reported and
remembered in malloclist/malloclistlength while there is room for it, so that
my_free() can show the size again later.

Arguments:
  size        number of bytes to get
  data        unused

Returns:      the malloc() result, which may be NULL
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
  void *block = malloc(size);
  (void)data;

  /* Nothing more to do unless memory tracing has been requested. */

  if (!show_memory) return block;

  if (block == NULL)
    {
    fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
    return block;
    }

  fprintf(outfile, "malloc %5" SIZ_FORM, size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
  fprintf(outfile, " %p", block); /* Not portable */
#endif

  /* Remember the block and its length if the list is not already full. */

  if (malloclistptr >= MALLOCLISTSIZE)
    fprintf(outfile, " (not remembered)");
  else
    {
    malloclist[malloclistptr] = block;
    malloclistlength[malloclistptr++] = size;
    }

  fprintf(outfile, "\n");
  return block;
}
2797
my_free(void * block,void * data)2798 static void my_free(void *block, void *data)
2799 {
2800 (void)data;
2801 if (show_memory && block != NULL)
2802 {
2803 uint32_t i, j;
2804 BOOL found = FALSE;
2805
2806 fprintf(outfile, "free");
2807 for (i = 0; i < malloclistptr; i++)
2808 {
2809 if (block == malloclist[i])
2810 {
2811 fprintf(outfile, " %5" SIZ_FORM, malloclistlength[i]);
2812 malloclistptr--;
2813 for (j = i; j < malloclistptr; j++)
2814 {
2815 malloclist[j] = malloclist[j+1];
2816 malloclistlength[j] = malloclistlength[j+1];
2817 }
2818 found = TRUE;
2819 break;
2820 }
2821 }
2822 if (!found) fprintf(outfile, " unremembered block");
2823 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2824 fprintf(outfile, " %p", block); /* Not portable */
2825 #endif
2826 fprintf(outfile, "\n");
2827 }
2828 free(block);
2829 }
2830
2831
2832
2833 /*************************************************
2834 * Callback function for stack guard *
2835 *************************************************/
2836
2837 /* This is set up to be called from pcre2_compile() when the stackguard=n
2838 modifier sets a value greater than zero. The test we do is whether the
2839 parenthesis nesting depth is greater than the value set by the modifier.
2840
2841 Argument: the current parenthesis nesting depth
2842 Returns: non-zero to kill the compilation
2843 */
2844
2845 static int
stack_guard(uint32_t depth,void * user_data)2846 stack_guard(uint32_t depth, void *user_data)
2847 {
2848 (void)user_data;
2849 return depth > pat_patctl.stackguard_test;
2850 }
2851
2852
2853 /*************************************************
2854 * JIT memory callback *
2855 *************************************************/
2856
2857 static PCRE2_JIT_STACK*
jit_callback(void * arg)2858 jit_callback(void *arg)
2859 {
2860 jit_was_used = TRUE;
2861 return (PCRE2_JIT_STACK *)arg;
2862 }
2863
2864
2865 /*************************************************
2866 * Convert UTF-8 character to code point *
2867 *************************************************/
2868
2869 /* This function reads one or more bytes that represent a UTF-8 character,
2870 and returns the codepoint of that character. Note that the function supports
2871 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2872 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2873 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2874 checking, and also for generating 32-bit non-UTF data values above the UTF
2875 limit.
2876
2877 Argument:
2878 utf8bytes a pointer to the byte vector
2879 vptr a pointer to an int to receive the value
2880
2881 Returns: > 0 => the number of bytes consumed
2882 -6 to 0 => malformed UTF-8 character at offset = (-return)
2883 */
2884
2885 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2886 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2887 {
2888 uint32_t c = *utf8bytes++;
2889 uint32_t d = c;
2890 int i, j, s;
2891
2892 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2893 {
2894 if ((d & 0x80) == 0) break;
2895 d <<= 1;
2896 }
2897
2898 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2899 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2900
2901 /* i now has a value in the range 1-5 */
2902
2903 s = 6*i;
2904 d = (c & utf8_table3[i]) << s;
2905
2906 for (j = 0; j < i; j++)
2907 {
2908 c = *utf8bytes++;
2909 if ((c & 0xc0) != 0x80) return -(j+1);
2910 s -= 6;
2911 d |= (c & 0x3f) << s;
2912 }
2913
2914 /* Check that encoding was the correct unique one */
2915
2916 for (j = 0; j < utf8_table1_size; j++)
2917 if (d <= (uint32_t)utf8_table1[j]) break;
2918 if (j != i) return -(i+1);
2919
2920 /* Valid value */
2921
2922 *vptr = d;
2923 return i+1;
2924 }
2925
2926
2927
2928 /*************************************************
2929 * Print one character *
2930 *************************************************/
2931
2932 /* Print a single character either literally, or as a hex escape, and count how
2933 many printed characters are used.
2934
2935 Arguments:
2936 c the character
2937 utf TRUE in UTF mode
2938 f the FILE to print to, or NULL just to count characters
2939
2940 Returns: number of characters written
2941 */
2942
2943 static int
pchar(uint32_t c,BOOL utf,FILE * f)2944 pchar(uint32_t c, BOOL utf, FILE *f)
2945 {
2946 int n = 0;
2947 char tempbuffer[16];
2948
2949 if (PRINTOK(c))
2950 {
2951 if (f != NULL) fprintf(f, "%c", c);
2952 return 1;
2953 }
2954
2955 if (c < 0x100)
2956 {
2957 if (utf)
2958 {
2959 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2960 return 6;
2961 }
2962 else
2963 {
2964 if (f != NULL) fprintf(f, "\\x%02x", c);
2965 return 4;
2966 }
2967 }
2968
2969 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2970 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2971
2972 return n >= 0 ? n : 0;
2973 }
2974
2975
2976
2977 #ifdef SUPPORT_PCRE2_16
2978 /*************************************************
2979 * Find length of 0-terminated 16-bit string *
2980 *************************************************/
2981
/* Count the 16-bit code units that precede the terminating zero.

Argument:   p    points to a zero-terminated 16-bit string
Returns:    the number of code units, excluding the terminator
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
  PCRE2_SPTR16 pp = p;
  while (*pp != 0) pp++;

  /* Convert the pointer difference straight to the return type. Going via
  int would truncate the length of a very long string. */

  return (size_t)(pp - p);
}
2988 #endif /* SUPPORT_PCRE2_16 */
2989
2990
2991
2992 #ifdef SUPPORT_PCRE2_32
2993 /*************************************************
2994 * Find length of 0-terminated 32-bit string *
2995 *************************************************/
2996
/* Count the 32-bit code units that precede the terminating zero.

Argument:   p    points to a zero-terminated 32-bit string
Returns:    the number of code units, excluding the terminator
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
  PCRE2_SPTR32 pp = p;
  while (*pp != 0) pp++;

  /* Convert the pointer difference straight to the return type. Going via
  int would truncate the length of a very long string. */

  return (size_t)(pp - p);
}
3003 #endif /* SUPPORT_PCRE2_32 */
3004
3005
3006 #ifdef SUPPORT_PCRE2_8
3007 /*************************************************
3008 * Print 8-bit character string *
3009 *************************************************/
3010
3011 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
3012 For printing *MARK strings, a negative length is given, indicating that the
3013 length is in the first code unit. If handed a NULL file, this function just
3014 counts chars without printing (because pchar() does that). */
3015
pchars8(PCRE2_SPTR8 p,int length,BOOL utf,FILE * f)3016 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
3017 {
3018 uint32_t c = 0;
3019 int yield = 0;
3020 if (length < 0) length = *p++;
3021 while (length-- > 0)
3022 {
3023 if (utf)
3024 {
3025 int rc = utf82ord(p, &c);
3026 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
3027 {
3028 length -= rc - 1;
3029 p += rc;
3030 yield += pchar(c, utf, f);
3031 continue;
3032 }
3033 }
3034 c = *p++;
3035 yield += pchar(c, utf, f);
3036 }
3037
3038 return yield;
3039 }
3040 #endif
3041
3042
3043 #ifdef SUPPORT_PCRE2_16
3044 /*************************************************
3045 * Print 16-bit character string *
3046 *************************************************/
3047
3048 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3049 For printing *MARK strings, a negative length is given, indicating that the
3050 length is in the first code unit. If handed a NULL file, just counts chars
3051 without printing. */
3052
pchars16(PCRE2_SPTR16 p,int length,BOOL utf,FILE * f)3053 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
3054 {
3055 int yield = 0;
3056 if (length < 0) length = *p++;
3057 while (length-- > 0)
3058 {
3059 uint32_t c = *p++ & 0xffff;
3060 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
3061 {
3062 int d = *p & 0xffff;
3063 if (d >= 0xDC00 && d <= 0xDFFF)
3064 {
3065 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
3066 length--;
3067 p++;
3068 }
3069 }
3070 yield += pchar(c, utf, f);
3071 }
3072 return yield;
3073 }
3074 #endif /* SUPPORT_PCRE2_16 */
3075
3076
3077
3078 #ifdef SUPPORT_PCRE2_32
3079 /*************************************************
3080 * Print 32-bit character string *
3081 *************************************************/
3082
3083 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3084 For printing *MARK strings, a negative length is given, indicating that the
3085 length is in the first code unit. If handed a NULL file, just counts chars
3086 without printing. */
3087
pchars32(PCRE2_SPTR32 p,int length,BOOL utf,FILE * f)3088 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
3089 {
3090 int yield = 0;
3091 (void)(utf); /* Avoid compiler warning */
3092 if (length < 0) length = *p++;
3093 while (length-- > 0)
3094 {
3095 uint32_t c = *p++;
3096 yield += pchar(c, utf, f);
3097 }
3098 return yield;
3099 }
3100 #endif /* SUPPORT_PCRE2_32 */
3101
3102
3103
3104
3105 /*************************************************
3106 * Convert character value to UTF-8 *
3107 *************************************************/
3108
3109 /* This function takes an integer value in the range 0 - 0x7fffffff
3110 and encodes it as a UTF-8 character in 0 to 6 bytes. It is needed even when the
3111 8-bit library is not supported, to generate UTF-8 output for non-ASCII
3112 characters.
3113
3114 Arguments:
3115 cvalue the character value
3116 utf8bytes pointer to buffer for result - at least 6 bytes long
3117
3118 Returns: number of characters placed in the buffer
3119 */
3120
3121 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3122 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3123 {
3124 int i, j;
3125 if (cvalue > 0x7fffffffu)
3126 return -1;
3127 for (i = 0; i < utf8_table1_size; i++)
3128 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3129 utf8bytes += i;
3130 for (j = i; j > 0; j--)
3131 {
3132 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3133 cvalue >>= 6;
3134 }
3135 *utf8bytes = utf8_table2[i] | cvalue;
3136 return i + 1;
3137 }
3138
3139
3140
3141 #ifdef SUPPORT_PCRE2_16
3142 /*************************************************
3143 * Convert string to 16-bit *
3144 *************************************************/
3145
3146 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3147 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3148 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3149 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3150 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3151 greater than 0xffff.
3152
3153 If all the input bytes are ASCII, the space needed for a 16-bit string is
3154 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3155 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3156 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3157 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3158 save repeated re-sizing.
3159
3160 Note that this function does not object to surrogate values. This is
3161 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3162 for the purpose of testing that they are correctly faulted.
3163
3164 Arguments:
3165 p points to a byte string
3166 utf true in UTF mode
3167 lenptr points to number of bytes in the string (excluding trailing zero)
3168
3169 Returns: 0 on success, with the length updated to the number of 16-bit
3170 data items used (excluding the trailing zero)
3171 OR -1 if a UTF-8 string is malformed
3172 OR -2 if a value > 0x10ffff is encountered in UTF mode
3173 OR -3 if a value > 0xffff is encountered when not in UTF mode
3174 */
3175
3176 static int
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3177 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3178 {
3179 uint16_t *pp;
3180 PCRE2_SIZE len = *lenptr;
3181
3182 if (pbuffer16_size < 2*len + 2)
3183 {
3184 if (pbuffer16 != NULL) free(pbuffer16);
3185 pbuffer16_size = 2*len + 2;
3186 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3187 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3188 if (pbuffer16 == NULL)
3189 {
3190 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3191 pbuffer16_size);
3192 exit(1);
3193 }
3194 }
3195
3196 pp = pbuffer16;
3197 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3198 {
3199 for (; len > 0; len--) *pp++ = *p++;
3200 }
3201 else while (len > 0)
3202 {
3203 uint32_t c;
3204 int chlen = utf82ord(p, &c);
3205 if (chlen <= 0) return -1;
3206 if (!utf && c > 0xffff) return -3;
3207 if (c > 0x10ffff) return -2;
3208 p += chlen;
3209 len -= chlen;
3210 if (c < 0x10000) *pp++ = c; else
3211 {
3212 c -= 0x10000;
3213 *pp++ = 0xD800 | (c >> 10);
3214 *pp++ = 0xDC00 | (c & 0x3ff);
3215 }
3216 }
3217
3218 *pp = 0;
3219 *lenptr = pp - pbuffer16;
3220 return 0;
3221 }
3222 #endif
3223
3224
3225
3226 #ifdef SUPPORT_PCRE2_32
3227 /*************************************************
3228 * Convert string to 32-bit *
3229 *************************************************/
3230
3231 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3232 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3233 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3234 limit of 0x10ffff cause an error.
3235
3236 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3237 is set, and no limit is imposed. There is special interpretation of the 0xff
3238 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3239 next character to be set. This provides a way of generating 32-bit characters
3240 greater than 0x7fffffff.
3241
3242 If all the input bytes are ASCII, the space needed for a 32-bit string is
3243 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3244 string is no more than four times, because the number of characters must be
3245 less than the number of bytes. The result is always left in pbuffer32. Impose a
3246 minimum size to save repeated re-sizing.
3247
3248 Note that this function does not object to surrogate values. This is
3249 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3250 for the purpose of testing that they are correctly faulted.
3251
3252 Arguments:
3253 p points to a byte string
3254 utf true in UTF mode
3255 lenptr points to number of bytes in the string (excluding trailing zero)
3256
3257 Returns: 0 on success, with the length updated to the number of 32-bit
3258 data items used (excluding the trailing zero)
3259 OR -1 if a UTF-8 string is malformed
3260 OR -2 if a value > 0x10ffff is encountered in UTF mode
3261 */
3262
3263 static int
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3264 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3265 {
3266 uint32_t *pp;
3267 PCRE2_SIZE len = *lenptr;
3268
3269 if (pbuffer32_size < 4*len + 4)
3270 {
3271 if (pbuffer32 != NULL) free(pbuffer32);
3272 pbuffer32_size = 4*len + 4;
3273 if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3274 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3275 if (pbuffer32 == NULL)
3276 {
3277 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3278 pbuffer32_size);
3279 exit(1);
3280 }
3281 }
3282
3283 pp = pbuffer32;
3284
3285 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3286 {
3287 for (; len > 0; len--) *pp++ = *p++;
3288 }
3289
3290 else while (len > 0)
3291 {
3292 int chlen;
3293 uint32_t c;
3294 uint32_t topbit = 0;
3295 if (!utf && *p == 0xff && len > 1)
3296 {
3297 topbit = 0x80000000u;
3298 p++;
3299 len--;
3300 }
3301 chlen = utf82ord(p, &c);
3302 if (chlen <= 0) return -1;
3303 if (utf && c > 0x10ffff) return -2;
3304 p += chlen;
3305 len -= chlen;
3306 *pp++ = c | topbit;
3307 }
3308
3309 *pp = 0;
3310 *lenptr = pp - pbuffer32;
3311 return 0;
3312 }
3313 #endif /* SUPPORT_PCRE2_32 */
3314
3315
3316
3317 /* This function is no longer used. Keep it around for a while, just in case it
3318 needs to be re-instated. */
3319
3320 #ifdef NEVERNEVERNEVER
3321
3322 /*************************************************
3323 * Move back by so many characters *
3324 *************************************************/
3325
3326 /* Given a code unit offset in a subject string, move backwards by a number of
3327 characters, and return the resulting offset.
3328
3329 Arguments:
3330 subject pointer to the string
3331 offset start offset
3332 count count to move back by
3333 utf TRUE if in UTF mode
3334
3335 Returns: a possibly changed offset
3336 */
3337
3338 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3339 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3340 {
3341 if (!utf || test_mode == PCRE32_MODE)
3342 return (count >= offset)? 0 : (offset - count);
3343
3344 else if (test_mode == PCRE8_MODE)
3345 {
3346 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3347 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3348 {
3349 pp--;
3350 while ((*pp & 0xc0) == 0x80) pp--;
3351 }
3352 return pp - (PCRE2_SPTR8)subject;
3353 }
3354
3355 else /* 16-bit mode */
3356 {
3357 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3358 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3359 {
3360 pp--;
3361 if ((*pp & 0xfc00) == 0xdc00) pp--;
3362 }
3363 return pp - (PCRE2_SPTR16)subject;
3364 }
3365 }
3366 #endif /* NEVERNEVERNEVER */
3367
3368
3369
3370 /*************************************************
3371 * Expand input buffers *
3372 *************************************************/
3373
3374 /* This function doubles the size of the input buffer and the buffer for
3375 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3376 the new ones.
3377
3378 Arguments: none
3379 Returns: nothing (aborts if malloc() fails)
3380 */
3381
3382 static void
expand_input_buffers(void)3383 expand_input_buffers(void)
3384 {
3385 int new_pbuffer8_size = 2*pbuffer8_size;
3386 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3387 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3388
3389 if (new_buffer == NULL || new_pbuffer8 == NULL)
3390 {
3391 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3392 exit(1);
3393 }
3394
3395 memcpy(new_buffer, buffer, pbuffer8_size);
3396 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3397
3398 pbuffer8_size = new_pbuffer8_size;
3399
3400 free(buffer);
3401 free(pbuffer8);
3402
3403 buffer = new_buffer;
3404 pbuffer8 = new_pbuffer8;
3405 }
3406
3407
3408
3409 /*************************************************
3410 * Read or extend an input line *
3411 *************************************************/
3412
3413 /* Input lines are read into buffer, but both patterns and data lines can be
3414 continued over multiple input lines. In addition, if the buffer fills up, we
3415 want to automatically expand it so as to be able to handle extremely large
3416 lines that are needed for certain stress tests, although this is less likely
3417 now that there are repetition features for both patterns and data. When the
3418 input buffer is expanded, the other two buffers must also be expanded likewise,
3419 and the contents of pbuffer, which are a copy of the input for callouts, must
3420 be preserved (for when expansion happens for a data line). This is not the most
3421 optimal way of handling this, but hey, this is just a test program!
3422
3423 Arguments:
3424 f the file to read
3425 start where in buffer to start (this *must* be within buffer)
3426 prompt for stdin or readline()
3427
3428 Returns: pointer to the start of new data
3429 could be a copy of start, or could be moved
3430 NULL if no data read and EOF reached
3431 */
3432
3433 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3434 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3435 {
3436 uint8_t *here = start;
3437
3438 for (;;)
3439 {
3440 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3441
3442 if (rlen > 1000)
3443 {
3444 size_t dlen;
3445
3446 /* If libreadline or libedit support is required, use readline() to read a
3447 line if the input is a terminal. Note that readline() removes the trailing
3448 newline, so we must put it back again, to be compatible with fgets(). */
3449
3450 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3451 if (INTERACTIVE(f))
3452 {
3453 size_t len;
3454 char *s = readline(prompt);
3455 if (s == NULL) return (here == start)? NULL : start;
3456 len = strlen(s);
3457 if (len > 0) add_history(s);
3458 if (len > rlen - 1) len = rlen - 1;
3459 memcpy(here, s, len);
3460 here[len] = '\n';
3461 here[len+1] = 0;
3462 free(s);
3463 }
3464 else
3465 #endif
3466
3467 /* Read the next line by normal means, prompting if the file is a tty. */
3468
3469 {
3470 if (INTERACTIVE(f)) printf("%s", prompt);
3471 if (fgets((char *)here, rlen, f) == NULL)
3472 return (here == start)? NULL : start;
3473 }
3474
3475 dlen = strlen((char *)here);
3476 here += dlen;
3477
3478 /* Check for end of line reached. Take care not to read data from before
3479 start (dlen will be zero for a file starting with a binary zero). */
3480
3481 if (here > start && here[-1] == '\n') return start;
3482
3483 /* If we have not read a newline when reading a file, we have either filled
3484 the buffer or reached the end of the file. We can detect the former by
3485 checking that the string fills the buffer, and the latter by feof(). If
3486 neither of these is true, it means we read a binary zero which has caused
3487 strlen() to give a short length. This is a hard error because pcre2test
3488 expects to work with C strings. */
3489
3490 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3491 {
3492 fprintf(outfile, "** Binary zero encountered in input\n");
3493 fprintf(outfile, "** pcre2test run abandoned\n");
3494 exit(1);
3495 }
3496 }
3497
3498 else
3499 {
3500 size_t start_offset = start - buffer;
3501 size_t here_offset = here - buffer;
3502 expand_input_buffers();
3503 start = buffer + start_offset;
3504 here = buffer + here_offset;
3505 }
3506 }
3507
3508 /* Control never gets here */
3509 }
3510
3511
3512
/*************************************************
*     Case-independent strncmp() function        *
*************************************************/

/* The strings are compared one character at a time, after each character has
been passed through tolower(). As with strncmp(), the comparison stops after n
characters, at the first difference, or when both strings end together,
whichever comes first, so nothing beyond a terminating zero is ever examined. A
count that is zero or negative compares nothing, and the strings are then
reported as equal.

Arguments:
  s       first string
  t       second string
  n       number of characters to compare

Returns:  < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;

  /* The characters match, so a zero here is the end of both strings. */

  if (*s == 0) break;
  s++;
  t++;
  }
return 0;
}
3536
3537
3538
3539 /*************************************************
3540 * Scan the main modifier list *
3541 *************************************************/
3542
3543 /* This function searches the modifier list for a long modifier name.
3544
3545 Argument:
3546 p start of the name
3547 lenp length of the name
3548
3549 Returns: an index in the modifier list, or -1 on failure
3550 */
3551
3552 static int
scan_modifiers(const uint8_t * p,unsigned int len)3553 scan_modifiers(const uint8_t *p, unsigned int len)
3554 {
3555 int bot = 0;
3556 int top = MODLISTCOUNT;
3557
3558 while (top > bot)
3559 {
3560 int mid = (bot + top)/2;
3561 unsigned int mlen = strlen(modlist[mid].name);
3562 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3563 if (c == 0)
3564 {
3565 if (len == mlen) return mid;
3566 c = (int)len - (int)mlen;
3567 }
3568 if (c > 0) bot = mid + 1; else top = mid;
3569 }
3570
3571 return -1;
3572
3573 }
3574
3575
3576
3577 /*************************************************
3578 * Check a modifer and find its field *
3579 *************************************************/
3580
3581 /* This function is called when a modifier has been identified. We check that
3582 it is allowed here and find the field that is to be changed.
3583
3584 Arguments:
3585 m the modifier list entry
3586 ctx CTX_PAT => pattern context
3587 CTX_POPPAT => pattern context for popped pattern
3588 CTX_DEFPAT => default pattern context
3589 CTX_DAT => data context
3590 CTX_DEFDAT => default data context
3591 pctl point to pattern control block
3592 dctl point to data control block
3593 c a single character or 0
3594
3595 Returns: a field pointer or NULL
3596 */
3597
3598 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3599 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3600 {
3601 void *field = NULL;
3602 PCRE2_SIZE offset = m->offset;
3603
3604 if (restrict_for_perl_test) switch(m->which)
3605 {
3606 case MOD_PNDP:
3607 case MOD_PATP:
3608 case MOD_DATP:
3609 case MOD_PDP:
3610 break;
3611
3612 default:
3613 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3614 m->name);
3615 return NULL;
3616 }
3617
3618 switch (m->which)
3619 {
3620 case MOD_CTC: /* Compile context modifier */
3621 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3622 else if (ctx == CTX_PAT) field = PTR(pat_context);
3623 break;
3624
3625 case MOD_CTM: /* Match context modifier */
3626 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3627 else if (ctx == CTX_DAT) field = PTR(dat_context);
3628 break;
3629
3630 case MOD_DAT: /* Data line modifier */
3631 case MOD_DATP: /* Allowed for Perl test */
3632 if (dctl != NULL) field = dctl;
3633 break;
3634
3635 case MOD_PAT: /* Pattern modifier */
3636 case MOD_PATP: /* Allowed for Perl test */
3637 if (pctl != NULL) field = pctl;
3638 break;
3639
3640 case MOD_PD: /* Pattern or data line modifier */
3641 case MOD_PDP: /* Ditto, allowed for Perl test */
3642 case MOD_PND: /* Ditto, but not default pattern */
3643 case MOD_PNDP: /* Ditto, allowed for Perl test */
3644 if (dctl != NULL) field = dctl;
3645 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3646 ctx != CTX_DEFPAT))
3647 field = pctl;
3648 break;
3649 }
3650
3651 if (field == NULL)
3652 {
3653 if (c == 0)
3654 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3655 else
3656 fprintf(outfile, "** /%c is not valid here\n", c);
3657 return NULL;
3658 }
3659
3660 return (char *)field + offset;
3661 }
3662
3663
3664
3665 /*************************************************
3666 * Decode a modifier list *
3667 *************************************************/
3668
3669 /* A pointer to a control block is NULL when called in cases when that block is
3670 not relevant. They are never all relevant in one call. At least one of patctl
3671 and datctl is NULL. The second argument specifies which context to use for
3672 modifiers that apply to contexts.
3673
3674 Arguments:
3675 p point to modifier string
3676 ctx CTX_PAT => pattern context
3677 CTX_POPPAT => pattern context for popped pattern
3678 CTX_DEFPAT => default pattern context
3679 CTX_DAT => data context
3680 CTX_DEFDAT => default data context
3681 pctl point to pattern control block
3682 dctl point to data control block
3683
3684 Returns: TRUE if successful decode, FALSE otherwise
3685 */
3686
3687 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3688 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3689 {
3690 uint8_t *ep, *pp;
3691 long li;
3692 unsigned long uli;
3693 BOOL first = TRUE;
3694
/* Each pass round this loop deals with one comma-separated item, which is
either a full-name modifier (possibly with data) or a run of single-character
modifiers. Within a pass, p is the start of the item, ep is its end, and pp is
the current scanning position. */
3695 for (;;)
3696 {
3697 void *field;
3698 modstruct *m;
3699 BOOL off = FALSE;
3700 unsigned int i, len;
3701 int index;
3702 char *endptr;
3703
3704 /* Skip white space and commas. */
3705
3706 while (isspace(*p) || *p == ',') p++;
3707 if (*p == 0) break;
3708
3709 /* Find the end of the item; lose trailing whitespace at end of line. */
3710
/* The trailing whitespace is lost by writing a zero into the string, so the
buffer that was passed in is changed. */
3711 for (ep = p; *ep != 0 && *ep != ','; ep++);
3712 if (*ep == 0)
3713 {
3714 while (ep > p && isspace(ep[-1])) ep--;
3715 *ep = 0;
3716 }
3717
3718 /* Remember if the first character is '-'. */
3719
3720 if (*p == '-')
3721 {
3722 off = TRUE;
3723 p++;
3724 }
3725
3726 /* Find the length of a full-length modifier name, and scan for it. */
3727
/* The name ends at an '=' or at the end of the item, whichever is first. */
3728 pp = p;
3729 while (pp < ep && *pp != '=') pp++;
3730 index = scan_modifiers(p, pp - p);
3731
3732 /* If the first modifier is unrecognized, try to interpret it as a sequence
3733 of single-character abbreviated modifiers. None of these modifiers have any
3734 associated data. They just set options or control bits. */
3735
3736 if (index < 0)
3737 {
3738 uint32_t cc;
3739 uint8_t *mp = p;
3740
3741 if (!first)
3742 {
3743 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3744 if (ep - p == 1)
3745 fprintf(outfile, "** Single-character modifiers must come first\n");
3746 return FALSE;
3747 }
3748
/* This loop advances p itself, leaving it at the character that ended the
run; mp keeps the start of the run for use in the error message. */
3749 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3750 {
3751 for (i = 0; i < C1MODLISTCOUNT; i++)
3752 if (cc == c1modlist[i].onechar) break;
3753
3754 if (i >= C1MODLISTCOUNT)
3755 {
3756 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3757 *p, (int)(ep-mp), mp);
3758 return FALSE;
3759 }
3760
/* The index of the equivalent full-name entry is looked up the first time a
character is used, and remembered in c1modlist for later calls. */
3761 if (c1modlist[i].index >= 0)
3762 {
3763 index = c1modlist[i].index;
3764 }
3765
3766 else
3767 {
3768 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3769 strlen(c1modlist[i].fullname));
3770 if (index < 0)
3771 {
3772 fprintf(outfile, "** Internal error: single-character equivalent "
3773 "modifier '%s' not found\n", c1modlist[i].fullname);
3774 return FALSE;
3775 }
3776 c1modlist[i].index = index; /* Cache for next time */
3777 }
3778
3779 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3780 if (field == NULL) return FALSE;
3781
3782 /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3783 PCRE2_EXTENDED_MORE. */
3784
3785 if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3786 {
3787 *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3788 *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3789 }
3790 else
3791 *((uint32_t *)field) |= modlist[index].value;
3792 }
3793
/* Going round again from here bypasses the code at the bottom of the loop, so
'first' is not cleared and the check for a popped pattern is not applied on
this path. */
3794 continue; /* With the next (fullname) modifier */
3795 }
3796
3797 /* We have a match on a full-name modifier. Check for the existence of data
3798 when needed. */
3799
/* A MOD_IND modifier is treated as having data only when an '=' is actually
present; without one it is checked like the on/off types below, and is given
its default value when its type is processed. */
3800 m = modlist + index; /* Save typing */
3801 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3802 (m->type != MOD_IND || *pp == '='))
3803 {
3804 if (*pp++ != '=')
3805 {
3806 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3807 return FALSE;
3808 }
3809 if (off)
3810 {
3811 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3812 return FALSE;
3813 }
3814 }
3815
3816 /* These on/off types have no data. */
3817
3818 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3819 {
3820 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3821 return FALSE;
3822 }
3823
3824 /* Set the data length for those types that have data. Then find the field
3825 that is to be set. If check_modifier() returns NULL, it has already output an
3826 error message. */
3827
3828 len = ep - pp;
3829 field = check_modifier(m, ctx, pctl, dctl, 0);
3830 if (field == NULL) return FALSE;
3831
3832 /* Process according to data type. */
3833
/* Each case that takes data leaves pp just after what it has used, so that
the check for a terminator after the switch can be made. */
3834 switch (m->type)
3835 {
3836 case MOD_CTL:
3837 case MOD_OPT:
3838 if (off) *((uint32_t *)field) &= ~m->value;
3839 else *((uint32_t *)field) |= m->value;
3840 break;
3841
/* As well as storing the value, the CTL2_BSR_SET bit in the relevant control
block records whether anything other than "default" was given. */
3842 case MOD_BSR:
3843 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3844 {
3845 #ifdef BSR_ANYCRLF
3846 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3847 #else
3848 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3849 #endif
3850 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3851 else dctl->control2 &= ~CTL2_BSR_SET;
3852 }
3853 else
3854 {
3855 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3856 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3857 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3858 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3859 else goto INVALID_VALUE;
3860 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3861 else dctl->control2 |= CTL2_BSR_SET;
3862 }
3863 pp = ep;
3864 break;
3865
3866 case MOD_CHR: /* A single character */
3867 *((uint32_t *)field) = *pp++;
3868 break;
3869
/* The names in the list are separated by colons. Each one is compared with
the entries in convertlist, ignoring case, over the length of the text that
was given. The first name replaces a field that is CONVERT_UNSET; any other
name has its option bits added in. */
3870 case MOD_CON: /* A convert type/options list */
3871 for (;; pp++)
3872 {
3873 uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3874 len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3875 for (i = 0; i < convertlistcount; i++)
3876 {
3877 if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3878 {
3879 if (*((uint32_t *)field) == CONVERT_UNSET)
3880 *((uint32_t *)field) = convertlist[i].option;
3881 else
3882 *((uint32_t *)field) |= convertlist[i].option;
3883 break;
3884 }
3885 }
3886 if (i >= convertlistcount) goto INVALID_VALUE;
3887 pp += len;
3888 if (*pp != ':') break;
3889 }
3890 break;
3891
/* The second number is optional; it follows a colon, and is set to zero if it
is not given. */
3892 case MOD_IN2: /* One or two unsigned integers */
3893 if (!isdigit(*pp)) goto INVALID_VALUE;
3894 uli = strtoul((const char *)pp, &endptr, 10);
3895 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3896 ((uint32_t *)field)[0] = (uint32_t)uli;
3897 if (*endptr == ':')
3898 {
3899 uli = strtoul((const char *)endptr+1, &endptr, 10);
3900 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3901 ((uint32_t *)field)[1] = (uint32_t)uli;
3902 }
3903 else ((uint32_t *)field)[1] = 0;
3904 pp = (uint8_t *)endptr;
3905 break;
3906
3907 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3908 less than ULONG_MAX. So first test for overflowing the long int, and then
3909 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3910
3911 case MOD_SIZ: /* PCRE2_SIZE value */
3912 if (!isdigit(*pp)) goto INVALID_VALUE;
3913 uli = strtoul((const char *)pp, &endptr, 10);
3914 if (uli == ULONG_MAX) goto INVALID_VALUE;
3915 #if ULONG_MAX > PCRE2_SIZE_MAX
3916 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3917 #endif
3918 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3919 pp = (uint8_t *)endptr;
3920 break;
3921
/* When no data was given, m->value supplies the value that is stored. */
3922 case MOD_IND: /* Unsigned integer with default */
3923 if (len == 0)
3924 {
3925 *((uint32_t *)field) = (uint32_t)(m->value);
3926 break;
3927 }
3928 /* Fall through */
3929
3930 case MOD_INT: /* Unsigned integer */
3931 if (!isdigit(*pp)) goto INVALID_VALUE;
3932 uli = strtoul((const char *)pp, &endptr, 10);
3933 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3934 *((uint32_t *)field) = (uint32_t)uli;
3935 pp = (uint8_t *)endptr;
3936 break;
3937
3938 case MOD_INS: /* Signed integer */
3939 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3940 li = strtol((const char *)pp, &endptr, 10);
3941 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3942 *((int32_t *)field) = (int32_t)li;
3943 pp = (uint8_t *)endptr;
3944 break;
3945
/* The list of names is searched for an exact, case-independent match. Entry
zero selects NEWLINE_DEFAULT and clears CTL2_NL_SET; for any other entry the
index itself is stored and CTL2_NL_SET is set. */
3946 case MOD_NL:
3947 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3948 if (len == strlen(newlines[i]) &&
3949 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3950 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3951 if (i == 0)
3952 {
3953 *((uint16_t *)field) = NEWLINE_DEFAULT;
3954 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3955 else dctl->control2 &= ~CTL2_NL_SET;
3956 }
3957 else
3958 {
3959 *((uint16_t *)field) = i;
3960 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3961 else dctl->control2 |= CTL2_NL_SET;
3962 }
3963 pp = ep;
3964 break;
3965
/* For a number, the field pointer is moved from the modifier's offset to the
offset that is held in m->value, where there is a list of 32-bit numbers. A
number that is not negative is put after the non-negative entries that are
already there, and is followed by -1 while there is still room. A negative
number is stored in the first slot without any skipping. */
3966 case MOD_NN: /* Name or (signed) number; may be several */
3967 if (isdigit(*pp) || *pp == '-')
3968 {
3969 int ct = MAXCPYGET - 1;
3970 int32_t value;
3971 li = strtol((const char *)pp, &endptr, 10);
3972 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3973 value = (int32_t)li;
3974 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3975 if (value >= 0) /* Add new number */
3976 {
3977 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3978 field = (char *)field + sizeof(int32_t);
3979 if (ct <= 0)
3980 {
3981 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3982 return FALSE;
3983 }
3984 }
3985 *((int32_t *)field) = value;
3986 if (ct > 0) ((int32_t *)field)[1] = -1;
3987 pp = (uint8_t *)endptr;
3988 }
3989
3990 /* Multiple strings are put end to end. */
3991
/* Each name is followed by a zero, and there is a second zero after the last
one. When the data is empty, the two zeros are written at the very start of
the field, which leaves the list empty. */
3992 else
3993 {
3994 char *nn = (char *)field;
3995 if (len > 0) /* Add new name */
3996 {
3997 if (len > MAX_NAME_SIZE)
3998 {
3999 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
4000 return FALSE;
4001 }
4002 while (*nn != 0) nn += strlen(nn) + 1;
4003 if (nn + len + 2 - (char *)field > LENCPYGET)
4004 {
4005 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
4006 m->name);
4007 return FALSE;
4008 }
4009 memcpy(nn, pp, len);
4010 }
4011 nn[len] = 0 ;
4012 nn[len+1] = 0;
4013 pp = ep;
4014 }
4015 break;
4016
/* Here m->value is used as the size of the field, including the terminating
zero, so the longest string that is accepted has m->value - 1 code units. */
4017 case MOD_STR:
4018 if (len + 1 > m->value)
4019 {
4020 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
4021 m->name, m->value - 1);
4022 return FALSE;
4023 }
4024 memcpy(field, pp, len);
4025 ((uint8_t *)field)[len] = 0;
4026 pp = ep;
4027 break;
4028 }
4029
4030 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4031 {
4032 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4033 return FALSE;
4034 }
4035
4036 p = pp;
4037 first = FALSE;
4038
/* For a popped pattern, a modifier that has left any option bit, a tables id,
a locale, or any of the NOTPOP_CONTROLS bits set is faulted. The check is made
after each full-name modifier, so it is the one just processed that is named
in the message. */
4039 if (ctx == CTX_POPPAT &&
4040 (pctl->options != 0 ||
4041 pctl->tables_id != 0 ||
4042 pctl->locale[0] != 0 ||
4043 (pctl->control & NOTPOP_CONTROLS) != 0))
4044 {
4045 fprintf(outfile, "** '%s' is not valid here\n", m->name);
4046 return FALSE;
4047 }
4048 }
4049
4050 return TRUE;
4051
/* This is where the data cases come when a value cannot be accepted. At this
point p is still at the start of the item (after any '-') and ep is at its
end, so the whole item is shown. */
4052 INVALID_VALUE:
4053 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4054 return FALSE;
4055 }
4056
4057
4058 /*************************************************
4059 * Get info from a pattern *
4060 *************************************************/
4061
4062 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4063 pattern.
4064
4065 Arguments:
4066 what code for the required information
4067 where where to put the answer
4068 unsetok PCRE2_ERROR_UNSET is an "expected" result
4069
4070 Returns: the return from pcre2_pattern_info()
4071 */
4072
4073 static int
pattern_info(int what,void * where,BOOL unsetok)4074 pattern_info(int what, void *where, BOOL unsetok)
4075 {
4076 int rc;
4077 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
4078 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4079 if (rc >= 0) return 0;
4080 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4081 {
4082 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4083 what);
4084 if (rc == PCRE2_ERROR_BADMODE)
4085 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4086 "%d-bit mode\n", test_mode,
4087 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4088 }
4089 return rc;
4090 }
4091
4092
4093
4094 #ifdef SUPPORT_PCRE2_8
4095 /*************************************************
4096 * Show something in a list *
4097 *************************************************/
4098
4099 /* This function just helps to keep the code that uses it tidier. It's used for
4100 various lists of things where there needs to be introductory text before the
4101 first item. As these calls are all in the POSIX-support code, they happen only
4102 when 8-bit mode is supported. */
4103
4104 static void
prmsg(const char ** msg,const char * s)4105 prmsg(const char **msg, const char *s)
4106 {
4107 fprintf(outfile, "%s %s", *msg, s);
4108 *msg = "";
4109 }
4110 #endif /* SUPPORT_PCRE2_8 */
4111
4112
4113
4114 /*************************************************
4115 * Show control bits *
4116 *************************************************/
4117
4118 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4119 Because the bits are unique, this can be used for both pattern and data control
4120 words.
4121
4122 Arguments:
4123 controls control bits
4124 controls2 more control bits
4125 before text to print before
4126
4127 Returns: nothing
4128 */
4129
4130 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4131 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4132 {
4133 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4134 before,
4135 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4136 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4137 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4138 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4139 ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4140 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4141 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4142 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4143 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4144 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4145 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4146 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4147 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4148 ((controls & CTL_DFA) != 0)? " dfa" : "",
4149 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4150 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4151 ((controls & CTL_FINDLIMITS_NOHEAP) != 0)? " find_limits_noheap" : "",
4152 ((controls2 & CTL2_FRAMESIZE) != 0)? " framesize" : "",
4153 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4154 ((controls & CTL_GETALL) != 0)? " getall" : "",
4155 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4156 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4157 ((controls & CTL_INFO) != 0)? " info" : "",
4158 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4159 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4160 ((controls & CTL_MARK) != 0)? " mark" : "",
4161 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4162 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4163 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4164 ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
4165 ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
4166 ((controls & CTL_POSIX) != 0)? " posix" : "",
4167 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4168 ((controls & CTL_PUSH) != 0)? " push" : "",
4169 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4170 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4171 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4172 ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4173 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4174 ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4175 ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4176 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4177 ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4178 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4179 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4180 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4181 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4182 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4183 }
4184
4185
4186
4187 /*************************************************
4188 * Show compile options *
4189 *************************************************/
4190
4191 /* Called from show_pattern_info() and for unsupported POSIX options.
4192
4193 Arguments:
4194 options an options word
4195 before text to print before
4196 after text to print after
4197
4198 Returns: nothing
4199 */
4200
4201 static void
show_compile_options(uint32_t options,const char * before,const char * after)4202 show_compile_options(uint32_t options, const char *before, const char *after)
4203 {
4204 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4205 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4206 before,
4207 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4208 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4209 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4210 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4211 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4212 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4213 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4214 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4215 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4216 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4217 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4218 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4219 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4220 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4221 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4222 ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4223 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4224 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4225 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4226 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4227 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4228 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4229 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4230 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4231 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4232 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4233 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4234 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4235 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4236 ((options & PCRE2_UTF) != 0)? " utf" : "",
4237 after);
4238 }
4239
4240
4241 /*************************************************
4242 * Show compile extra options *
4243 *************************************************/
4244
4245 /* Called from show_pattern_info() and for unsupported POSIX options.
4246
4247 Arguments:
4248 options an options word
4249 before text to print before
4250 after text to print after
4251
4252 Returns: nothing
4253 */
4254
4255 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4256 show_compile_extra_options(uint32_t options, const char *before,
4257 const char *after)
4258 {
4259 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4260 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4261 before,
4262 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4263 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4264 ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4265 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4266 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4267 ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4268 after);
4269 }
4270
4271
4272
4273 #ifdef SUPPORT_PCRE2_8
4274 /*************************************************
4275 * Show match options *
4276 *************************************************/
4277
4278 /* Called for unsupported POSIX options. */
4279
4280 static void
show_match_options(uint32_t options)4281 show_match_options(uint32_t options)
4282 {
4283 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4284 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4285 ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4286 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4287 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4288 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4289 ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4290 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4291 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4292 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4293 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4294 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4295 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4296 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4297 }
4298 #endif /* SUPPORT_PCRE2_8 */
4299
4300
4301
4302 /*************************************************
4303 * Show memory usage info for a pattern *
4304 *************************************************/
4305
4306 static void
show_memory_info(void)4307 show_memory_info(void)
4308 {
4309 uint32_t name_count, name_entry_size;
4310 size_t size, cblock_size;
4311
4312 /* One of the test_mode values will always be true, but to stop a compiler
4313 warning we must initialize cblock_size. */
4314
4315 cblock_size = 0;
4316 #ifdef SUPPORT_PCRE2_8
4317 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4318 #endif
4319 #ifdef SUPPORT_PCRE2_16
4320 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4321 #endif
4322 #ifdef SUPPORT_PCRE2_32
4323 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4324 #endif
4325
4326 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4327 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4328 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4329
4330 /* The uint32_t variables are cast before multiplying to stop code analyzers
4331 grumbling about potential overflow. */
4332
4333 fprintf(outfile, "Memory allocation (code space): %" SIZ_FORM "\n", size -
4334 (size_t)name_count * (size_t)name_entry_size * (size_t)code_unit_size -
4335 cblock_size);
4336
4337 if (pat_patctl.jit != 0)
4338 {
4339 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4340 fprintf(outfile, "Memory allocation (JIT code): %" SIZ_FORM "\n", size);
4341 }
4342 }
4343
4344
4345
4346 /*************************************************
4347 * Show frame size info for a pattern *
4348 *************************************************/
4349
4350 static void
show_framesize(void)4351 show_framesize(void)
4352 {
4353 size_t frame_size;
4354 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4355 fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size);
4356 }
4357
4358
4359
4360 /*************************************************
4361 * Get and output an error message *
4362 *************************************************/
4363
4364 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4365 print_error_message(int errorcode, const char *before, const char *after)
4366 {
4367 int len;
4368 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4369 if (len < 0)
4370 {
4371 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4372 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4373 }
4374 else
4375 {
4376 fprintf(outfile, "%s", before);
4377 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4378 fprintf(outfile, "%s", after);
4379 }
4380 return len >= 0;
4381 }
4382
4383
4384 /*************************************************
4385 * Callback function for callout enumeration *
4386 *************************************************/
4387
/* The only differences in the callout enumeration block for different code
4389 unit widths are that the pointers to the subject, the most recent MARK, and a
4390 callout argument string point to strings of the appropriate width. Casts can be
4391 used to deal with this.
4392
4393 Argument:
4394 cb pointer to enumerate block
4395 callout_data user data
4396
4397 Returns: 0
4398 */
4399
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
  BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;

  /* A zero next-item length is shown as a single character of the pattern. */

  int item_length = (cb->next_item_length == 0)? 1 : (int)cb->next_item_length;

  (void)callout_data;  /* Not currently displayed */

  fprintf(outfile, "Callout ");

  if (cb->callout_string == NULL)
  {
    /* Numerical callout: just show the number. */

    fprintf(outfile, "%d ", cb->callout_number);
  }
  else
  {
    /* String callout: show the string between its delimiters. The opening
    delimiter is the code unit immediately before the string; the closing one
    is looked up in the parallel tables of start and end delimiters, and stays
    the same as the opening one when no entry matches. */

    uint32_t k = 0;
    uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);

    fprintf(outfile, "%c", delimiter);
    PCHARSV(cb->callout_string, 0,
      cb->callout_string_length, utf, outfile);

    while (callout_start_delims[k] != 0 &&
           delimiter != callout_start_delims[k])
      k++;
    if (callout_start_delims[k] != 0) delimiter = callout_end_delims[k];

    fprintf(outfile, "%c ", delimiter);
  }

  /* Finish the line with the pattern item that follows the callout. */

  fprintf(outfile, "%.*s\n", item_length, pbuffer8 + cb->pattern_position);

  return 0;
}
4431
4432
4433
4434 /*************************************************
4435 * Show information about a pattern *
4436 *************************************************/
4437
4438 /* This function is called after a pattern has been compiled if any of the
4439 information-requesting controls have been set.
4440
4441 Arguments: none
4442
4443 Returns: PR_OK continue processing next line
4444 PR_SKIP skip to a blank line
4445 PR_ABEND abort the pcre2test run
4446 */
4447
4448 static int
show_pattern_info(void)4449 show_pattern_info(void)
4450 {
4451 uint32_t compile_options, overall_options, extra_options;
4452 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4453
4454 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4455 {
4456 fprintf(outfile, "------------------------------------------------------------------\n");
4457 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4458 }
4459
4460 if ((pat_patctl.control & CTL_INFO) != 0)
4461 {
4462 int rc;
4463 void *nametable;
4464 uint8_t *start_bits;
4465 BOOL heap_limit_set, match_limit_set, depth_limit_set;
4466 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4467 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4468 depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4469 newline_convention;
4470
4471 /* Exercise the error route. */
4472
4473 PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4474 (void)rc;
4475
4476 /* These info requests may return PCRE2_ERROR_UNSET. */
4477
4478 switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4479 {
4480 case 0:
4481 heap_limit_set = TRUE;
4482 break;
4483
4484 case PCRE2_ERROR_UNSET:
4485 heap_limit_set = FALSE;
4486 break;
4487
4488 default:
4489 return PR_ABEND;
4490 }
4491
4492 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4493 {
4494 case 0:
4495 match_limit_set = TRUE;
4496 break;
4497
4498 case PCRE2_ERROR_UNSET:
4499 match_limit_set = FALSE;
4500 break;
4501
4502 default:
4503 return PR_ABEND;
4504 }
4505
4506 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4507 {
4508 case 0:
4509 depth_limit_set = TRUE;
4510 break;
4511
4512 case PCRE2_ERROR_UNSET:
4513 depth_limit_set = FALSE;
4514 break;
4515
4516 default:
4517 return PR_ABEND;
4518 }
4519
4520 /* These info requests should always succeed. */
4521
4522 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4523 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4524 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4525 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4526 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4527 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4528 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4529 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4530 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4531 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4532 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4533 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4534 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4535 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4536 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4537 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4538 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4539 != 0)
4540 return PR_ABEND;
4541
4542 fprintf(outfile, "Capture group count = %d\n", capture_count);
4543
4544 if (backrefmax > 0)
4545 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4546
4547 if (maxlookbehind > 0)
4548 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4549
4550 if (heap_limit_set)
4551 fprintf(outfile, "Heap limit = %u\n", heap_limit);
4552
4553 if (match_limit_set)
4554 fprintf(outfile, "Match limit = %u\n", match_limit);
4555
4556 if (depth_limit_set)
4557 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4558
4559 if (namecount > 0)
4560 {
4561 fprintf(outfile, "Named capture groups:\n");
4562 for (; namecount > 0; namecount--)
4563 {
4564 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4565 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4566 fprintf(outfile, " ");
4567
4568 /* In UTF mode the name may be a UTF string containing non-ASCII
4569 letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4570 use the normal string printing functions, which use escapes for all
4571 non-ASCII characters. */
4572
4573 if (utf)
4574 {
4575 #ifdef SUPPORT_PCRE2_32
4576 if (test_mode == PCRE32_MODE)
4577 {
4578 PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4579 while (*nameptr != 0)
4580 {
4581 uint8_t u8buff[6];
4582 int len = ord2utf8(*nameptr++, u8buff);
4583 fprintf(outfile, "%.*s", len, u8buff);
4584 }
4585 }
4586 #endif
4587 #ifdef SUPPORT_PCRE2_16
4588 if (test_mode == PCRE16_MODE)
4589 {
4590 PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4591 while (*nameptr != 0)
4592 {
4593 int len;
4594 uint8_t u8buff[6];
4595 uint32_t c = *nameptr++ & 0xffff;
4596 if (c >= 0xD800 && c < 0xDC00)
4597 c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4598 len = ord2utf8(c, u8buff);
4599 fprintf(outfile, "%.*s", len, u8buff);
4600 }
4601 }
4602 #endif
4603 #ifdef SUPPORT_PCRE2_8
4604 if (test_mode == PCRE8_MODE)
4605 fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4606 #endif
4607 }
4608 else /* Not UTF mode */
4609 {
4610 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4611 }
4612
4613 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4614
4615 #ifdef SUPPORT_PCRE2_32
4616 if (test_mode == PCRE32_MODE)
4617 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4618 #endif
4619 #ifdef SUPPORT_PCRE2_16
4620 if (test_mode == PCRE16_MODE)
4621 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4622 #endif
4623 #ifdef SUPPORT_PCRE2_8
4624 if (test_mode == PCRE8_MODE)
4625 fprintf(outfile, "%3d\n", (int)(
4626 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4627 #endif
4628
4629 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4630 }
4631 }
4632
4633 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4634 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4635 if (match_empty) fprintf(outfile, "May match empty string\n");
4636
4637 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4638 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4639 pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4640
4641 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4642 cluttering up the verification output of non-UTF test files. */
4643
4644 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4645 {
4646 compile_options &= ~PCRE2_NEVER_UTF;
4647 overall_options &= ~PCRE2_NEVER_UTF;
4648 }
4649
4650 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4651 {
4652 compile_options &= ~PCRE2_NEVER_UCP;
4653 overall_options &= ~PCRE2_NEVER_UCP;
4654 }
4655
4656 if ((compile_options|overall_options) != 0)
4657 {
4658 if (compile_options == overall_options)
4659 show_compile_options(compile_options, "Options:", "\n");
4660 else
4661 {
4662 show_compile_options(compile_options, "Compile options:", "\n");
4663 show_compile_options(overall_options, "Overall options:", "\n");
4664 }
4665 }
4666
4667 if (extra_options != 0)
4668 show_compile_extra_options(extra_options, "Extra options:", "\n");
4669
4670 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4671
4672 if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4673 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4674 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4675 "any Unicode newline" : "CR, LF, or CRLF");
4676
4677 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4678 {
4679 switch (newline_convention)
4680 {
4681 case PCRE2_NEWLINE_CR:
4682 fprintf(outfile, "Forced newline is CR\n");
4683 break;
4684
4685 case PCRE2_NEWLINE_LF:
4686 fprintf(outfile, "Forced newline is LF\n");
4687 break;
4688
4689 case PCRE2_NEWLINE_CRLF:
4690 fprintf(outfile, "Forced newline is CRLF\n");
4691 break;
4692
4693 case PCRE2_NEWLINE_ANYCRLF:
4694 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4695 break;
4696
4697 case PCRE2_NEWLINE_ANY:
4698 fprintf(outfile, "Forced newline is any Unicode newline\n");
4699 break;
4700
4701 case PCRE2_NEWLINE_NUL:
4702 fprintf(outfile, "Forced newline is NUL\n");
4703 break;
4704
4705 default:
4706 break;
4707 }
4708 }
4709
4710 if (first_ctype == 2)
4711 {
4712 fprintf(outfile, "First code unit at start or follows newline\n");
4713 }
4714 else if (first_ctype == 1)
4715 {
4716 const char *caseless =
4717 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4718 "" : " (caseless)";
4719 if (PRINTOK(first_cunit))
4720 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4721 else
4722 {
4723 fprintf(outfile, "First code unit = ");
4724 pchar(first_cunit, FALSE, outfile);
4725 fprintf(outfile, "%s\n", caseless);
4726 }
4727 }
4728 else if (start_bits != NULL)
4729 {
4730 int i;
4731 int c = 24;
4732 fprintf(outfile, "Starting code units: ");
4733 for (i = 0; i < 256; i++)
4734 {
4735 if ((start_bits[i/8] & (1u << (i&7))) != 0)
4736 {
4737 if (c > 75)
4738 {
4739 fprintf(outfile, "\n ");
4740 c = 2;
4741 }
4742 if (PRINTOK(i) && i != ' ')
4743 {
4744 fprintf(outfile, "%c ", i);
4745 c += 2;
4746 }
4747 else
4748 {
4749 fprintf(outfile, "\\x%02x ", i);
4750 c += 5;
4751 }
4752 }
4753 }
4754 fprintf(outfile, "\n");
4755 }
4756
4757 if (last_ctype != 0)
4758 {
4759 const char *caseless =
4760 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4761 "" : " (caseless)";
4762 if (PRINTOK(last_cunit))
4763 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4764 else
4765 {
4766 fprintf(outfile, "Last code unit = ");
4767 pchar(last_cunit, FALSE, outfile);
4768 fprintf(outfile, "%s\n", caseless);
4769 }
4770 }
4771
4772 if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4773 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4774
4775 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4776 {
4777 #ifdef SUPPORT_JIT
4778 if (FLD(compiled_code, executable_jit) != NULL)
4779 fprintf(outfile, "JIT compilation was successful\n");
4780 else
4781 {
4782 fprintf(outfile, "JIT compilation was not successful");
4783 if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4784 return PR_ABEND;
4785 fprintf(outfile, "\n");
4786 }
4787 #else
4788 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4789 #endif
4790 }
4791 }
4792
4793 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4794 {
4795 int errorcode;
4796 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4797 if (errorcode != 0)
4798 {
4799 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4800 if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4801 return PR_ABEND;
4802 return PR_SKIP;
4803 }
4804 }
4805
4806 return PR_OK;
4807 }
4808
4809
4810
4811 /*************************************************
4812 * Handle serialization error *
4813 *************************************************/
4814
4815 /* Print an error message after a serialization failure.
4816
4817 Arguments:
4818 rc the error code
4819 msg an initial message for what failed
4820
4821 Returns: FALSE if print_error_message() fails
4822 */
4823
4824 static BOOL
serial_error(int rc,const char * msg)4825 serial_error(int rc, const char *msg)
4826 {
4827 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4828 return print_error_message(rc, "", "\n");
4829 }
4830
4831
4832
4833 /*************************************************
4834 * Open file for save/load commands *
4835 *************************************************/
4836
4837 /* This function decodes the file name and opens the file.
4838
4839 Arguments:
4840 buffptr point after the #command
4841 mode open mode
4842 fptr points to the FILE variable
4843 name name of # command
4844
4845 Returns: PR_OK or PR_ABEND
4846 */
4847
4848 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4849 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4850 {
4851 char *endf;
4852 char *filename = (char *)buffptr;
4853 while (isspace(*filename)) filename++;
4854 endf = filename + strlen8(filename);
4855 while (endf > filename && isspace(endf[-1])) endf--;
4856
4857 if (endf == filename)
4858 {
4859 fprintf(outfile, "** File name expected after %s\n", name);
4860 return PR_ABEND;
4861 }
4862
4863 *endf = 0;
4864 *fptr = fopen((const char *)filename, mode);
4865 if (*fptr == NULL)
4866 {
4867 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4868 return PR_ABEND;
4869 }
4870
4871 return PR_OK;
4872 }
4873
4874
4875
4876 /*************************************************
4877 * Process command line *
4878 *************************************************/
4879
4880 /* This function is called for lines beginning with # and a character that is
4881 not ! or whitespace, when encountered between tests, which means that there is
4882 no compiled pattern (compiled_code is NULL). The line is in buffer.
4883
4884 Arguments: none
4885
4886 Returns: PR_OK continue processing next line
4887 PR_SKIP skip to a blank line
4888 PR_ABEND abort the pcre2test run
4889 */
4890
4891 static int
process_command(void)4892 process_command(void)
4893 {
4894 FILE *f;
4895 PCRE2_SIZE serial_size;
4896 size_t i;
4897 int rc, cmd, cmdlen, yield;
4898 uint16_t first_listed_newline;
4899 const char *cmdname;
4900 uint8_t *argptr, *serial;
4901
4902 yield = PR_OK;
4903 cmd = CMD_UNKNOWN;
4904 cmdlen = 0;
4905
4906 for (i = 0; i < cmdlistcount; i++)
4907 {
4908 cmdname = cmdlist[i].name;
4909 cmdlen = strlen(cmdname);
4910 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4911 isspace(buffer[cmdlen+1]))
4912 {
4913 cmd = cmdlist[i].value;
4914 break;
4915 }
4916 }
4917
4918 argptr = buffer + cmdlen + 1;
4919
4920 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4921 {
4922 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4923 return PR_ABEND;
4924 }
4925
4926 switch(cmd)
4927 {
4928 case CMD_UNKNOWN:
4929 fprintf(outfile, "** Unknown command: %s", buffer);
4930 break;
4931
4932 case CMD_FORBID_UTF:
4933 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4934 break;
4935
4936 case CMD_PERLTEST:
4937 restrict_for_perl_test = TRUE;
4938 break;
4939
4940 /* Set default pattern modifiers */
4941
4942 case CMD_PATTERN:
4943 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4944 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4945 def_patctl.jit = JIT_DEFAULT;
4946 break;
4947
4948 /* Set default subject modifiers */
4949
4950 case CMD_SUBJECT:
4951 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4952 break;
4953
4954 /* Check the default newline, and if not one of those listed, set up the
4955 first one to be forced. An empty list unsets. */
4956
4957 case CMD_NEWLINE_DEFAULT:
4958 local_newline_default = 0; /* Unset */
4959 first_listed_newline = 0;
4960 for (;;)
4961 {
4962 while (isspace(*argptr)) argptr++;
4963 if (*argptr == 0) break;
4964 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4965 {
4966 size_t nlen = strlen(newlines[i]);
4967 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4968 isspace(argptr[nlen]))
4969 {
4970 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4971 if (first_listed_newline == 0) first_listed_newline = i;
4972 }
4973 }
4974 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4975 }
4976 local_newline_default = first_listed_newline;
4977 break;
4978
4979 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4980 the compiled pattern (e.g. to give information) are permitted. The default
4981 pattern modifiers are ignored. */
4982
4983 case CMD_POP:
4984 case CMD_POPCOPY:
4985 if (patstacknext <= 0)
4986 {
4987 fprintf(outfile, "** Can't pop off an empty stack\n");
4988 return PR_SKIP;
4989 }
4990 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4991 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4992 return PR_SKIP;
4993
4994 if (cmd == CMD_POP)
4995 {
4996 SET(compiled_code, patstack[--patstacknext]);
4997 }
4998 else
4999 {
5000 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
5001 }
5002
5003 if (pat_patctl.jit != 0)
5004 {
5005 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5006 }
5007 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5008 if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
5009 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5010 {
5011 rc = show_pattern_info();
5012 if (rc != PR_OK) return rc;
5013 }
5014 break;
5015
5016 /* Save the stack of compiled patterns to a file, then empty the stack. */
5017
5018 case CMD_SAVE:
5019 if (patstacknext <= 0)
5020 {
5021 fprintf(outfile, "** No stacked patterns to save\n");
5022 return PR_OK;
5023 }
5024
5025 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
5026 if (rc != PR_OK) return rc;
5027
5028 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
5029 general_context);
5030 if (rc < 0)
5031 {
5032 fclose(f);
5033 if (!serial_error(rc, "Serialization")) return PR_ABEND;
5034 break;
5035 }
5036
5037 /* Write the length at the start of the file to make it straightforward to
5038 get the right memory when re-loading. This saves having to read the file size
5039 in different operating systems. To allow for different endianness (even
5040 though reloading with the opposite endianness does not work), write the
5041 length byte-by-byte. */
5042
5043 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5044 if (fwrite(serial, 1, serial_size, f) != serial_size)
5045 {
5046 fprintf(outfile, "** Wrong return from fwrite()\n");
5047 fclose(f);
5048 return PR_ABEND;
5049 }
5050
5051 fclose(f);
5052 PCRE2_SERIALIZE_FREE(serial);
5053 while(patstacknext > 0)
5054 {
5055 SET(compiled_code, patstack[--patstacknext]);
5056 SUB1(pcre2_code_free, compiled_code);
5057 }
5058 SET(compiled_code, NULL);
5059 break;
5060
5061 /* Load a set of compiled patterns from a file onto the stack */
5062
5063 case CMD_LOAD:
5064 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5065 if (rc != PR_OK) return rc;
5066
5067 serial_size = 0;
5068 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5069
5070 serial = malloc(serial_size);
5071 if (serial == NULL)
5072 {
5073 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5074 serial_size);
5075 fclose(f);
5076 return PR_ABEND;
5077 }
5078
5079 i = fread(serial, 1, serial_size, f);
5080 fclose(f);
5081
5082 if (i != serial_size)
5083 {
5084 fprintf(outfile, "** Wrong return from fread()\n");
5085 yield = PR_ABEND;
5086 }
5087 else
5088 {
5089 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5090 if (rc < 0)
5091 {
5092 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5093 }
5094 else
5095 {
5096 if (rc + patstacknext > PATSTACKSIZE)
5097 {
5098 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5099 rc, (rc == 1)? "" : "s");
5100 rc = PATSTACKSIZE - patstacknext;
5101 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5102 (rc == 1)? "" : "s");
5103 }
5104 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5105 general_context);
5106 if (rc < 0)
5107 {
5108 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5109 }
5110 else patstacknext += rc;
5111 }
5112 }
5113
5114 free(serial);
5115 break;
5116
5117 /* Load a set of binary tables into tables3. */
5118
5119 case CMD_LOADTABLES:
5120 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5121 if (rc != PR_OK) return rc;
5122
5123 if (tables3 == NULL)
5124 {
5125 (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5126 tables3 = malloc(loadtables_length);
5127 }
5128
5129 if (tables3 == NULL)
5130 {
5131 fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5132 yield = PR_ABEND;
5133 }
5134 else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5135 {
5136 fprintf(outfile, "** Wrong return from fread()\n");
5137 yield = PR_ABEND;
5138 }
5139
5140 fclose(f);
5141 break;
5142 }
5143
5144 return yield;
5145 }
5146
5147
5148
5149 /*************************************************
5150 * Process pattern line *
5151 *************************************************/
5152
5153 /* This function is called when the input buffer contains the start of a
5154 pattern. The first character is known to be a valid delimiter. The pattern is
5155 read, modifiers are interpreted, and a suitable local context is set up for
5156 this test. The pattern is then compiled.
5157
5158 Arguments: none
5159
5160 Returns: PR_OK continue processing next line
5161 PR_SKIP skip to a blank line
5162 PR_ABEND abort the pcre2test run
5163 */
5164
5165 static int
process_pattern(void)5166 process_pattern(void)
5167 {
5168 BOOL utf;
5169 uint32_t k;
5170 uint8_t *p = buffer;
5171 unsigned int delimiter = *p++;
5172 int errorcode;
5173 void *use_pat_context;
5174 uint32_t use_forbid_utf = forbid_utf;
5175 PCRE2_SIZE patlen;
5176 PCRE2_SIZE valgrind_access_length;
5177 PCRE2_SIZE erroroffset;
5178
5179 /* The perltest.sh script supports only / as a delimiter. */
5180
5181 if (restrict_for_perl_test && delimiter != '/')
5182 {
5183 fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5184 return PR_ABEND;
5185 }
5186
5187 /* Initialize the context and pattern/data controls for this test from the
5188 defaults. */
5189
5190 PATCTXCPY(pat_context, default_pat_context);
5191 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5192
5193 /* Find the end of the pattern, reading more lines if necessary. */
5194
5195 for(;;)
5196 {
5197 while (*p != 0)
5198 {
5199 if (*p == '\\' && p[1] != 0) p++;
5200 else if (*p == delimiter) break;
5201 p++;
5202 }
5203 if (*p != 0) break;
5204 if ((p = extend_inputline(infile, p, " > ")) == NULL)
5205 {
5206 fprintf(outfile, "** Unexpected EOF\n");
5207 return PR_ABEND;
5208 }
5209 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5210 }
5211
5212 /* If the first character after the delimiter is backslash, make the pattern
5213 end with backslash. This is purely to provide a way of testing for the error
5214 message when a pattern ends with backslash. */
5215
5216 if (p[1] == '\\') *p++ = '\\';
5217
5218 /* Terminate the pattern at the delimiter, and compute the length. */
5219
5220 *p++ = 0;
5221 patlen = p - buffer - 2;
5222
5223 /* Look for modifiers and options after the final delimiter. */
5224
5225 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5226
5227 /* Note that the match_invalid_utf option also sets utf when passed to
5228 pcre2_compile(). */
5229
5230 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5231
5232 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5233 exclusive with the utf modifier. */
5234
5235 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5236 {
5237 if (test_mode == PCRE8_MODE)
5238 {
5239 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5240 return PR_SKIP;
5241 }
5242 if (utf)
5243 {
5244 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5245 return PR_SKIP;
5246 }
5247 }
5248
5249 /* The convert and posix modifiers are mutually exclusive. */
5250
5251 if (pat_patctl.convert_type != CONVERT_UNSET &&
5252 (pat_patctl.control & CTL_POSIX) != 0)
5253 {
5254 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5255 return PR_SKIP;
5256 }
5257
5258 /* Check for mutually exclusive control modifiers. At present, these are all in
5259 the first control word. */
5260
5261 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5262 {
5263 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5264 if (c != 0 && c != (c & (~c+1)))
5265 {
5266 show_controls(c, 0, "** Not allowed together:");
5267 fprintf(outfile, "\n");
5268 return PR_SKIP;
5269 }
5270 }
5271
5272 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5273 specified. */
5274
5275 if (pat_patctl.jit == 0 &&
5276 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5277 pat_patctl.jit = JIT_DEFAULT;
5278
5279 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5280 in callouts. Convert from hex if requested (literal strings in quotes may be
5281 present within the hexadecimal pairs). The result must necessarily be fewer
5282 characters so will always fit in pbuffer8. */
5283
5284 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5285 {
5286 uint8_t *pp, *pt;
5287 uint32_t c, d;
5288
5289 pt = pbuffer8;
5290 for (pp = buffer + 1; *pp != 0; pp++)
5291 {
5292 if (isspace(*pp)) continue;
5293 c = *pp++;
5294
5295 /* Handle a literal substring */
5296
5297 if (c == '\'' || c == '"')
5298 {
5299 uint8_t *pq = pp;
5300 for (;; pp++)
5301 {
5302 d = *pp;
5303 if (d == 0)
5304 {
5305 fprintf(outfile, "** Missing closing quote in hex pattern: "
5306 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5307 return PR_SKIP;
5308 }
5309 if (d == c) break;
5310 *pt++ = d;
5311 }
5312 }
5313
5314 /* Expect a hex pair */
5315
5316 else
5317 {
5318 if (!isxdigit(c))
5319 {
5320 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5321 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5322 return PR_SKIP;
5323 }
5324 if (*pp == 0)
5325 {
5326 fprintf(outfile, "** Odd number of digits in hex pattern\n");
5327 return PR_SKIP;
5328 }
5329 d = *pp;
5330 if (!isxdigit(d))
5331 {
5332 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5333 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5334 return PR_SKIP;
5335 }
5336 c = toupper(c);
5337 d = toupper(d);
5338 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5339 (isdigit(d)? (d - '0') : (d - 'A' + 10));
5340 }
5341 }
5342 *pt = 0;
5343 patlen = pt - pbuffer8;
5344 }
5345
5346 /* If not a hex string, process for repetition expansion if requested. */
5347
5348 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5349 {
5350 uint8_t *pp, *pt;
5351
5352 pt = pbuffer8;
5353 for (pp = buffer + 1; *pp != 0; pp++)
5354 {
5355 uint8_t *pc = pp;
5356 uint32_t count = 1;
5357 size_t length = 1;
5358
5359 /* Check for replication syntax; if not found, the defaults just set will
5360 prevail and one character will be copied. */
5361
5362 if (pp[0] == '\\' && pp[1] == '[')
5363 {
5364 uint8_t *pe;
5365 for (pe = pp + 2; *pe != 0; pe++)
5366 {
5367 if (pe[0] == ']' && pe[1] == '{')
5368 {
5369 uint32_t clen = pe - pc - 2;
5370 uint32_t i = 0;
5371 unsigned long uli;
5372 char *endptr;
5373
5374 pe += 2;
5375 uli = strtoul((const char *)pe, &endptr, 10);
5376 if (U32OVERFLOW(uli))
5377 {
5378 fprintf(outfile, "** Pattern repeat count too large\n");
5379 return PR_SKIP;
5380 }
5381
5382 i = (uint32_t)uli;
5383 pe = (uint8_t *)endptr;
5384 if (*pe == '}')
5385 {
5386 if (i == 0)
5387 {
5388 fprintf(outfile, "** Zero repeat not allowed\n");
5389 return PR_SKIP;
5390 }
5391 pc += 2;
5392 count = i;
5393 length = clen;
5394 pp = pe;
5395 break;
5396 }
5397 }
5398 }
5399 }
5400
5401 /* Add to output. If the buffer is too small expand it. The function for
5402 expanding buffers always keeps buffer and pbuffer8 in step as far as their
5403 size goes. */
5404
5405 while (pt + count * length > pbuffer8 + pbuffer8_size)
5406 {
5407 size_t pc_offset = pc - buffer;
5408 size_t pp_offset = pp - buffer;
5409 size_t pt_offset = pt - pbuffer8;
5410 expand_input_buffers();
5411 pc = buffer + pc_offset;
5412 pp = buffer + pp_offset;
5413 pt = pbuffer8 + pt_offset;
5414 }
5415
5416 for (; count > 0; count--)
5417 {
5418 memcpy(pt, pc, length);
5419 pt += length;
5420 }
5421 }
5422
5423 *pt = 0;
5424 patlen = pt - pbuffer8;
5425
5426 if ((pat_patctl.control & CTL_INFO) != 0)
5427 fprintf(outfile, "Expanded: %s\n", pbuffer8);
5428 }
5429
5430 /* Neither hex nor expanded, just copy the input verbatim. */
5431
5432 else
5433 {
5434 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5435 }
5436
5437 /* Sort out character tables */
5438
5439 if (pat_patctl.locale[0] != 0)
5440 {
5441 if (pat_patctl.tables_id != 0)
5442 {
5443 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5444 return PR_SKIP;
5445 }
5446 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5447 {
5448 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5449 return PR_SKIP;
5450 }
5451 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5452 {
5453 strcpy((char *)locale_name, (char *)pat_patctl.locale);
5454 if (locale_tables != NULL)
5455 {
5456 PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables);
5457 }
5458 PCRE2_MAKETABLES(locale_tables, general_context);
5459 }
5460 use_tables = locale_tables;
5461 }
5462
5463 else switch (pat_patctl.tables_id)
5464 {
5465 case 0: use_tables = NULL; break;
5466 case 1: use_tables = tables1; break;
5467 case 2: use_tables = tables2; break;
5468
5469 case 3:
5470 if (tables3 == NULL)
5471 {
5472 fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5473 "been loaded\n");
5474 return PR_SKIP;
5475 }
5476 use_tables = tables3;
5477 break;
5478
5479 default:
5480 fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5481 return PR_SKIP;
5482 }
5483
5484 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5485
5486 /* Set up for the stackguard test. */
5487
5488 if (pat_patctl.stackguard_test != 0)
5489 {
5490 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5491 }
5492
5493 /* Handle compiling via the POSIX interface, which doesn't support the
5494 timing, showing, or debugging options, nor the ability to pass over
5495 local character tables. Neither does it have 16-bit or 32-bit support. */
5496
5497 if ((pat_patctl.control & CTL_POSIX) != 0)
5498 {
5499 #ifdef SUPPORT_PCRE2_8
5500 int rc;
5501 int cflags = 0;
5502 const char *msg = "** Ignored with POSIX interface:";
5503 #endif
5504
5505 if (test_mode != PCRE8_MODE)
5506 {
5507 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5508 return PR_SKIP;
5509 }
5510
5511 #ifdef SUPPORT_PCRE2_8
5512 /* Check for features that the POSIX interface does not support. */
5513
5514 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5515 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5516 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5517 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5518 if (timeit > 0) prmsg(&msg, "timing");
5519 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5520
5521 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5522 {
5523 show_compile_options(
5524 pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
5525 msg, "");
5526 msg = "";
5527 }
5528
5529 if ((FLD(pat_context, extra_options) &
5530 (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
5531 {
5532 show_compile_extra_options(
5533 FLD(pat_context, extra_options) &
5534 (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
5535 msg = "";
5536 }
5537
5538 if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
5539 (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
5540 {
5541 show_controls(
5542 pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
5543 pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
5544 msg);
5545 msg = "";
5546 }
5547
5548 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5549 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5550 prmsg(&msg, "max_pattern_length");
5551 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5552 prmsg(&msg, "parens_nest_limit");
5553
5554 if (msg[0] == 0) fprintf(outfile, "\n");
5555
5556 /* Translate PCRE2 options to POSIX options and then compile. */
5557
5558 if (utf) cflags |= REG_UTF;
5559 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5560 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5561 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5562 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5563 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5564 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5565 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5566
5567 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5568 {
5569 preg.re_endp = (char *)pbuffer8 + patlen;
5570 cflags |= REG_PEND;
5571 }
5572
5573 rc = regcomp(&preg, (char *)pbuffer8, cflags);
5574
5575 /* Compiling failed */
5576
5577 if (rc != 0)
5578 {
5579 size_t bsize, usize;
5580 int psize;
5581
5582 preg.re_pcre2_code = NULL; /* In case something was left in there */
5583 preg.re_match_data = NULL;
5584
5585 bsize = (pat_patctl.regerror_buffsize != 0)?
5586 pat_patctl.regerror_buffsize : pbuffer8_size;
5587 if (bsize + 8 < pbuffer8_size)
5588 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5589 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5590
5591 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5592 versions of snprintf() put a zero byte at the end, but others do not.
5593 Therefore, we print a maximum of one less than the size of the buffer. */
5594
5595 psize = (int)bsize - 1;
5596 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5597 if (usize > bsize)
5598 {
5599 fprintf(outfile, "** regerror() message truncated\n");
5600 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5601 fprintf(outfile, "** regerror() buffer overflow\n");
5602 }
5603 return PR_SKIP;
5604 }
5605
5606 /* Compiling succeeded. Check that the values in the preg block are sensible.
5607 It can happen that pcre2test is accidentally linked with a different POSIX
5608 library which succeeds, but of course puts different things into preg. In
5609 this situation, calling regfree() may cause a segfault (or invalid free() in
5610 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5611 calling of regfree() on exit. */
5612
5613 if (preg.re_pcre2_code == NULL ||
5614 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5615 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5616 preg.re_match_data == NULL ||
5617 preg.re_cflags != cflags)
5618 {
5619 fprintf(outfile,
5620 "** The regcomp() function returned zero (success), but the values set\n"
5621 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5622 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5623 "** some other POSIX regex library.\n**\n");
5624 preg.re_pcre2_code = NULL;
5625 return PR_ABEND;
5626 }
5627
5628 return PR_OK;
5629 #endif /* SUPPORT_PCRE2_8 */
5630 }
5631
5632 /* Handle compiling via the native interface. Controls that act later are
5633 ignored with "push". Replacements are locked out. */
5634
5635 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5636 {
5637 if (pat_patctl.replacement[0] != 0)
5638 {
5639 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5640 return PR_OK;
5641 }
5642 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5643 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5644 {
5645 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5646 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5647 "** Ignored when compiled pattern is stacked with 'push':");
5648 fprintf(outfile, "\n");
5649 }
5650 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5651 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5652 {
5653 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5654 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5655 "** Applies only to compile when pattern is stacked with 'push':");
5656 fprintf(outfile, "\n");
5657 }
5658 }
5659
5660 /* Convert the input in non-8-bit modes. */
5661
5662 errorcode = 0;
5663
5664 #ifdef SUPPORT_PCRE2_16
5665 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5666 #endif
5667
5668 #ifdef SUPPORT_PCRE2_32
5669 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5670 #endif
5671
5672 switch(errorcode)
5673 {
5674 case -1:
5675 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5676 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5677 return PR_SKIP;
5678
5679 case -2:
5680 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5681 "cannot be converted to UTF\n");
5682 return PR_SKIP;
5683
5684 case -3:
5685 fprintf(outfile, "** Failed: character value greater than 0xffff "
5686 "cannot be converted to 16-bit in non-UTF mode\n");
5687 return PR_SKIP;
5688
5689 default:
5690 break;
5691 }
5692
5693 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5694 patlen. If it is to be converted, copy the result back afterwards so that it
5695 ends up back in the usual place. */
5696
5697 if (pat_patctl.convert_type != CONVERT_UNSET)
5698 {
5699 int rc;
5700 int convert_return = PR_OK;
5701 uint32_t convert_options = pat_patctl.convert_type;
5702 void *converted_pattern;
5703 PCRE2_SIZE converted_length;
5704
5705 if (pat_patctl.convert_length != 0)
5706 {
5707 converted_length = pat_patctl.convert_length;
5708 converted_pattern = malloc(converted_length * code_unit_size);
5709 if (converted_pattern == NULL)
5710 {
5711 fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5712 return PR_SKIP;
5713 }
5714 }
5715 else converted_pattern = NULL; /* Let the library allocate */
5716
5717 if (utf) convert_options |= PCRE2_CONVERT_UTF;
5718 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5719 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5720
5721 CONCTXCPY(con_context, default_con_context);
5722
5723 if (pat_patctl.convert_glob_escape != 0)
5724 {
5725 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5726 pat_patctl.convert_glob_escape;
5727 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5728 if (rc != 0)
5729 {
5730 fprintf(outfile, "** Invalid glob escape '%c'\n",
5731 pat_patctl.convert_glob_escape);
5732 convert_return = PR_SKIP;
5733 goto CONVERT_FINISH;
5734 }
5735 }
5736
5737 if (pat_patctl.convert_glob_separator != 0)
5738 {
5739 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5740 if (rc != 0)
5741 {
5742 fprintf(outfile, "** Invalid glob separator '%c'\n",
5743 pat_patctl.convert_glob_separator);
5744 convert_return = PR_SKIP;
5745 goto CONVERT_FINISH;
5746 }
5747 }
5748
5749 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5750 &converted_pattern, &converted_length, con_context);
5751
5752 if (rc != 0)
5753 {
5754 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5755 converted_length);
5756 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5757 }
5758
5759 /* Output the converted pattern, then copy it. */
5760
5761 else
5762 {
5763 PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5764 fprintf(outfile, "\n");
5765 patlen = converted_length;
5766 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5767 }
5768
5769 /* Free the converted pattern. */
5770
5771 CONVERT_FINISH:
5772 if (pat_patctl.convert_length != 0)
5773 free(converted_pattern);
5774 else
5775 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5776
5777 /* Return if conversion was unsuccessful. */
5778
5779 if (convert_return != PR_OK) return convert_return;
5780 }
5781
5782 /* By default we pass a zero-terminated pattern, but a length is passed if
5783 "use_length" was specified or this is a hex pattern (which might contain binary
5784 zeros). When valgrind is supported, arrange for the unused part of the buffer
5785 to be marked as no access. */
5786
5787 valgrind_access_length = patlen;
5788 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5789 {
5790 patlen = PCRE2_ZERO_TERMINATED;
5791 valgrind_access_length += 1; /* For the terminating zero */
5792 }
5793
5794 #ifdef SUPPORT_VALGRIND
5795 #ifdef SUPPORT_PCRE2_8
5796 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5797 {
5798 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5799 pbuffer8_size - valgrind_access_length);
5800 }
5801 #endif
5802 #ifdef SUPPORT_PCRE2_16
5803 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5804 {
5805 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5806 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5807 }
5808 #endif
5809 #ifdef SUPPORT_PCRE2_32
5810 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5811 {
5812 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5813 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5814 }
5815 #endif
5816 #else /* Valgrind not supported */
5817 (void)valgrind_access_length; /* Avoid compiler warning */
5818 #endif
5819
5820 /* If #newline_default has been used and the library was not compiled with an
5821 appropriate default newline setting, local_newline_default will be non-zero. We
5822 use this if there is no explicit newline modifier. */
5823
5824 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5825 {
5826 SETFLD(pat_context, newline_convention, local_newline_default);
5827 }
5828
5829 /* The null_context modifier is used to test calling pcre2_compile() with a
5830 NULL context. */
5831
5832 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5833 NULL : PTR(pat_context);
5834
5835 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5836 and PCRE2_NEVER_UCP are invalid with it. */
5837
5838 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5839
5840 /* Compile many times when timing. */
5841
5842 if (timeit > 0)
5843 {
5844 int i;
5845 clock_t time_taken = 0;
5846 for (i = 0; i < timeit; i++)
5847 {
5848 clock_t start_time = clock();
5849 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5850 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5851 use_pat_context);
5852 time_taken += clock() - start_time;
5853 if (TEST(compiled_code, !=, NULL))
5854 { SUB1(pcre2_code_free, compiled_code); }
5855 }
5856 total_compile_time += time_taken;
5857 fprintf(outfile, "Compile time %.4f milliseconds\n",
5858 (((double)time_taken * 1000.0) / (double)timeit) /
5859 (double)CLOCKS_PER_SEC);
5860 }
5861
5862 /* A final compile that is used "for real". */
5863
5864 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5865 &errorcode, &erroroffset, use_pat_context);
5866
5867 /* Call the JIT compiler if requested. When timing, we must free and recompile
5868 the pattern each time because that is the only way to free the JIT compiled
5869 code. We know that compilation will always succeed. */
5870
5871 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5872 {
5873 if (timeit > 0)
5874 {
5875 int i;
5876 clock_t time_taken = 0;
5877
5878 for (i = 0; i < timeit; i++)
5879 {
5880 clock_t start_time;
5881 SUB1(pcre2_code_free, compiled_code);
5882 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5883 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5884 use_pat_context);
5885 start_time = clock();
5886 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5887 time_taken += clock() - start_time;
5888 }
5889 total_jit_compile_time += time_taken;
5890 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5891 (((double)time_taken * 1000.0) / (double)timeit) /
5892 (double)CLOCKS_PER_SEC);
5893 }
5894 else
5895 {
5896 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5897 }
5898 }
5899
5900 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5901 and 32-bit buffers can be marked completely undefined, but we must leave the
5902 pattern in the 8-bit buffer defined because it may be read from a callout
5903 during matching. */
5904
5905 #ifdef SUPPORT_VALGRIND
5906 #ifdef SUPPORT_PCRE2_8
5907 if (test_mode == PCRE8_MODE)
5908 {
5909 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5910 pbuffer8_size - valgrind_access_length);
5911 }
5912 #endif
5913 #ifdef SUPPORT_PCRE2_16
5914 if (test_mode == PCRE16_MODE)
5915 {
5916 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5917 }
5918 #endif
5919 #ifdef SUPPORT_PCRE2_32
5920 if (test_mode == PCRE32_MODE)
5921 {
5922 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5923 }
5924 #endif
5925 #endif
5926
5927 /* Compilation failed; go back for another re, skipping to blank line
5928 if non-interactive. */
5929
5930 if (TEST(compiled_code, ==, NULL))
5931 {
5932 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5933 (int)erroroffset);
5934 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5935 return PR_SKIP;
5936 }
5937
5938 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5939 locked out at compile time, but we must also check for occurrences of \P, \p,
5940 and \X, which are only supported when Unicode is supported. */
5941
5942 if (forbid_utf != 0)
5943 {
5944 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5945 {
5946 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5947 "#forbid_utf command\n");
5948 return PR_SKIP;
5949 }
5950 }
5951
5952 /* Remember the maximum lookbehind, for partial matching. */
5953
5954 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5955 return PR_ABEND;
5956
5957 /* Remember the number of captures. */
5958
5959 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5960 return PR_ABEND;
5961
5962 /* If an explicit newline modifier was given, set the information flag in the
5963 pattern so that it is preserved over push/pop. */
5964
5965 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5966 {
5967 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5968 }
5969
5970 /* Output code size and other information if requested. */
5971
5972 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5973 if ((pat_patctl.control2 & CTL2_FRAMESIZE) != 0) show_framesize();
5974 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5975 {
5976 int rc = show_pattern_info();
5977 if (rc != PR_OK) return rc;
5978 }
5979
5980 /* The "push" control requests that the compiled pattern be remembered on a
5981 stack. This is mainly for testing the serialization functionality. */
5982
5983 if ((pat_patctl.control & CTL_PUSH) != 0)
5984 {
5985 if (patstacknext >= PATSTACKSIZE)
5986 {
5987 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5988 return PR_ABEND;
5989 }
5990 patstack[patstacknext++] = PTR(compiled_code);
5991 SET(compiled_code, NULL);
5992 }
5993
5994 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5995 copy of the pattern, the latter with a copy of its character tables. This tests
5996 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5997
5998 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5999 {
6000 if (patstacknext >= PATSTACKSIZE)
6001 {
6002 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
6003 return PR_ABEND;
6004 }
6005 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
6006 {
6007 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
6008 }
6009 else
6010 {
6011 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
6012 compiled_code); }
6013 }
6014
6015 return PR_OK;
6016 }
6017
6018
6019
6020 /*************************************************
6021 * Check heap, match or depth limit *
6022 *************************************************/
6023
6024 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
6025 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
6026
6027 Arguments:
6028 pp the subject string
6029 ulen length of subject or PCRE2_ZERO_TERMINATED
6030 errnumber defines which limit to test
6031 msg string to include in final message
6032
6033 Returns: the return from the final match function call
6034 */
6035
6036 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)6037 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
6038 {
6039 int capcount;
6040 uint32_t min = 0;
6041 uint32_t mid = 64;
6042 uint32_t max = UINT32_MAX;
6043
6044 PCRE2_SET_MATCH_LIMIT(dat_context, max);
6045 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6046 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6047
6048 for (;;)
6049 {
6050 uint32_t stack_start = 0;
6051
6052 /* If we are checking the heap limit, free any frames vector that is cached
6053 in the match_data so we always start without one. */
6054
6055 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6056 {
6057 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6058
6059 #ifdef SUPPORT_PCRE2_8
6060 if (code_unit_size == 1)
6061 {
6062 match_data8->memctl.free(match_data8->heapframes,
6063 match_data8->memctl.memory_data);
6064 match_data8->heapframes = NULL;
6065 match_data8->heapframes_size = 0;
6066 }
6067 #endif
6068
6069 #ifdef SUPPORT_PCRE2_16
6070 if (code_unit_size == 2)
6071 {
6072 match_data16->memctl.free(match_data16->heapframes,
6073 match_data16->memctl.memory_data);
6074 match_data16->heapframes = NULL;
6075 match_data16->heapframes_size = 0;
6076 }
6077 #endif
6078
6079 #ifdef SUPPORT_PCRE2_32
6080 if (code_unit_size == 4)
6081 {
6082 match_data32->memctl.free(match_data32->heapframes,
6083 match_data32->memctl.memory_data);
6084 match_data32->heapframes = NULL;
6085 match_data32->heapframes_size = 0;
6086 }
6087 #endif
6088 }
6089
6090 /* No need to mess with the frames vector for match or depth limits. */
6091
6092 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6093 {
6094 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6095 }
6096 else
6097 {
6098 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6099 }
6100
6101 /* Do the appropriate match */
6102
6103 if ((dat_datctl.control & CTL_DFA) != 0)
6104 {
6105 stack_start = DFA_START_RWS_SIZE/1024;
6106 if (dfa_workspace == NULL)
6107 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6108 if (dfa_matched++ == 0)
6109 dfa_workspace[0] = -1; /* To catch bad restart */
6110 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6111 dat_datctl.options, match_data,
6112 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6113 }
6114
6115 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6116 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6117 dat_datctl.options, match_data, PTR(dat_context));
6118
6119 else
6120 {
6121 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6122 dat_datctl.options, match_data, PTR(dat_context));
6123 }
6124
6125 if (capcount == errnumber)
6126 {
6127 if ((mid & 0x80000000u) != 0)
6128 {
6129 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6130 "restriction\n", msg);
6131 break;
6132 }
6133
6134 min = mid;
6135 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6136 }
6137 else if (capcount >= 0 ||
6138 capcount == PCRE2_ERROR_NOMATCH ||
6139 capcount == PCRE2_ERROR_PARTIAL)
6140 {
6141 /* If we've not hit the error with a heap limit less than the size of the
6142 initial stack frame vector (for pcre2_match()) or the initial stack
6143 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6144 the minimum limit is zero; there's no need to go on. The other limits are
6145 always greater than zero. */
6146
6147 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6148 {
6149 fprintf(outfile, "Minimum %s limit = 0\n", msg);
6150 break;
6151 }
6152 if (mid == min + 1)
6153 {
6154 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6155 break;
6156 }
6157 max = mid;
6158 mid = (min + max)/2;
6159 }
6160 else break; /* Some other error */
6161 }
6162
6163 return capcount;
6164 }
6165
6166
6167
6168 /*************************************************
6169 * Substitute callout function *
6170 *************************************************/
6171
6172 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6173 Print out the data that is passed back. The substitute callout block is
6174 identical for all code unit widths, so we just pick one.
6175
6176 Arguments:
6177 scb pointer to substitute callout block
6178 data_ptr callout data
6179
6180 Returns: nothing
6181 */
6182
6183 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6184 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6185 void *data_ptr)
6186 {
6187 int yield = 0;
6188 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6189 (void)data_ptr; /* Not used */
6190
6191 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6192 scb->subscount, scb->oveccount,
6193 scb->ovector[0], scb->ovector[1]);
6194
6195 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6196 utf, outfile);
6197
6198 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6199 scb->output_offsets[0], scb->output_offsets[1]);
6200
6201 PCHARSV(scb->output, scb->output_offsets[0],
6202 scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6203
6204 if (scb->subscount == dat_datctl.substitute_stop)
6205 {
6206 yield = -1;
6207 fprintf(outfile, " STOPPED");
6208 }
6209 else if (scb->subscount == dat_datctl.substitute_skip)
6210 {
6211 yield = +1;
6212 fprintf(outfile, " SKIPPED");
6213 }
6214
6215 fprintf(outfile, "\"\n");
6216 return yield;
6217 }
6218
6219
6220 /*************************************************
6221 * Callout function *
6222 *************************************************/
6223
6224 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
6225 we are in the match (unless suppressed). Yield zero unless more callouts than
6226 the fail count, or the callout data is not zero. The only differences in the
6227 callout block for different code unit widths are that the pointers to the
6228 subject, the most recent MARK, and a callout argument string point to strings
6229 of the appropriate width. Casts can be used to deal with this.
6230
6231 Arguments:
6232 cb a pointer to a callout block
6233 callout_data_ptr the provided callout data
6234
6235 Returns: 0 or 1 or an error, as determined by settings
6236 */
6237
6238 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)6239 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
6240 {
6241 FILE *f, *fdefault;
6242 uint32_t i, pre_start, post_start, subject_length;
6243 PCRE2_SIZE current_position;
6244 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6245 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
6246 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
6247
6248 /* The FILE f is used for echoing the subject string if it is non-NULL. This
6249 happens only once in simple cases, but we want to repeat after any additional
6250 output caused by CALLOUT_EXTRA. */
6251
6252 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
6253 NULL : outfile;
6254
6255 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
6256 {
6257 f = outfile;
6258 switch (cb->callout_flags)
6259 {
6260 case PCRE2_CALLOUT_BACKTRACK:
6261 fprintf(f, "Backtrack\n");
6262 break;
6263
6264 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
6265 fprintf(f, "Backtrack\nNo other matching paths\n");
6266 /* Fall through */
6267
6268 case PCRE2_CALLOUT_STARTMATCH:
6269 fprintf(f, "New match attempt\n");
6270 break;
6271
6272 default:
6273 f = fdefault;
6274 break;
6275 }
6276 }
6277 else f = fdefault;
6278
6279 /* For a callout with a string argument, show the string first because there
6280 isn't a tidy way to fit it in the rest of the data. */
6281
6282 if (cb->callout_string != NULL)
6283 {
6284 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
6285 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
6286 cb->callout_string_offset, delimiter);
6287 PCHARSV(cb->callout_string, 0,
6288 cb->callout_string_length, utf, outfile);
6289 for (i = 0; callout_start_delims[i] != 0; i++)
6290 if (delimiter == callout_start_delims[i])
6291 {
6292 delimiter = callout_end_delims[i];
6293 break;
6294 }
6295 fprintf(outfile, "%c", delimiter);
6296 if (!callout_capture) fprintf(outfile, "\n");
6297 }
6298
6299 /* Show captured strings if required */
6300
6301 if (callout_capture)
6302 {
6303 if (cb->callout_string == NULL)
6304 fprintf(outfile, "Callout %d:", cb->callout_number);
6305 fprintf(outfile, " last capture = %d\n", cb->capture_last);
6306 for (i = 2; i < cb->capture_top * 2; i += 2)
6307 {
6308 fprintf(outfile, "%2d: ", i/2);
6309 if (cb->offset_vector[i] == PCRE2_UNSET)
6310 fprintf(outfile, "<unset>");
6311 else
6312 {
6313 PCHARSV(cb->subject, cb->offset_vector[i],
6314 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
6315 }
6316 fprintf(outfile, "\n");
6317 }
6318 }
6319
6320 /* Unless suppressed, re-print the subject in canonical form (with escapes for
6321 non-printing characters), the first time, or if giving full details. On
6322 subsequent calls in the same match, we use PCHARS() just to find the printed
6323 lengths of the substrings. */
6324
6325 if (callout_where)
6326 {
6327 if (f != NULL) fprintf(f, "--->");
6328
6329 /* The subject before the match start. */
6330
6331 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
6332
6333 /* If a lookbehind is involved, the current position may be earlier than the
6334 match start. If so, use the match start instead. */
6335
6336 current_position = (cb->current_position >= cb->start_match)?
6337 cb->current_position : cb->start_match;
6338
6339 /* The subject between the match start and the current position. */
6340
6341 PCHARS(post_start, cb->subject, cb->start_match,
6342 current_position - cb->start_match, utf, f);
6343
6344 /* Print from the current position to the end. */
6345
6346 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6347 utf, f);
6348
6349 /* Calculate the total subject printed length (no print). */
6350
6351 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6352
6353 if (f != NULL) fprintf(f, "\n");
6354
6355 /* For automatic callouts, show the pattern offset. Otherwise, for a
6356 numerical callout whose number has not already been shown with captured
6357 strings, show the number here. A callout with a string argument has been
6358 displayed above. */
6359
6360 if (cb->callout_number == 255)
6361 {
6362 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6363 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6364 }
6365 else
6366 {
6367 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6368 else fprintf(outfile, "%3d ", cb->callout_number);
6369 }
6370
6371 /* Now show position indicators */
6372
6373 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6374 fprintf(outfile, "^");
6375
6376 if (post_start > 0)
6377 {
6378 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6379 fprintf(outfile, "^");
6380 }
6381
6382 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6383 fprintf(outfile, " ");
6384
6385 if (cb->next_item_length != 0)
6386 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6387 pbuffer8 + cb->pattern_position);
6388 else
6389 fprintf(outfile, "End of pattern");
6390
6391 fprintf(outfile, "\n");
6392 }
6393
6394 first_callout = FALSE;
6395
6396 /* Show any mark info */
6397
6398 if (cb->mark != last_callout_mark)
6399 {
6400 if (cb->mark == NULL)
6401 fprintf(outfile, "Latest Mark: <unset>\n");
6402 else
6403 {
6404 fprintf(outfile, "Latest Mark: ");
6405 PCHARSV(cb->mark, -1, -1, utf, outfile);
6406 putc('\n', outfile);
6407 }
6408 last_callout_mark = cb->mark;
6409 }
6410
6411 /* Show callout data */
6412
6413 if (callout_data_ptr != NULL)
6414 {
6415 int callout_data = *((int32_t *)callout_data_ptr);
6416 if (callout_data != 0)
6417 {
6418 fprintf(outfile, "Callout data = %d\n", callout_data);
6419 return callout_data;
6420 }
6421 }
6422
6423 /* Keep count and give the appropriate return code */
6424
6425 callout_count++;
6426
6427 if (cb->callout_number == dat_datctl.cerror[0] &&
6428 callout_count >= dat_datctl.cerror[1])
6429 return PCRE2_ERROR_CALLOUT;
6430
6431 if (cb->callout_number == dat_datctl.cfail[0] &&
6432 callout_count >= dat_datctl.cfail[1])
6433 return 1;
6434
6435 return 0;
6436 }
6437
6438
6439
6440 /*************************************************
6441 * Handle *MARK and copy/get tests *
6442 *************************************************/
6443
6444 /* This function is called after complete and partial matches. It runs the
6445 tests for substring extraction.
6446
6447 Arguments:
6448 utf TRUE for utf
6449 capcount return from pcre2_match()
6450
6451 Returns: FALSE if print_error_message() fails
6452 */
6453
6454 static BOOL
copy_and_get(BOOL utf,int capcount)6455 copy_and_get(BOOL utf, int capcount)
6456 {
6457 int i;
6458 uint8_t *nptr;
6459
6460 /* Test copy strings by number */
6461
6462 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6463 {
6464 int rc;
6465 PCRE2_SIZE length, length2;
6466 uint32_t copybuffer[256];
6467 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6468 length = sizeof(copybuffer)/code_unit_size;
6469 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6470 if (rc < 0)
6471 {
6472 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6473 if (!print_error_message(rc, "", "\n")) return FALSE;
6474 }
6475 else
6476 {
6477 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6478 if (rc < 0)
6479 {
6480 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6481 if (!print_error_message(rc, "", "\n")) return FALSE;
6482 }
6483 else if (length2 != length)
6484 {
6485 fprintf(outfile, "Mismatched substring lengths: %"
6486 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6487 }
6488 fprintf(outfile, "%2dC ", n);
6489 PCHARSV(copybuffer, 0, length, utf, outfile);
6490 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6491 }
6492 }
6493
6494 /* Test copy strings by name */
6495
6496 nptr = dat_datctl.copy_names;
6497 for (;;)
6498 {
6499 int rc;
6500 int groupnumber;
6501 PCRE2_SIZE length, length2;
6502 uint32_t copybuffer[256];
6503 int namelen = strlen((const char *)nptr);
6504 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6505 PCRE2_SIZE cnl = namelen;
6506 #endif
6507 if (namelen == 0) break;
6508
6509 #ifdef SUPPORT_PCRE2_8
6510 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6511 #endif
6512 #ifdef SUPPORT_PCRE2_16
6513 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6514 #endif
6515 #ifdef SUPPORT_PCRE2_32
6516 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6517 #endif
6518
6519 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6520 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6521 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6522
6523 length = sizeof(copybuffer)/code_unit_size;
6524 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6525 if (rc < 0)
6526 {
6527 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6528 if (!print_error_message(rc, "", "\n")) return FALSE;
6529 }
6530 else
6531 {
6532 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6533 if (rc < 0)
6534 {
6535 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6536 if (!print_error_message(rc, "", "\n")) return FALSE;
6537 }
6538 else if (length2 != length)
6539 {
6540 fprintf(outfile, "Mismatched substring lengths: %"
6541 SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6542 }
6543 fprintf(outfile, " C ");
6544 PCHARSV(copybuffer, 0, length, utf, outfile);
6545 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6546 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6547 else fprintf(outfile, " (non-unique)\n");
6548 }
6549 nptr += namelen + 1;
6550 }
6551
6552 /* Test get strings by number */
6553
6554 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6555 {
6556 int rc;
6557 PCRE2_SIZE length;
6558 void *gotbuffer;
6559 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6560 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6561 if (rc < 0)
6562 {
6563 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6564 if (!print_error_message(rc, "", "\n")) return FALSE;
6565 }
6566 else
6567 {
6568 fprintf(outfile, "%2dG ", n);
6569 PCHARSV(gotbuffer, 0, length, utf, outfile);
6570 fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6571 PCRE2_SUBSTRING_FREE(gotbuffer);
6572 }
6573 }
6574
6575 /* Test get strings by name */
6576
6577 nptr = dat_datctl.get_names;
6578 for (;;)
6579 {
6580 PCRE2_SIZE length;
6581 void *gotbuffer;
6582 int rc;
6583 int groupnumber;
6584 int namelen = strlen((const char *)nptr);
6585 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6586 PCRE2_SIZE cnl = namelen;
6587 #endif
6588 if (namelen == 0) break;
6589
6590 #ifdef SUPPORT_PCRE2_8
6591 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6592 #endif
6593 #ifdef SUPPORT_PCRE2_16
6594 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6595 #endif
6596 #ifdef SUPPORT_PCRE2_32
6597 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6598 #endif
6599
6600 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6601 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6602 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6603
6604 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6605 if (rc < 0)
6606 {
6607 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6608 if (!print_error_message(rc, "", "\n")) return FALSE;
6609 }
6610 else
6611 {
6612 fprintf(outfile, " G ");
6613 PCHARSV(gotbuffer, 0, length, utf, outfile);
6614 fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6615 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6616 else fprintf(outfile, " (non-unique)\n");
6617 PCRE2_SUBSTRING_FREE(gotbuffer);
6618 }
6619 nptr += namelen + 1;
6620 }
6621
6622 /* Test getting the complete list of captured strings. */
6623
6624 if ((dat_datctl.control & CTL_GETALL) != 0)
6625 {
6626 int rc;
6627 void **stringlist;
6628 PCRE2_SIZE *lengths;
6629 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6630 if (rc < 0)
6631 {
6632 fprintf(outfile, "get substring list failed (%d): ", rc);
6633 if (!print_error_message(rc, "", "\n")) return FALSE;
6634 }
6635 else
6636 {
6637 for (i = 0; i < capcount; i++)
6638 {
6639 fprintf(outfile, "%2dL ", i);
6640 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6641 putc('\n', outfile);
6642 }
6643 if (stringlist[i] != NULL)
6644 fprintf(outfile, "string list not terminated by NULL\n");
6645 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6646 }
6647 }
6648
6649 return TRUE;
6650 }
6651
6652
6653
6654 /*************************************************
6655 * Show an entire ovector *
6656 *************************************************/
6657
6658 /* This function is called after partial matching or match failure, when the
6659 "allvector" modifier is set. It is a means of checking the contents of the
6660 entire ovector, to ensure no modification of fields that should be unchanged.
6661
6662 Arguments:
6663 ovector points to the ovector
6664 oveccount number of pairs
6665
6666 Returns: nothing
6667 */
6668
6669 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6670 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6671 {
6672 uint32_t i;
6673 for (i = 0; i < 2*oveccount; i += 2)
6674 {
6675 PCRE2_SIZE start = ovector[i];
6676 PCRE2_SIZE end = ovector[i+1];
6677
6678 fprintf(outfile, "%2d: ", i/2);
6679 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6680 fprintf(outfile, "<unset>\n");
6681 else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6682 fprintf(outfile, "<unchanged>\n");
6683 else
6684 fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6685 (unsigned long int)end);
6686 }
6687 }
6688
6689
6690 /*************************************************
6691 * Process a data line *
6692 *************************************************/
6693
6694 /* The line is in buffer; it will not be empty.
6695
6696 Arguments: none
6697
6698 Returns: PR_OK continue processing next line
6699 PR_SKIP skip to a blank line
6700 PR_ABEND abort the pcre2test run
6701 */
6702
6703 static int
process_data(void)6704 process_data(void)
6705 {
6706 PCRE2_SIZE len, ulen, arg_ulen;
6707 uint32_t gmatched;
6708 uint32_t c, k;
6709 uint32_t g_notempty = 0;
6710 uint8_t *p, *pp, *start_rep;
6711 size_t needlen;
6712 void *use_dat_context;
6713 BOOL utf;
6714 BOOL subject_literal;
6715
6716 PCRE2_SIZE *ovector;
6717 PCRE2_SIZE ovecsave[3];
6718 uint32_t oveccount;
6719
6720 #ifdef SUPPORT_PCRE2_8
6721 uint8_t *q8 = NULL;
6722 #endif
6723 #ifdef SUPPORT_PCRE2_16
6724 uint16_t *q16 = NULL;
6725 #endif
6726 #ifdef SUPPORT_PCRE2_32
6727 uint32_t *q32 = NULL;
6728 #endif
6729
6730 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6731
6732 /* Copy the default context and data control blocks to the active ones. Then
6733 copy from the pattern the controls that can be set in either the pattern or the
6734 data. This allows them to be overridden in the data line. We do not do this for
6735 options because those that are common apply separately to compiling and
6736 matching. */
6737
6738 DATCTXCPY(dat_context, default_dat_context);
6739 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6740 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6741 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6742 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6743 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6744
6745 if (dat_datctl.substitute_skip == 0)
6746 dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6747 if (dat_datctl.substitute_stop == 0)
6748 dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6749
6750 /* Initialize for scanning the data line. */
6751
6752 #ifdef SUPPORT_PCRE2_8
6753 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6754 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6755 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6756 #else
6757 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6758 #endif
6759
6760 start_rep = NULL;
6761 len = strlen((const char *)buffer);
6762 while (len > 0 && isspace(buffer[len-1])) len--;
6763 buffer[len] = 0;
6764 p = buffer;
6765 while (isspace(*p)) p++;
6766
6767 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6768 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6769
6770 if (utf)
6771 {
6772 uint8_t *q;
6773 uint32_t cc;
6774 int n = 1;
6775 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6776 if (n <= 0)
6777 {
6778 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6779 "in UTF mode\n");
6780 return PR_OK;
6781 }
6782 }
6783
6784 #ifdef SUPPORT_VALGRIND
6785 /* Mark the dbuffer as addressable but undefined again. */
6786 if (dbuffer != NULL)
6787 {
6788 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6789 }
6790 #endif
6791
6792 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6793 the number of code units that will be needed (though the buffer may have to be
6794 extended if replication is involved). */
6795
6796 needlen = (size_t)((len+1) * code_unit_size);
6797 if (dbuffer == NULL || needlen >= dbuffer_size)
6798 {
6799 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6800 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6801 if (dbuffer == NULL)
6802 {
6803 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6804 exit(1);
6805 }
6806 }
6807 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6808
6809 /* Scan the data line, interpreting data escapes, and put the result into a
6810 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6811 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6812 */
6813
6814 while ((c = *p++) != 0)
6815 {
6816 int32_t i = 0;
6817 size_t replen;
6818
6819 /* ] may mark the end of a replicated sequence */
6820
6821 if (c == ']' && start_rep != NULL)
6822 {
6823 long li;
6824 char *endptr;
6825
6826 if (*p++ != '{')
6827 {
6828 fprintf(outfile, "** Expected '{' after \\[....]\n");
6829 return PR_OK;
6830 }
6831
6832 li = strtol((const char *)p, &endptr, 10);
6833 if (S32OVERFLOW(li))
6834 {
6835 fprintf(outfile, "** Repeat count too large\n");
6836 return PR_OK;
6837 }
6838
6839 p = (uint8_t *)endptr;
6840 if (*p++ != '}')
6841 {
6842 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6843 return PR_OK;
6844 }
6845
6846 i = (int32_t)li;
6847 if (i-- <= 0)
6848 {
6849 fprintf(outfile, "** Zero or negative repeat not allowed\n");
6850 return PR_OK;
6851 }
6852
6853 replen = CAST8VAR(q) - start_rep;
6854 needlen += replen * i;
6855
6856 if (needlen >= dbuffer_size)
6857 {
6858 size_t qoffset = CAST8VAR(q) - dbuffer;
6859 size_t rep_offset = start_rep - dbuffer;
6860 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6861 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6862 if (dbuffer == NULL)
6863 {
6864 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6865 exit(1);
6866 }
6867 SETCASTPTR(q, dbuffer + qoffset);
6868 start_rep = dbuffer + rep_offset;
6869 }
6870
6871 while (i-- > 0)
6872 {
6873 memcpy(CAST8VAR(q), start_rep, replen);
6874 SETPLUS(q, replen/code_unit_size);
6875 }
6876
6877 start_rep = NULL;
6878 continue;
6879 }
6880
6881 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6882 set, do the fudge for setting the top bit. */
6883
6884 if (c != '\\' || subject_literal)
6885 {
6886 uint32_t topbit = 0;
6887 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6888 {
6889 topbit = 0x80000000;
6890 c = *p++;
6891 }
6892 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6893 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6894 c |= topbit;
6895 }
6896
6897 /* Handle backslash escapes */
6898
6899 else switch ((c = *p++))
6900 {
6901 case '\\': break;
6902 case 'a': c = CHAR_BEL; break;
6903 case 'b': c = '\b'; break;
6904 case 'e': c = CHAR_ESC; break;
6905 case 'f': c = '\f'; break;
6906 case 'n': c = '\n'; break;
6907 case 'r': c = '\r'; break;
6908 case 't': c = '\t'; break;
6909 case 'v': c = '\v'; break;
6910
6911 case '0': case '1': case '2': case '3':
6912 case '4': case '5': case '6': case '7':
6913 c -= '0';
6914 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6915 c = c * 8 + *p++ - '0';
6916 break;
6917
6918 case 'o':
6919 if (*p == '{')
6920 {
6921 uint8_t *pt = p;
6922 c = 0;
6923 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6924 {
6925 if (++i == 12)
6926 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6927 "using only the first twelve.\n");
6928 else c = c * 8 + *pt - '0';
6929 }
6930 if (*pt == '}') p = pt + 1;
6931 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6932 }
6933 break;
6934
6935 case 'x':
6936 if (*p == '{')
6937 {
6938 uint8_t *pt = p;
6939 c = 0;
6940
6941 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6942 when isxdigit() is a macro that refers to its argument more than
6943 once. This is banned by the C Standard, but apparently happens in at
6944 least one MacOS environment. */
6945
6946 for (pt++; isxdigit(*pt); pt++)
6947 {
6948 if (++i == 9)
6949 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6950 "using only the first eight.\n");
6951 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6952 }
6953 if (*pt == '}')
6954 {
6955 p = pt + 1;
6956 break;
6957 }
6958 /* Not correct form for \x{...}; fall through */
6959 }
6960
6961 /* \x without {} always defines just one byte in 8-bit mode. This
6962 allows UTF-8 characters to be constructed byte by byte, and also allows
6963 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6964 Otherwise, pass it down as data. */
6965
6966 c = 0;
6967 while (i++ < 2 && isxdigit(*p))
6968 {
6969 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6970 p++;
6971 }
6972 #if defined SUPPORT_PCRE2_8
6973 if (utf && (test_mode == PCRE8_MODE))
6974 {
6975 *q8++ = c;
6976 continue;
6977 }
6978 #endif
6979 break;
6980
6981 case 0: /* \ followed by EOF allows for an empty line */
6982 p--;
6983 continue;
6984
6985 case '=': /* \= terminates the data, starts modifiers */
6986 goto ENDSTRING;
6987
6988 case '[': /* \[ introduces a replicated character sequence */
6989 if (start_rep != NULL)
6990 {
6991 fprintf(outfile, "** Nested replication is not supported\n");
6992 return PR_OK;
6993 }
6994 start_rep = CAST8VAR(q);
6995 continue;
6996
6997 default:
6998 if (isalnum(c))
6999 {
7000 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
7001 return PR_OK;
7002 }
7003 }
7004
7005 /* We now have a character value in c that may be greater than 255.
7006 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
7007 than 127 in UTF mode must have come from \x{...} or octal constructs
7008 because values from \x.. get this far only in non-UTF mode. */
7009
7010 #ifdef SUPPORT_PCRE2_8
7011 if (test_mode == PCRE8_MODE)
7012 {
7013 if (utf)
7014 {
7015 if (c > 0x7fffffff)
7016 {
7017 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
7018 "and so cannot be converted to UTF-8\n", c);
7019 return PR_OK;
7020 }
7021 q8 += ord2utf8(c, q8);
7022 }
7023 else
7024 {
7025 if (c > 0xffu)
7026 {
7027 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
7028 "and UTF-8 mode is not enabled.\n", c);
7029 fprintf(outfile, "** Truncation will probably give the wrong "
7030 "result.\n");
7031 }
7032 *q8++ = (uint8_t)c;
7033 }
7034 }
7035 #endif
7036 #ifdef SUPPORT_PCRE2_16
7037 if (test_mode == PCRE16_MODE)
7038 {
7039 if (utf)
7040 {
7041 if (c > 0x10ffffu)
7042 {
7043 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
7044 "0x10ffff and so cannot be converted to UTF-16\n", c);
7045 return PR_OK;
7046 }
7047 else if (c >= 0x10000u)
7048 {
7049 c-= 0x10000u;
7050 *q16++ = 0xD800 | (c >> 10);
7051 *q16++ = 0xDC00 | (c & 0x3ff);
7052 }
7053 else
7054 *q16++ = c;
7055 }
7056 else
7057 {
7058 if (c > 0xffffu)
7059 {
7060 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
7061 "and UTF-16 mode is not enabled.\n", c);
7062 fprintf(outfile, "** Truncation will probably give the wrong "
7063 "result.\n");
7064 }
7065
7066 *q16++ = (uint16_t)c;
7067 }
7068 }
7069 #endif
7070 #ifdef SUPPORT_PCRE2_32
7071 if (test_mode == PCRE32_MODE)
7072 {
7073 *q32++ = c;
7074 }
7075 #endif
7076 }
7077
7078 ENDSTRING:
7079 SET(*q, 0);
7080 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
7081 ulen = len/code_unit_size; /* Length in code units */
7082 arg_ulen = ulen; /* Value to use in match arg */
7083
7084 /* If the string was terminated by \= we must now interpret modifiers. */
7085
7086 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7087 return PR_OK;
7088
/* Setting substitute_{skip,stop} implies a substitute callout. */
7090
7091 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7092 dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7093
7094 /* Check for mutually exclusive modifiers. At present, these are all in the
7095 first control word. */
7096
7097 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7098 {
7099 c = dat_datctl.control & exclusive_dat_controls[k];
7100 if (c != 0 && c != (c & (~c+1)))
7101 {
7102 show_controls(c, 0, "** Not allowed together:");
7103 fprintf(outfile, "\n");
7104 return PR_OK;
7105 }
7106 }
7107
7108 if (pat_patctl.replacement[0] != 0)
7109 {
7110 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7111 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7112 {
7113 fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7114 return PR_OK;
7115 }
7116
7117 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7118 fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7119 }
7120
7121 /* Warn for modifiers that are ignored for DFA. */
7122
7123 if ((dat_datctl.control & CTL_DFA) != 0)
7124 {
7125 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7126 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7127 }
7128
7129 /* We now have the subject in dbuffer, with len containing the byte length, and
7130 ulen containing the code unit length, with a copy in arg_ulen for use in match
7131 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7132 zero_terminate modifier is present).
7133
7134 Move the data to the end of the buffer so that a read over the end can be
7135 caught by valgrind or other means. If we have explicit valgrind support, mark
7136 the unused start of the buffer unaddressable. If we are using the POSIX
7137 interface, or testing zero-termination, we must include the terminating zero in
7138 the usable data. */
7139
7140 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7141 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7142 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7143 #ifdef SUPPORT_VALGRIND
7144 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7145 #endif
7146
7147 /* Now pp points to the subject string, but if null_subject was specified, set
7148 it to NULL to test PCRE2's behaviour. */
7149
7150 if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
7151
7152 /* POSIX matching is only possible in 8-bit mode, and it does not support
7153 timing or other fancy features. Some were checked at compile time, but we need
7154 to check the match-time settings here. */
7155
7156 #ifdef SUPPORT_PCRE2_8
7157 if ((pat_patctl.control & CTL_POSIX) != 0)
7158 {
7159 int rc;
7160 int eflags = 0;
7161 regmatch_t *pmatch = NULL;
7162 const char *msg = "** Ignored with POSIX interface:";
7163
7164 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7165 prmsg(&msg, "callout_error");
7166 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7167 prmsg(&msg, "callout_fail");
7168 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7169 prmsg(&msg, "copy");
7170 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7171 prmsg(&msg, "get");
7172 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7173 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7174
7175 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7176 {
7177 fprintf(outfile, "%s", msg);
7178 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7179 msg = "";
7180 }
7181 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7182 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7183 {
7184 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7185 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7186 msg = "";
7187 }
7188
7189 if (msg[0] == 0) fprintf(outfile, "\n");
7190
7191 if (dat_datctl.oveccount > 0)
7192 {
7193 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7194 if (pmatch == NULL)
7195 {
7196 fprintf(outfile, "** Failed to get memory for recording matching "
7197 "information (size set = %du)\n", dat_datctl.oveccount);
7198 return PR_OK;
7199 }
7200 }
7201
7202 if (dat_datctl.startend[0] != CFORE_UNSET)
7203 {
7204 pmatch[0].rm_so = dat_datctl.startend[0];
7205 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7206 dat_datctl.startend[1] : len;
7207 eflags |= REG_STARTEND;
7208 }
7209
7210 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7211 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7212 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7213
7214 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7215 if (rc != 0)
7216 {
7217 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7218 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7219 }
7220 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7221 fprintf(outfile, "Matched with REG_NOSUB\n");
7222 else if (dat_datctl.oveccount == 0)
7223 fprintf(outfile, "Matched without capture\n");
7224 else
7225 {
7226 size_t i, j;
7227 size_t last_printed = (size_t)dat_datctl.oveccount;
7228 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7229 {
7230 if (pmatch[i].rm_so >= 0)
7231 {
7232 PCRE2_SIZE start = pmatch[i].rm_so;
7233 PCRE2_SIZE end = pmatch[i].rm_eo;
7234 for (j = last_printed + 1; j < i; j++)
7235 fprintf(outfile, "%2d: <unset>\n", (int)j);
7236 last_printed = i;
7237 if (start > end)
7238 {
7239 start = pmatch[i].rm_eo;
7240 end = pmatch[i].rm_so;
7241 fprintf(outfile, "Start of matched string is beyond its end - "
7242 "displaying from end to start.\n");
7243 }
7244 fprintf(outfile, "%2d: ", (int)i);
7245 PCHARSV(pp, start, end - start, utf, outfile);
7246 fprintf(outfile, "\n");
7247
7248 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7249 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7250 {
7251 fprintf(outfile, "%2d+ ", (int)i);
7252 /* Note: don't use the start/end variables here because we want to
7253 show the text from what is reported as the end. */
7254 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7255 fprintf(outfile, "\n"); }
7256 }
7257 }
7258 }
7259 free(pmatch);
7260 return PR_OK;
7261 }
7262 #endif /* SUPPORT_PCRE2_8 */
7263
7264 /* Handle matching via the native interface. Check for consistency of
7265 modifiers. */
7266
7267 if (dat_datctl.startend[0] != CFORE_UNSET)
7268 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7269
7270 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7271 matching, even if the JIT compiler was used. */
7272
7273 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7274 FLD(compiled_code, executable_jit) != NULL)
7275 {
7276 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7277 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7278 }
7279
7280 /* Handle passing the subject as zero-terminated. */
7281
7282 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7283 arg_ulen = PCRE2_ZERO_TERMINATED;
7284
7285 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7286 NULL context. */
7287
7288 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7289 NULL : PTR(dat_context);
7290
7291 /* Enable display of malloc/free if wanted. We can do this only if either the
7292 pattern or the subject is processed with a context. */
7293
7294 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7295
7296 if (show_memory &&
7297 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7298 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7299 "context: ignored\n");
7300
7301 /* Create and assign a JIT stack if requested. */
7302
7303 if (dat_datctl.jitstack != 0)
7304 {
7305 if (dat_datctl.jitstack != jit_stack_size)
7306 {
7307 PCRE2_JIT_STACK_FREE(jit_stack);
7308 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7309 jit_stack_size = dat_datctl.jitstack;
7310 }
7311 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7312 }
7313
7314 /* Or de-assign */
7315
7316 else if (jit_stack != NULL)
7317 {
7318 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7319 PCRE2_JIT_STACK_FREE(jit_stack);
7320 jit_stack = NULL;
7321 jit_stack_size = 0;
7322 }
7323
7324 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7325 if we want to verify that JIT was actually used. */
7326
7327 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7328 {
7329 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7330 }
7331
7332 /* Adjust match_data according to size of offsets required. A size of zero
7333 causes a new match data block to be obtained that exactly fits the pattern. */
7334
7335 if (dat_datctl.oveccount == 0)
7336 {
7337 PCRE2_MATCH_DATA_FREE(match_data);
7338 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code,
7339 general_context);
7340 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7341 }
7342 else if (dat_datctl.oveccount <= max_oveccount)
7343 {
7344 SETFLD(match_data, oveccount, dat_datctl.oveccount);
7345 }
7346 else
7347 {
7348 max_oveccount = dat_datctl.oveccount;
7349 PCRE2_MATCH_DATA_FREE(match_data);
7350 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, general_context);
7351 }
7352
7353 if (CASTVAR(void *, match_data) == NULL)
7354 {
7355 fprintf(outfile, "** Failed to get memory for recording matching "
7356 "information (size requested: %d)\n", dat_datctl.oveccount);
7357 max_oveccount = 0;
7358 return PR_OK;
7359 }
7360
7361 ovector = FLD(match_data, ovector);
7362 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7363
7364 /* Replacement processing is ignored for DFA matching. */
7365
7366 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7367 {
7368 fprintf(outfile, "** Ignored for DFA matching: replace\n");
7369 dat_datctl.replacement[0] = 0;
7370 }
7371
7372 /* If a replacement string is provided, call pcre2_substitute() instead of one
7373 of the matching functions. First we have to convert the replacement string to
7374 the appropriate width. */
7375
7376 if (dat_datctl.replacement[0] != 0)
7377 {
7378 int rc;
7379 uint8_t *pr;
7380 uint8_t rbuffer[REPLACE_BUFFSIZE];
7381 uint8_t nbuffer[REPLACE_BUFFSIZE];
7382 uint8_t *rbptr;
7383 uint32_t xoptions;
7384 uint32_t emoption; /* External match option */
7385 PCRE2_SIZE j, rlen, nsize, erroroffset;
7386 BOOL badutf = FALSE;
7387
7388 #ifdef SUPPORT_PCRE2_8
7389 uint8_t *r8 = NULL;
7390 #endif
7391 #ifdef SUPPORT_PCRE2_16
7392 uint16_t *r16 = NULL;
7393 #endif
7394 #ifdef SUPPORT_PCRE2_32
7395 uint32_t *r32 = NULL;
7396 #endif
7397
7398 /* Fill the ovector with junk to detect elements that do not get set
7399 when they should be (relevant only when "allvector" is specified). */
7400
7401 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7402
7403 if (timeitm)
7404 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7405
7406 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7407 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7408
7409 /* Check for a test that does substitution after an initial external match.
7410 If this is set, we run the external match, but leave the interpretation of
7411 its output to pcre2_substitute(). */
7412
7413 emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7414 PCRE2_SUBSTITUTE_MATCHED;
7415
7416 if (emoption != 0)
7417 {
7418 PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7419 dat_datctl.options, match_data, use_dat_context);
7420 }
7421
7422 xoptions = emoption |
7423 (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7424 PCRE2_SUBSTITUTE_GLOBAL) |
7425 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7426 PCRE2_SUBSTITUTE_EXTENDED) |
7427 (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7428 PCRE2_SUBSTITUTE_LITERAL) |
7429 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7430 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7431 (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7432 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7433 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7434 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7435 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7436 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7437
7438 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7439 pr = dat_datctl.replacement;
7440
7441 /* If the replacement starts with '[<number>]' we interpret that as a length
7442 value for the replacement buffer. */
7443
7444 nsize = REPLACE_BUFFSIZE/code_unit_size;
7445 if (*pr == '[')
7446 {
7447 PCRE2_SIZE n = 0;
7448 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7449 if (*pr++ != ']')
7450 {
7451 fprintf(outfile, "Bad buffer size in replacement string\n");
7452 return PR_OK;
7453 }
7454 if (n > nsize)
7455 {
7456 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7457 "large (max %" SIZ_FORM ")\n", n, nsize);
7458 return PR_OK;
7459 }
7460 nsize = n;
7461 }
7462
7463 /* Now copy the replacement string to a buffer of the appropriate width. No
7464 escape processing is done for replacements. In UTF mode, check for an invalid
7465 UTF-8 input string, and if it is invalid, just copy its code units without
7466 UTF interpretation. This provides a means of checking that an invalid string
7467 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7468 replacement. */
7469
7470 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7471
7472 /* Not UTF or invalid UTF-8: just copy the code units. */
7473
7474 if (!utf || badutf)
7475 {
7476 while ((c = *pr++) != 0)
7477 {
7478 #ifdef SUPPORT_PCRE2_8
7479 if (test_mode == PCRE8_MODE) *r8++ = c;
7480 #endif
7481 #ifdef SUPPORT_PCRE2_16
7482 if (test_mode == PCRE16_MODE) *r16++ = c;
7483 #endif
7484 #ifdef SUPPORT_PCRE2_32
7485 if (test_mode == PCRE32_MODE) *r32++ = c;
7486 #endif
7487 }
7488 }
7489
7490 /* Valid UTF-8 replacement string */
7491
7492 else while ((c = *pr++) != 0)
7493 {
7494 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7495
7496 #ifdef SUPPORT_PCRE2_8
7497 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7498 #endif
7499
7500 #ifdef SUPPORT_PCRE2_16
7501 if (test_mode == PCRE16_MODE)
7502 {
7503 if (c >= 0x10000u)
7504 {
7505 c-= 0x10000u;
7506 *r16++ = 0xD800 | (c >> 10);
7507 *r16++ = 0xDC00 | (c & 0x3ff);
7508 }
7509 else *r16++ = c;
7510 }
7511 #endif
7512
7513 #ifdef SUPPORT_PCRE2_32
7514 if (test_mode == PCRE32_MODE) *r32++ = c;
7515 #endif
7516 }
7517
7518 SET(*r, 0);
7519 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7520 rlen = PCRE2_ZERO_TERMINATED;
7521 else
7522 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7523
7524 if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7525 {
7526 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7527 }
7528 else
7529 {
7530 PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
7531 }
7532
7533 /* There is a special option to set the replacement to NULL in order to test
7534 that case. */
7535
7536 rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
7537
7538 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7539 dat_datctl.options|xoptions, match_data, use_dat_context,
7540 rbptr, rlen, nbuffer, &nsize);
7541
7542 if (rc < 0)
7543 {
7544 fprintf(outfile, "Failed: error %d", rc);
7545 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7546 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7547 fprintf(outfile, ": ");
7548 if (!print_error_message(rc, "", "")) return PR_ABEND;
7549 if (rc == PCRE2_ERROR_NOMEMORY &&
7550 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7551 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7552 }
7553 else
7554 {
7555 fprintf(outfile, "%2d: ", rc);
7556 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7557 }
7558
7559 fprintf(outfile, "\n");
7560 show_memory = FALSE;
7561
7562 /* Show final ovector contents if requested. */
7563
7564 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7565 show_ovector(ovector, oveccount);
7566
7567 return PR_OK;
7568 } /* End of substitution handling */
7569
7570 /* When a replacement string is not provided, run a loop for global matching
7571 with one of the basic matching functions. For altglobal (or first time round
7572 the loop), set an "unset" value for the previous match info. */
7573
7574 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7575
7576 for (gmatched = 0;; gmatched++)
7577 {
7578 PCRE2_SIZE j;
7579 int capcount;
7580
7581 /* Fill the ovector with junk to detect elements that do not get set
7582 when they should be. */
7583
7584 for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7585
7586 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7587 stack callback function. */
7588
7589 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7590
7591 /* Do timing if required. */
7592
7593 if (timeitm > 0)
7594 {
7595 int i;
7596 clock_t start_time, time_taken;
7597
7598 if ((dat_datctl.control & CTL_DFA) != 0)
7599 {
7600 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7601 {
7602 fprintf(outfile, "Timing DFA restarts is not supported\n");
7603 return PR_OK;
7604 }
7605 if (dfa_workspace == NULL)
7606 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7607 start_time = clock();
7608 for (i = 0; i < timeitm; i++)
7609 {
7610 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7611 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7612 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7613 }
7614 }
7615
7616 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7617 {
7618 start_time = clock();
7619 for (i = 0; i < timeitm; i++)
7620 {
7621 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7622 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7623 use_dat_context);
7624 }
7625 }
7626
7627 else
7628 {
7629 start_time = clock();
7630 for (i = 0; i < timeitm; i++)
7631 {
7632 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7633 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7634 use_dat_context);
7635 }
7636 }
7637 total_match_time += (time_taken = clock() - start_time);
7638 fprintf(outfile, "Match time %.4f milliseconds\n",
7639 (((double)time_taken * 1000.0) / (double)timeitm) /
7640 (double)CLOCKS_PER_SEC);
7641 }
7642
7643 /* Find the heap, match and depth limits if requested. The depth and heap
7644 limits are not relevant for JIT. The return from check_match_limit() is the
7645 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7646
7647 if ((dat_datctl.control & (CTL_FINDLIMITS|CTL_FINDLIMITS_NOHEAP)) != 0)
7648 {
7649 capcount = 0; /* This stops compiler warnings */
7650
7651 if ((dat_datctl.control & CTL_FINDLIMITS_NOHEAP) == 0 &&
7652 (FLD(compiled_code, executable_jit) == NULL ||
7653 (dat_datctl.options & PCRE2_NO_JIT) != 0))
7654 {
7655 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7656 }
7657
7658 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7659 "match");
7660
7661 if (FLD(compiled_code, executable_jit) == NULL ||
7662 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7663 (dat_datctl.control & CTL_DFA) != 0)
7664 {
7665 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7666 "depth");
7667 }
7668
7669 if (capcount == 0)
7670 {
7671 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7672 capcount = dat_datctl.oveccount;
7673 }
7674 }
7675
7676 /* Otherwise just run a single match, setting up a callout if required (the
7677 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7678
7679 else
7680 {
7681 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7682 {
7683 PCRE2_SET_CALLOUT(dat_context, callout_function,
7684 (void *)(&dat_datctl.callout_data));
7685 first_callout = TRUE;
7686 last_callout_mark = NULL;
7687 callout_count = 0;
7688 }
7689 else
7690 {
7691 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7692 }
7693
7694 /* Run a single DFA or NFA match. */
7695
7696 if ((dat_datctl.control & CTL_DFA) != 0)
7697 {
7698 if (dfa_workspace == NULL)
7699 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7700 if (dfa_matched++ == 0)
7701 dfa_workspace[0] = -1; /* To catch bad restart */
7702 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7703 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7704 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7705 if (capcount == 0)
7706 {
7707 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7708 capcount = dat_datctl.oveccount;
7709 }
7710 }
7711 else
7712 {
7713 if ((pat_patctl.control & CTL_JITFAST) != 0)
7714 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7715 dat_datctl.options | g_notempty, match_data, use_dat_context);
7716 else
7717 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7718 dat_datctl.options | g_notempty, match_data, use_dat_context);
7719 if (capcount == 0)
7720 {
7721 fprintf(outfile, "Matched, but too many substrings\n");
7722 capcount = dat_datctl.oveccount;
7723 }
7724 }
7725 }
7726
7727 /* The result of the match is now in capcount. First handle a successful
7728 match. If pp was forced to be NULL (to test NULL handling) it will have been
7729 treated as an empty string if the length was zero. So re-create that for
7730 outputting. */
7731
7732 if (capcount >= 0)
7733 {
7734 int i;
7735
7736 if (pp == NULL) pp = (uint8_t *)"";
7737
7738 if (capcount > (int)oveccount) /* Check for lunatic return value */
7739 {
7740 fprintf(outfile,
7741 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7742 capcount, oveccount);
7743 capcount = oveccount;
7744 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7745 {
7746 fprintf(outfile, "** Global loop abandoned\n");
7747 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7748 }
7749 }
7750
7751 /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7752 should be, but not for fast JIT, where it isn't supported. */
7753
7754 if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7755 (pat_patctl.control & CTL_JITFAST) == 0)
7756 {
7757 if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7758 fprintf(outfile,
7759 "** PCRE2 error: flag not set after copy_matched_subject\n");
7760
7761 if (CASTFLD(void *, match_data, subject) == pp)
7762 fprintf(outfile,
7763 "** PCRE2 error: copy_matched_subject has not copied\n");
7764
7765 if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7766 fprintf(outfile,
7767 "** PCRE2 error: copy_matched_subject mismatch\n");
7768 }
7769
7770 /* If this is not the first time round a global loop, check that the
7771 returned string has changed. If it has not, check for an empty string match
7772 at a different starting offset from the previous match. This is a failed test
7773 retry for null-matching patterns that don't match at their starting offset,
7774 for example /(?<=\G.)/. A repeated match at the same point is not such a
7775 pattern, and must be discarded, and we then proceed to seek a non-null
7776 match at the current point. For any other repeated match, there is a bug
7777 somewhere and we must break the loop because it will go on for ever. We
7778 know that there are always at least two elements in the ovector. */
7779
7780 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7781 {
7782 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7783 {
7784 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7785 ovecsave[2] = dat_datctl.offset;
7786 continue; /* Back to the top of the loop */
7787 }
7788 fprintf(outfile,
7789 "** PCRE2 error: global repeat returned the same string as previous\n");
7790 fprintf(outfile, "** Global loop abandoned\n");
7791 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7792 }
7793
7794 /* "allcaptures" requests showing of all captures in the pattern, to check
7795 unset ones at the end. It may be set on the pattern or the data. Implement
7796 by setting capcount to the maximum. This is not relevant for DFA matching,
7797 so ignore it (warning given above). */
7798
7799 if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7800 {
7801 capcount = maxcapcount + 1; /* Allow for full match */
7802 if (capcount > (int)oveccount) capcount = oveccount;
7803 }
7804
7805 /* "allvector" requests showing the entire ovector. */
7806
7807 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7808
7809 /* Output the captured substrings. Note that, for the matched string,
7810 the use of \K in an assertion can make the start later than the end. */
7811
7812 for (i = 0; i < 2*capcount; i += 2)
7813 {
7814 PCRE2_SIZE lleft, lmiddle, lright;
7815 PCRE2_SIZE start = ovector[i];
7816 PCRE2_SIZE end = ovector[i+1];
7817
7818 if (start > end)
7819 {
7820 start = ovector[i+1];
7821 end = ovector[i];
7822 fprintf(outfile, "Start of matched string is beyond its end - "
7823 "displaying from end to start.\n");
7824 }
7825
7826 fprintf(outfile, "%2d: ", i/2);
7827
7828 /* Check for an unset group */
7829
7830 if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7831 {
7832 fprintf(outfile, "<unset>\n");
7833 continue;
7834 }
7835
7836 /* Check for silly offsets, in particular, values that have not been
7837 set when they should have been. However, if we are past the end of the
7838 captures for this pattern ("allvector" causes this), or if we are DFA
7839 matching, it isn't an error if the entry is unchanged. */
7840
7841 if (start > ulen || end > ulen)
7842 {
7843 if (((dat_datctl.control & CTL_DFA) != 0 ||
7844 i >= (int)(2*maxcapcount + 2)) &&
7845 start == JUNK_OFFSET && end == JUNK_OFFSET)
7846 fprintf(outfile, "<unchanged>\n");
7847 else
7848 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7849 (unsigned long int)start, (unsigned long int)end);
7850 continue;
7851 }
7852
7853 When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7854 JIT, it is disabled above, with a comment.) When the match is done by the
7855 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7856 set, and if the leftmost consulted character is before the start of the
7857 match or the rightmost consulted character is past the end of the match,
7858 we want to show all consulted characters for the main matched string, and
7859 indicate which were lookarounds. */
7860
7861 if (i == 0)
7862 {
7863 BOOL showallused;
7864 PCRE2_SIZE leftchar, rightchar;
7865
7866 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7867 {
7868 leftchar = FLD(match_data, leftchar);
7869 rightchar = FLD(match_data, rightchar);
7870 showallused = i == 0 && (leftchar < start || rightchar > end);
7871 }
7872 else showallused = FALSE;
7873
7874 if (showallused)
7875 {
7876 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7877 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7878 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7879 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7880 fprintf(outfile, " (JIT)");
7881 fprintf(outfile, "\n ");
7882 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7883 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7884 for (j = 0; j < lright; j++) fprintf(outfile, ">");
7885 }
7886
7887 /* When a pattern contains \K, the start of match position may be
7888 different to the start of the matched string. When this is the case,
7889 show it when requested. */
7890
7891 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7892 {
7893 PCRE2_SIZE startchar;
7894 PCRE2_GET_STARTCHAR(startchar, match_data);
7895 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7896 PCHARSV(pp, start, end - start, utf, outfile);
7897 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7898 fprintf(outfile, " (JIT)");
7899 if (startchar != start)
7900 {
7901 fprintf(outfile, "\n ");
7902 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7903 }
7904 }
7905
7906 /* Otherwise, just show the matched string. */
7907
7908 else
7909 {
7910 PCHARSV(pp, start, end - start, utf, outfile);
7911 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7912 fprintf(outfile, " (JIT)");
7913 }
7914 }
7915
7916 /* Not the main matched string. Just show it unadorned. */
7917
7918 else
7919 {
7920 PCHARSV(pp, start, end - start, utf, outfile);
7921 }
7922
7923 fprintf(outfile, "\n");
7924
7925 /* Note: don't use the start/end variables here because we want to
7926 show the text from what is reported as the end. */
7927
7928 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7929 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7930 {
7931 fprintf(outfile, "%2d+ ", i/2);
7932 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7933 fprintf(outfile, "\n");
7934 }
7935 }
7936
7937 /* Output (*MARK) data if requested */
7938
7939 if ((dat_datctl.control & CTL_MARK) != 0 &&
7940 TESTFLD(match_data, mark, !=, NULL))
7941 {
7942 fprintf(outfile, "MK: ");
7943 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7944 fprintf(outfile, "\n");
7945 }
7946
7947 /* Process copy/get strings */
7948
7949 if (!copy_and_get(utf, capcount)) return PR_ABEND;
7950
7951 } /* End of handling a successful match */
7952
7953 /* There was a partial match. The value of ovector[0] is the bumpalong point,
7954 that is, startchar, not any \K point that might have been passed. When JIT is
7955 not in use, "allusedtext" may be set, in which case we indicate the leftmost
7956 consulted character. */
7957
7958 else if (capcount == PCRE2_ERROR_PARTIAL)
7959 {
7960 PCRE2_SIZE leftchar;
7961 int backlength;
7962 int rubriclength = 0;
7963
7964 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7965 {
7966 leftchar = FLD(match_data, leftchar);
7967 }
7968 else leftchar = ovector[0];
7969
7970 fprintf(outfile, "Partial match");
7971 if ((dat_datctl.control & CTL_MARK) != 0 &&
7972 TESTFLD(match_data, mark, !=, NULL))
7973 {
7974 fprintf(outfile, ", mark=");
7975 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7976 outfile);
7977 rubriclength += 7;
7978 }
7979 fprintf(outfile, ": ");
7980 rubriclength += 15;
7981
7982 PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7983 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7984
7985 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7986 fprintf(outfile, " (JIT)");
7987 fprintf(outfile, "\n");
7988
7989 if (backlength != 0)
7990 {
7991 int i;
7992 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7993 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7994 fprintf(outfile, "\n");
7995 }
7996
7997 if (ulen != ovector[1])
7998 fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7999 "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
8000
8001 /* Process copy/get strings */
8002
8003 if (!copy_and_get(utf, 1)) return PR_ABEND;
8004
8005 /* "allvector" outputs the entire vector */
8006
8007 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8008 show_ovector(ovector, oveccount);
8009
8010 break; /* Out of the /g loop */
8011 } /* End of handling partial match */
8012
8013 /* Failed to match. If this is a /g or /G loop, we might previously have
8014 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
8015 If that is the case, this is not necessarily the end. We want to advance the
8016 start offset, and continue. We won't be at the end of the string - that was
8017 checked before setting g_notempty. We achieve the effect by pretending that a
8018 single character was matched.
8019
8020 Complication arises in the case when the newline convention is "any", "crlf",
8021 or "anycrlf". If the previous match was at the end of a line terminated by
8022 CRLF, an advance of one character just passes the CR, whereas we should
8023 prefer the longer newline sequence, as does the code in pcre2_match().
8024
8025 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
8026 character, not one byte. */
8027
8028 else if (g_notempty != 0) /* There was a previous null match */
8029 {
8030 uint16_t nl = FLD(compiled_code, newline_convention);
8031 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
8032 PCRE2_SIZE end_offset = start_offset + 1;
8033
8034 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
8035 nl == PCRE2_NEWLINE_ANYCRLF) &&
8036 start_offset < ulen - 1 &&
8037 CODE_UNIT(pp, start_offset) == '\r' &&
8038 CODE_UNIT(pp, end_offset) == '\n')
8039 end_offset++;
8040
8041 else if (utf && test_mode != PCRE32_MODE)
8042 {
8043 if (test_mode == PCRE8_MODE)
8044 {
8045 for (; end_offset < ulen; end_offset++)
8046 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8047 }
8048 else /* 16-bit mode */
8049 {
8050 for (; end_offset < ulen; end_offset++)
8051 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8052 }
8053 }
8054
8055 SETFLDVEC(match_data, ovector, 0, start_offset);
8056 SETFLDVEC(match_data, ovector, 1, end_offset);
8057 } /* End of handling null match in a global loop */
8058
8059 /* A "normal" match failure. There will be a negative error number in
8060 capcount. */
8061
8062 else
8063 {
8064 switch(capcount)
8065 {
8066 case PCRE2_ERROR_NOMATCH:
8067 if (gmatched == 0)
8068 {
8069 fprintf(outfile, "No match");
8070 if ((dat_datctl.control & CTL_MARK) != 0 &&
8071 TESTFLD(match_data, mark, !=, NULL))
8072 {
8073 fprintf(outfile, ", mark = ");
8074 PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
8075 }
8076 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
8077 fprintf(outfile, " (JIT)");
8078 fprintf(outfile, "\n");
8079
8080 /* "allvector" outputs the entire vector */
8081
8082 if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
8083 show_ovector(ovector, oveccount);
8084 }
8085 break;
8086
8087 case PCRE2_ERROR_BADUTFOFFSET:
8088 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
8089 break;
8090
8091 default:
8092 fprintf(outfile, "Failed: error %d: ", capcount);
8093 if (!print_error_message(capcount, "", "")) return PR_ABEND;
8094 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
8095 capcount >= PCRE2_ERROR_UTF32_ERR2)
8096 {
8097 PCRE2_SIZE startchar;
8098 PCRE2_GET_STARTCHAR(startchar, match_data);
8099 fprintf(outfile, " at offset %" SIZ_FORM, startchar);
8100 }
8101 fprintf(outfile, "\n");
8102 break;
8103 }
8104
8105 break; /* Out of the /g loop */
8106 } /* End of failed match handling */
8107
8108 /* Control reaches here in two circumstances: (a) after a match, and (b)
8109 after a non-match that immediately followed a match on an empty string when
8110 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8111 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8112 of one character. So effectively we get here only after a match. If we
8113 are not doing a global search, we are done. */
8114
8115 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8116 {
8117 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8118 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8119
8120 /* We must now set up for the next iteration of a global search. If we have
8121 matched an empty string, first check to see if we are at the end of the
8122 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8123 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8124 at the same point. If this fails it will be picked up above, where a fake
8125 match is set up so that at this point we advance to the next character.
8126
8127 However, in order to cope with patterns that never match at their starting
8128 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8129 than the starting offset. This means there will be a retry with the
8130 starting offset at the match offset. If this returns the same match again,
8131 it is picked up above and ignored, and the special action is then taken. */
8132
8133 if (match_offset == end_offset)
8134 {
8135 if (end_offset == ulen) break; /* End of subject */
8136 if (match_offset <= dat_datctl.offset)
8137 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8138 }
8139
8140 /* However, even after matching a non-empty string, there is still one
8141 tricky case. If a pattern contains \K within a lookbehind assertion at the
8142 start, the end of the matched string can be at the offset where the match
8143 started. In the case of a normal /g iteration without special action, this
8144 leads to a loop that keeps on returning the same substring. The loop would
8145 be caught above, but we really want to move on to the next match. */
8146
8147 else
8148 {
8149 g_notempty = 0; /* Set for a "normal" repeat */
8150 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8151 {
8152 PCRE2_SIZE startchar;
8153 PCRE2_GET_STARTCHAR(startchar, match_data);
8154 if (end_offset <= startchar)
8155 {
8156 if (startchar >= ulen) break; /* End of subject */
8157 end_offset = startchar + 1;
8158 if (utf && test_mode != PCRE32_MODE)
8159 {
8160 if (test_mode == PCRE8_MODE)
8161 {
8162 for (; end_offset < ulen; end_offset++)
8163 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8164 }
8165 else /* 16-bit mode */
8166 {
8167 for (; end_offset < ulen; end_offset++)
8168 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8169 }
8170 }
8171 }
8172 }
8173 }
8174
8175 /* For a normal global (/g) iteration, save the current ovector[0,1] and
8176 the starting offset so that we can check that they do change each time.
8177 Otherwise a matching bug that returns the same string causes an infinite
8178 loop. It has happened! Then update the start offset, leaving other
8179 parameters alone. */
8180
8181 if ((dat_datctl.control & CTL_GLOBAL) != 0)
8182 {
8183 ovecsave[0] = ovector[0];
8184 ovecsave[1] = ovector[1];
8185 ovecsave[2] = dat_datctl.offset;
8186 dat_datctl.offset = end_offset;
8187 }
8188
8189 /* For altglobal, just update the pointer and length. */
8190
8191 else
8192 {
8193 pp += end_offset * code_unit_size;
8194 len -= end_offset * code_unit_size;
8195 ulen -= end_offset;
8196 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8197 }
8198 }
8199 } /* End of global loop */
8200
8201 show_memory = FALSE;
8202 return PR_OK;
8203 }
8204
8205
8206
8207
8208 /*************************************************
8209 * Print PCRE2 version *
8210 *************************************************/
8211
8212 static void
print_version(FILE * f)8213 print_version(FILE *f)
8214 {
8215 VERSION_TYPE *vp;
8216 fprintf(f, "PCRE2 version ");
8217 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8218 fprintf(f, "\n");
8219 }
8220
8221
8222
8223 /*************************************************
8224 * Print Unicode version *
8225 *************************************************/
8226
8227 static void
print_unicode_version(FILE * f)8228 print_unicode_version(FILE *f)
8229 {
8230 VERSION_TYPE *vp;
8231 fprintf(f, "Unicode version ");
8232 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8233 }
8234
8235
8236
8237 /*************************************************
8238 * Print JIT target *
8239 *************************************************/
8240
8241 static void
print_jit_target(FILE * f)8242 print_jit_target(FILE *f)
8243 {
8244 VERSION_TYPE *vp;
8245 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8246 }
8247
8248
8249
8250 /*************************************************
8251 * Print newline configuration *
8252 *************************************************/
8253
8254 /* Output is always to stdout.
8255
8256 Arguments:
8257 rc the return code from PCRE2_CONFIG_NEWLINE
8258 isc TRUE if called from "-C newline"
8259 Returns: nothing
8260 */
8261
8262 static void
print_newline_config(uint32_t optval,BOOL isc)8263 print_newline_config(uint32_t optval, BOOL isc)
8264 {
8265 if (!isc) printf(" Default newline sequence is ");
8266 if (optval < sizeof(newlines)/sizeof(char *))
8267 printf("%s\n", newlines[optval]);
8268 else
8269 printf("a non-standard value: %d\n", optval);
8270 }
8271
8272
8273
8274 /*************************************************
8275 * Usage function *
8276 *************************************************/
8277
/* Write the command-line help text to stdout. The text is held as a table of
lines; which lines are present depends on the readline/libedit and 8/16/32-bit
library support macros defined at build time.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
static const char *const help_text[] = {
  "Usage: pcre2test [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcre2test is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE2_8
  " -8 use the 8-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_16
  " -16 use the 16-bit library\n",
#endif
#ifdef SUPPORT_PCRE2_32
  " -32 use the 32-bit library\n",
#endif
  " -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n",
  " -AC as -ac, but also set subject 'callout_extra' modifier\n",
  " -b set default pattern modifier 'fullbincode'\n",
  " -C show PCRE2 compile-time options and exit\n",
  " -C arg show a specific compile-time option and exit with its\n",
  " value if numeric (else 0). The arg can be:\n",
  " backslash-C use of \\C is enabled [0, 1]\n",
  " bsr \\R type [ANYCRLF, ANY]\n",
  " ebcdic compiled for EBCDIC character code [0,1]\n",
  " ebcdic-nl NL code if compiled for EBCDIC\n",
  " jit just-in-time compiler supported [0, 1]\n",
  " linksize internal link size [2, 3, 4]\n",
  " newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n",
  " pcre2-8 8 bit library support enabled [0, 1]\n",
  " pcre2-16 16 bit library support enabled [0, 1]\n",
  " pcre2-32 32 bit library support enabled [0, 1]\n",
  " unicode Unicode and UTF support enabled [0, 1]\n",
  " -d set default pattern modifier 'debug'\n",
  " -dfa set default subject modifier 'dfa'\n",
  " -error <n,m,..> show messages for error numbers, then exit\n",
  " -help show usage information\n",
  " -i set default pattern modifier 'info'\n",
  " -jit set default pattern modifier 'jit'\n",
  " -jitfast set default pattern modifier 'jitfast'\n",
  " -jitverify set default pattern modifier 'jitverify'\n",
  " -LM list pattern and subject modifiers, then exit\n",
  " -LP list non-script properties, then exit\n",
  " -LS list supported scripts, then exit\n",
  " -q quiet: do not output PCRE2 version number at start\n",
  " -pattern <s> set default pattern modifier fields\n",
  " -subject <s> set default subject modifier fields\n",
  " -S <n> set stack size to <n> mebibytes\n",
  " -t [<n>] time compilation and execution, repeating <n> times\n",
  " -tm [<n>] time execution (matching) only, repeating <n> times\n",
  " -T same as -t, but show total times at the end\n",
  " -TM same as -tm, but show total time at the end\n",
  " -version show PCRE2 version and exit\n"
};

/* None of the lines contains a conversion specification, so they are written
verbatim. */

for (size_t k = 0; k < sizeof(help_text)/sizeof(help_text[0]); k++)
  fputs(help_text[k], stdout);
}
8336
8337
8338
8339 /*************************************************
8340 * Handle -C option *
8341 *************************************************/
8342
8343 /* This option outputs configuration options and sets an appropriate return
8344 code when asked for a single option. The code is abstracted into a separate
8345 function because of its size. Use whichever pcre2_config() function is
8346 available.
8347
8348 Argument: an option name or NULL
8349 Returns: the return code
8350 */
8351
8352 static int
c_option(const char * arg)8353 c_option(const char *arg)
8354 {
8355 uint32_t optval;
8356 unsigned int i = COPTLISTCOUNT;
8357 int yield = 0;
8358
8359 if (arg != NULL && arg[0] != CHAR_MINUS)
8360 {
8361 for (i = 0; i < COPTLISTCOUNT; i++)
8362 if (strcmp(arg, coptlist[i].name) == 0) break;
8363
8364 if (i >= COPTLISTCOUNT)
8365 {
8366 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8367 return 0;
8368 }
8369
8370 switch (coptlist[i].type)
8371 {
8372 case CONF_BSR:
8373 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8374 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8375 break;
8376
8377 case CONF_FIX:
8378 yield = coptlist[i].value;
8379 printf("%d\n", yield);
8380 break;
8381
8382 case CONF_FIZ:
8383 optval = coptlist[i].value;
8384 printf("%d\n", optval);
8385 break;
8386
8387 case CONF_INT:
8388 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8389 printf("%d\n", yield);
8390 break;
8391
8392 case CONF_NL:
8393 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8394 print_newline_config(optval, TRUE);
8395 break;
8396 }
8397
8398 /* For VMS, return the value by setting a symbol, for certain values only. This
8399 is contributed code which the PCRE2 developers have no means of testing. */
8400
8401 #ifdef __VMS
8402
8403 /* This is the original code provided by the first VMS contributor. */
8404 #ifdef NEVER
8405 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8406 {
8407 char ucname[16];
8408 strcpy(ucname, coptlist[i].name);
8409 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8410 vms_setsymbol(ucname, 0, optval);
8411 }
8412 #endif
8413
8414 /* This is the new code, provided by a second VMS contributor. */
8415
8416 if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8417 {
8418 char nam_buf[22], val_buf[4];
8419 $DESCRIPTOR(nam, nam_buf);
8420 $DESCRIPTOR(val, val_buf);
8421
8422 strcpy(nam_buf, coptlist[i].name);
8423 nam.dsc$w_length = strlen(nam_buf);
8424 sprintf(val_buf, "%d", yield);
8425 val.dsc$w_length = strlen(val_buf);
8426 lib$set_symbol(&nam, &val);
8427 }
8428 #endif /* __VMS */
8429
8430 return yield;
8431 }
8432
8433 /* No argument for -C: output all configuration information. */
8434
8435 print_version(stdout);
8436 printf("Compiled with\n");
8437
8438 #ifdef EBCDIC
8439 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8440 #if defined NATIVE_ZOS
8441 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
8442 #endif
8443 #endif
8444
8445 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8446 if (optval & 1) printf(" 8-bit support\n");
8447 if (optval & 2) printf(" 16-bit support\n");
8448 if (optval & 4) printf(" 32-bit support\n");
8449
8450 #ifdef SUPPORT_VALGRIND
8451 printf(" Valgrind support\n");
8452 #endif
8453
8454 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8455 if (optval != 0)
8456 {
8457 printf(" UTF and UCP support (");
8458 print_unicode_version(stdout);
8459 printf(")\n");
8460 }
8461 else printf(" No Unicode support\n");
8462
8463 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8464 if (optval != 0)
8465 {
8466 printf(" Just-in-time compiler support: ");
8467 print_jit_target(stdout);
8468 printf("\n");
8469 }
8470 else
8471 {
8472 printf(" No just-in-time compiler support\n");
8473 }
8474
8475 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8476 print_newline_config(optval, FALSE);
8477 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8478 printf(" \\R matches %s\n",
8479 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8480 "all Unicode newlines");
8481 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8482 printf(" \\C is %ssupported\n", optval? "not ":"");
8483 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8484 printf(" Internal link size = %d\n", optval);
8485 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8486 printf(" Parentheses nest limit = %d\n", optval);
8487 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8488 printf(" Default heap limit = %d kibibytes\n", optval);
8489 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8490 printf(" Default match limit = %d\n", optval);
8491 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8492 printf(" Default depth limit = %d\n", optval);
8493
8494 #if defined SUPPORT_LIBREADLINE
8495 printf(" pcre2test has libreadline support\n");
8496 #elif defined SUPPORT_LIBEDIT
8497 printf(" pcre2test has libedit support\n");
8498 #else
8499 printf(" pcre2test has neither libreadline nor libedit support\n");
8500 #endif
8501
8502 return 0;
8503 }
8504
8505
8506 /*************************************************
8507 * Format one property/script list item *
8508 *************************************************/
8509
#ifdef SUPPORT_UNICODE

/* Build the text for one list entry from a set of synonymous names. One name
is chosen to go first; if there is more than one name, the others follow in
parentheses, separated by commas.

Arguments:
  ff          vector of offsets into the utt_names string table, terminated
                by a negative value
  buff        where to put the zero-terminated result; no length is checked
                here, so the caller must supply a large enough buffer
  isscript    TRUE when the names are script names

Returns:      nothing
*/

static void
format_list_item(int16_t *ff, char *buff, BOOL isscript)
{
int total = 0;
int lead = 0;
size_t leadlen = 0;
const char *leadname = "";
int idx;

/* Count the entries that precede the negative terminator. */

while (ff[total] >= 0) total++;

/* Choose the leading name. For scripts, the first 3-character name wins
outright. Otherwise (or if there is no such name) it is the longest name, the
earliest one being kept when lengths are equal. */

for (idx = 0; idx < total; idx++)
  {
  const char *name = PRIV(utt_names) + ff[idx];
  size_t namelen = strlen(name);
  int take_now = isscript && namelen == 3;

  if (!take_now && namelen <= leadlen) continue;
  lead = idx;
  leadlen = namelen;
  leadname = name;
  if (take_now) break;
  }

strcpy(buff, leadname);
buff += leadlen;

/* A single name stands alone; otherwise add the rest in parentheses. */

if (total > 1)
  {
  const char *sep = " (";
  for (idx = 0; idx < total; idx++)
    {
    if (idx != lead)
      {
      buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[idx]);
      sep = ", ";
      }
    }
  strcpy(buff, ")");
  }
}
#endif  /* SUPPORT_UNICODE */
8559
8560
8561
8562 /*************************************************
8563 * Display scripts or properties *
8564 *************************************************/
8565
8566 #define MAX_SYNONYMS 5
8567
8568 static void
display_properties(BOOL wantscripts)8569 display_properties(BOOL wantscripts)
8570 {
8571 #ifndef SUPPORT_UNICODE
8572 (void)wantscripts;
8573 printf("** This version of PCRE2 was compiled without Unicode support.\n");
8574 #else
8575
8576 const char *typename;
8577 uint16_t seentypes[1024];
8578 uint16_t seenvalues[1024];
8579 int seencount = 0;
8580 int16_t found[256][MAX_SYNONYMS + 1];
8581 int fc = 0;
8582 int colwidth = 40;
8583 int n;
8584
8585 if (wantscripts)
8586 {
8587 n = ucp_Script_Count;
8588 typename = "SCRIPTS";
8589 }
8590 else
8591 {
8592 n = ucp_Bprop_Count;
8593 typename = "PROPERTIES";
8594 }
8595
8596 for (size_t i = 0; i < PRIV(utt_size); i++)
8597 {
8598 int k;
8599 int m = 0;
8600 int16_t *fv;
8601 const ucp_type_table *t = PRIV(utt) + i;
8602 unsigned int value = t->value;
8603
8604 if (wantscripts)
8605 {
8606 if (t->type != PT_SC && t->type != PT_SCX) continue;
8607 }
8608 else
8609 {
8610 if (t->type != PT_BOOL) continue;
8611 }
8612
8613 for (k = 0; k < seencount; k++)
8614 {
8615 if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
8616 }
8617 if (k < seencount) continue;
8618
8619 seentypes[seencount] = t->type;
8620 seenvalues[seencount++] = t->value;
8621
8622 fv = found[fc++];
8623 fv[m++] = t->name_offset;
8624
8625 for (size_t j = i + 1; j < PRIV(utt_size); j++)
8626 {
8627 const ucp_type_table *tt = PRIV(utt) + j;
8628 if (tt->type != t->type || tt->value != value) continue;
8629 if (m >= MAX_SYNONYMS)
8630 printf("** Too many synonyms: %s ignored\n",
8631 PRIV(utt_names) + tt->name_offset);
8632 else fv[m++] = tt->name_offset;
8633 }
8634
8635 fv[m] = -1;
8636 }
8637
8638 printf("-------------------------- SUPPORTED %s --------------------------\n\n",
8639 typename);
8640
8641 if (!wantscripts) printf(
8642 "This release of PCRE2 supports Unicode's general category properties such\n"
8643 "as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
8644 "and the following binary (yes/no) properties:\n\n");
8645
8646
8647 for (int k = 0; k < (n+1)/2; k++)
8648 {
8649 int x;
8650 char buff1[128];
8651 char buff2[128];
8652
8653 format_list_item(found[k], buff1, wantscripts);
8654 x = k + (n+1)/2;
8655 if (x < n) format_list_item(found[x], buff2, wantscripts);
8656 else buff2[0] = 0;
8657
8658 x = printf("%s", buff1);
8659 while (x++ < colwidth) printf(" ");
8660 printf("%s\n", buff2);
8661 }
8662
8663 #endif /* SUPPORT_UNICODE */
8664 }
8665
8666
8667
8668 /*************************************************
8669 * Display one modifier *
8670 *************************************************/
8671
8672 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8673 display_one_modifier(modstruct *m, BOOL for_pattern)
8674 {
8675 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8676 '*' : ' ';
8677 printf("%c%s", c, m->name);
8678 for (size_t i = 0; i < C1MODLISTCOUNT; i++)
8679 {
8680 if (strcmp(m->name, c1modlist[i].fullname) == 0)
8681 printf(" (%c)", c1modlist[i].onechar);
8682 }
8683 }
8684
8685
8686
8687 /*************************************************
8688 * Display pattern or subject modifiers *
8689 *************************************************/
8690
8691 /* In order to print in two columns, first scan without printing to get a list
8692 of the modifiers that are required.
8693
8694 Arguments:
8695 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8696 title string to be used in title
8697
8698 Returns: nothing
8699 */
8700
8701 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8702 display_selected_modifiers(BOOL for_pattern, const char *title)
8703 {
8704 uint32_t i, j;
8705 uint32_t n = 0;
8706 uint32_t list[MODLISTCOUNT];
8707 uint32_t extra[MODLISTCOUNT];
8708
8709 for (i = 0; i < MODLISTCOUNT; i++)
8710 {
8711 BOOL is_pattern = TRUE;
8712 modstruct *m = modlist + i;
8713
8714 switch (m->which)
8715 {
8716 case MOD_CTC: /* Compile context */
8717 case MOD_PAT: /* Pattern */
8718 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8719 break;
8720
8721 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8722 subjects, but can be given with a pattern. We list them as subject
8723 modifiers, but marked with an asterisk.*/
8724
8725 case MOD_CTM: /* Match context */
8726 case MOD_DAT: /* Subject line */
8727 case MOD_DATP: /* Subject line, OK for Perl-compatible test */
8728 case MOD_PND: /* As PD, but not default pattern */
8729 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8730 is_pattern = FALSE;
8731 break;
8732
8733 default: printf("** Unknown type for modifier '%s'\n", m->name);
8734 /* Fall through */
8735 case MOD_PD: /* Pattern or subject */
8736 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8737 is_pattern = for_pattern;
8738 break;
8739 }
8740
8741 if (for_pattern == is_pattern)
8742 {
8743 extra[n] = 0;
8744 for (size_t k = 0; k < C1MODLISTCOUNT; k++)
8745 {
8746 if (strcmp(m->name, c1modlist[k].fullname) == 0)
8747 {
8748 extra[n] += 4;
8749 break;
8750 }
8751 }
8752 list[n++] = i;
8753 }
8754 }
8755
8756 /* Now print from the list in two columns. */
8757
8758 printf("-------------- %s MODIFIERS --------------\n", title);
8759
8760 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8761 {
8762 modstruct *m = modlist + list[i];
8763 display_one_modifier(m, for_pattern);
8764 if (j < n)
8765 {
8766 uint32_t k = 27 - strlen(m->name) - extra[i];
8767 while (k-- > 0) printf(" ");
8768 display_one_modifier(modlist + list[j], for_pattern);
8769 }
8770 printf("\n");
8771 }
8772 }
8773
8774
8775
8776 /*************************************************
8777 * Display the list of modifiers *
8778 *************************************************/
8779
8780 static void
display_modifiers(void)8781 display_modifiers(void)
8782 {
8783 printf(
8784 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8785 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8786 "that are listed for both patterns and subjects have different effects in\n"
8787 "each case.\n\n");
8788 display_selected_modifiers(TRUE, "PATTERN");
8789 printf("\n");
8790 display_selected_modifiers(FALSE, "SUBJECT");
8791 }
8792
8793
8794
8795 /*************************************************
8796 * Main Program *
8797 *************************************************/
8798
8799 int
main(int argc,char ** argv)8800 main(int argc, char **argv)
8801 {
8802 uint32_t temp;
8803 uint32_t yield = 0;
8804 uint32_t op = 1;
8805 BOOL notdone = TRUE;
8806 BOOL quiet = FALSE;
8807 BOOL showtotaltimes = FALSE;
8808 BOOL skipping = FALSE;
8809 char *arg_subject = NULL;
8810 char *arg_pattern = NULL;
8811 char *arg_error = NULL;
8812
8813 /* The offsets to the options and control bits fields of the pattern and data
8814 control blocks must be the same so that common options and controls such as
8815 "anchored" or "memory" can work for either of them from a single table entry.
8816 We cannot test this till runtime because "offsetof" does not work in the
8817 preprocessor. */
8818
8819 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8820 PO(control2) != DO(control2))
8821 {
8822 fprintf(stderr, "** Coding error: "
8823 "options and control offsets for pattern and data must be the same.\n");
8824 return 1;
8825 }
8826
8827 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8828 same time checking that a request for the length gives the same answer. Also
8829 check lengths for non-string items. */
8830
8831 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8832 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8833
8834 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8835 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8836
8837 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8838 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8839
8840 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8841 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8842 {
8843 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8844 return 1;
8845 }
8846
8847 /* Check that bad options are diagnosed. */
8848
8849 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8850 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8851 {
8852 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8853 return 1;
8854 }
8855
8856 /* This configuration option is now obsolete, but running a quick check ensures
8857 that its code is covered. */
8858
8859 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8860
8861 /* Get buffers from malloc() so that valgrind will check their misuse when
8862 debugging. They grow automatically when very long lines are read. The 16-
8863 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8864
8865 buffer = (uint8_t *)malloc(pbuffer8_size);
8866 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8867
8868 /* The following _setmode() stuff is some Windows magic that tells its runtime
8869 library to translate CRLF into a single LF character. At least, that's what
8870 I've been told: never having used Windows I take this all on trust. Originally
8871 it set 0x8000, but then I was advised that _O_BINARY was better. */
8872
8873 #if defined(_WIN32) || defined(WIN32)
8874 _setmode( _fileno( stdout ), _O_BINARY );
8875 #endif
8876
8877 /* Initialization that does not depend on the running mode. */
8878
8879 locale_name[0] = 0;
8880
8881 memset(&def_patctl, 0, sizeof(patctl));
8882 def_patctl.convert_type = CONVERT_UNSET;
8883
8884 memset(&def_datctl, 0, sizeof(datctl));
8885 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8886 def_datctl.copy_numbers[0] = -1;
8887 def_datctl.get_numbers[0] = -1;
8888 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8889 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8890 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8891
8892 /* Scan command line options. */
8893
8894 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8895 {
8896 char *endptr;
8897 char *arg = argv[op];
8898 unsigned long uli;
8899
8900 /* List modifiers and exit. */
8901
8902 if (strcmp(arg, "-LM") == 0)
8903 {
8904 display_modifiers();
8905 goto EXIT;
8906 }
8907
8908 /* List properties and exit */
8909
8910 if (strcmp(arg, "-LP") == 0)
8911 {
8912 display_properties(FALSE);
8913 goto EXIT;
8914 }
8915
8916 /* List scripts and exit */
8917
8918 if (strcmp(arg, "-LS") == 0)
8919 {
8920 display_properties(TRUE);
8921 goto EXIT;
8922 }
8923
8924 /* Display and/or set return code for configuration options. */
8925
8926 if (strcmp(arg, "-C") == 0)
8927 {
8928 yield = c_option(argv[op + 1]);
8929 goto EXIT;
8930 }
8931
8932 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8933 and 32-bit modes because that won't happen naturally when 8-bit is also
8934 configured. Also call some other functions that are not otherwise used. This
8935 means that a coverage report won't claim there are uncalled functions. */
8936
8937 if (strcmp(arg, "-8") == 0)
8938 {
8939 #ifdef SUPPORT_PCRE2_8
8940 test_mode = PCRE8_MODE;
8941 (void)pcre2_set_bsr_8(pat_context8, 999);
8942 (void)pcre2_set_newline_8(pat_context8, 999);
8943 #else
8944 fprintf(stderr,
8945 "** This version of PCRE2 was built without 8-bit support\n");
8946 exit(1);
8947 #endif
8948 }
8949
8950 else if (strcmp(arg, "-16") == 0)
8951 {
8952 #ifdef SUPPORT_PCRE2_16
8953 test_mode = PCRE16_MODE;
8954 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8955 (void)pcre2_set_bsr_16(pat_context16, 999);
8956 (void)pcre2_set_newline_16(pat_context16, 999);
8957 #else
8958 fprintf(stderr,
8959 "** This version of PCRE2 was built without 16-bit support\n");
8960 exit(1);
8961 #endif
8962 }
8963
8964 else if (strcmp(arg, "-32") == 0)
8965 {
8966 #ifdef SUPPORT_PCRE2_32
8967 test_mode = PCRE32_MODE;
8968 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8969 (void)pcre2_set_bsr_32(pat_context32, 999);
8970 (void)pcre2_set_newline_32(pat_context32, 999);
8971 #else
8972 fprintf(stderr,
8973 "** This version of PCRE2 was built without 32-bit support\n");
8974 exit(1);
8975 #endif
8976 }
8977
8978 /* Set quiet (no version verification) */
8979
8980 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8981
8982 /* Set system stack size */
8983
8984 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8985 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8986 {
8987 #if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS)
8988 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8989 exit(1);
8990 #else
8991 int rc;
8992 uint32_t stack_size;
8993 struct rlimit rlim;
8994 if (U32OVERFLOW(uli))
8995 {
8996 fprintf(stderr, "** Argument for -S is too big\n");
8997 exit(1);
8998 }
8999 stack_size = (uint32_t)uli;
9000 getrlimit(RLIMIT_STACK, &rlim);
9001 rlim.rlim_cur = stack_size * 1024 * 1024;
9002 if (rlim.rlim_cur > rlim.rlim_max)
9003 {
9004 fprintf(stderr,
9005 "pcre2test: requested stack size %luMiB is greater than hard limit ",
9006 (unsigned long int)stack_size);
9007 if (rlim.rlim_max % (1024*1024) == 0) fprintf(stderr, "%luMiB\n",
9008 (unsigned long int)(rlim.rlim_max/(1024 * 1024)));
9009 else if (rlim.rlim_max % 1024 == 0) fprintf(stderr, "%luKiB\n",
9010 (unsigned long int)(rlim.rlim_max/1024));
9011 else fprintf(stderr, "%lu bytes\n", (unsigned long int)(rlim.rlim_max));
9012 exit(1);
9013 }
9014 rc = setrlimit(RLIMIT_STACK, &rlim);
9015 if (rc != 0)
9016 {
9017 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
9018 (unsigned long int)stack_size, strerror(errno));
9019 exit(1);
9020 }
9021 op++;
9022 argc--;
9023 #endif
9024 }
9025
9026 /* Set some common pattern and subject controls */
9027
9028 else if (strcmp(arg, "-AC") == 0)
9029 {
9030 def_patctl.options |= PCRE2_AUTO_CALLOUT;
9031 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
9032 }
9033 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
9034 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
9035 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
9036 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
9037 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
9038 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
9039 strcmp(arg, "-jitfast") == 0)
9040 {
9041 if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
9042 else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
9043 def_patctl.jit = JIT_DEFAULT; /* full & partial */
9044 #ifndef SUPPORT_JIT
9045 fprintf(stderr, "** Warning: JIT support is not available: "
9046 "-jit[fast|verify] calls functions that do nothing.\n");
9047 #endif
9048 }
9049
9050 /* Set timing parameters */
9051
9052 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
9053 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
9054 {
9055 int both = arg[2] == 0;
9056 showtotaltimes = arg[1] == 'T';
9057 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
9058 {
9059 if (uli == 0)
9060 {
9061 fprintf(stderr, "** Argument for %s must not be zero\n", arg);
9062 exit(1);
9063 }
9064 if (U32OVERFLOW(uli))
9065 {
9066 fprintf(stderr, "** Argument for %s is too big\n", arg);
9067 exit(1);
9068 }
9069 timeitm = (int)uli;
9070 op++;
9071 argc--;
9072 }
9073 else timeitm = LOOPREPEAT;
9074 if (both) timeit = timeitm;
9075 }
9076
9077 /* Give help */
9078
9079 else if (strcmp(arg, "-help") == 0 ||
9080 strcmp(arg, "--help") == 0)
9081 {
9082 usage();
9083 goto EXIT;
9084 }
9085
9086 /* Show version */
9087
9088 else if (strcmp(arg, "-version") == 0 ||
9089 strcmp(arg, "--version") == 0)
9090 {
9091 print_version(stdout);
9092 goto EXIT;
9093 }
9094
9095 /* The following options save their data for processing once we know what
9096 the running mode is. */
9097
9098 else if (strcmp(arg, "-error") == 0)
9099 {
9100 arg_error = argv[op+1];
9101 goto CHECK_VALUE_EXISTS;
9102 }
9103
9104 else if (strcmp(arg, "-subject") == 0)
9105 {
9106 arg_subject = argv[op+1];
9107 goto CHECK_VALUE_EXISTS;
9108 }
9109
9110 else if (strcmp(arg, "-pattern") == 0)
9111 {
9112 arg_pattern = argv[op+1];
9113 CHECK_VALUE_EXISTS:
9114 if (argc <= 2)
9115 {
9116 fprintf(stderr, "** Missing value for %s\n", arg);
9117 yield = 1;
9118 goto EXIT;
9119 }
9120 op++;
9121 argc--;
9122 }
9123
9124 /* Unrecognized option */
9125
9126 else
9127 {
9128 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
9129 usage();
9130 yield = 1;
9131 goto EXIT;
9132 }
9133 op++;
9134 argc--;
9135 }
9136
9137 /* If -error was present, get the error numbers, show the messages, and exit.
9138 We wait to do this until we know which mode we are in. */
9139
9140 if (arg_error != NULL)
9141 {
9142 int len;
9143 int errcode;
9144 char *endptr;
9145
9146 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
9147 least 128 code units, because it is used for retrieving error messages. */
9148
9149 #ifdef SUPPORT_PCRE2_16
9150 if (test_mode == PCRE16_MODE)
9151 {
9152 pbuffer16_size = 256;
9153 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
9154 if (pbuffer16 == NULL)
9155 {
9156 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
9157 pbuffer16_size);
9158 yield = 1;
9159 goto EXIT;
9160 }
9161 }
9162 #endif
9163
9164 #ifdef SUPPORT_PCRE2_32
9165 if (test_mode == PCRE32_MODE)
9166 {
9167 pbuffer32_size = 512;
9168 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
9169 if (pbuffer32 == NULL)
9170 {
9171 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
9172 pbuffer32_size);
9173 yield = 1;
9174 goto EXIT;
9175 }
9176 }
9177 #endif
9178
9179 /* Loop along a list of error numbers. */
9180
9181 for (;;)
9182 {
9183 errcode = strtol(arg_error, &endptr, 10);
9184 if (*endptr != 0 && *endptr != CHAR_COMMA)
9185 {
9186 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
9187 yield = 1;
9188 goto EXIT;
9189 }
9190 printf("Error %d: ", errcode);
9191 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
9192 if (len < 0)
9193 {
9194 switch (len)
9195 {
9196 case PCRE2_ERROR_BADDATA:
9197 printf("PCRE2_ERROR_BADDATA (unknown error number)");
9198 break;
9199
9200 case PCRE2_ERROR_NOMEMORY:
9201 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
9202 break;
9203
9204 default:
9205 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
9206 break;
9207 }
9208 }
9209 else
9210 {
9211 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
9212 }
9213 printf("\n");
9214 if (*endptr == 0) goto EXIT;
9215 arg_error = endptr + 1;
9216 }
9217 /* Control never reaches here */
9218 } /* End of -error handling */
9219
9220 /* Initialize things that cannot be done until we know which test mode we are
9221 running in. Exercise the general context copying and match data size functions,
9222 which are not otherwise used. */
9223
9224 code_unit_size = test_mode/8;
9225 max_oveccount = DEFAULT_OVECCOUNT;
9226
9227 /* Use macros to save a lot of duplication. */
9228
/* CREATECONTEXTS builds the full set of contexts for one code unit width.
BITS must be #defined (8, 16 or 32) at the point of use; G(name,BITS) is
defined elsewhere in this file and is used here to form the width-specific
variable and function names.

In order, the macro:
  - creates the general context with my_malloc/my_free and no memory data,
    then takes a copy of it;
  - creates the default compile, match and convert contexts from the general
    context, and takes a working copy of each;
  - creates the match data block with max_oveccount ovector pairs.

None of the results is checked here. The body is wrapped in do { } while (0)
so that "CREATECONTEXTS;" is exactly one statement, which keeps it safe under
an unbraced if/else. */

#define CREATECONTEXTS \
  do { \
    G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
    G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
    G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
    G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
    G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
    G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
    G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
    G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
    G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS)); \
  } while (0)
9239
/* CONTEXTTESTS calls four library functions for the current code unit width
purely to exercise them; every return value is discarded. BITS must be
#defined at the point of use. The calls are:
  - set the compile extra options to 0 on the working compile context;
  - set the maximum pattern length to 0 on the working compile context;
  - set the offset limit to 0 on the working match context;
  - fetch the size of the match data block.

The body is wrapped in do { } while (0) so that "CONTEXTTESTS;" is exactly
one statement. */

#define CONTEXTTESTS \
  do { \
    (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
    (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
    (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
    (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS)); \
  } while (0)
9245
9246
9247 /* Call the appropriate functions for the current mode, and exercise some
9248 functions that are not otherwise called. */
9249
9250 #ifdef SUPPORT_PCRE2_8
9251 #undef BITS
9252 #define BITS 8
9253 if (test_mode == PCRE8_MODE)
9254 {
9255 CREATECONTEXTS;
9256 CONTEXTTESTS;
9257 }
9258 #endif
9259
9260 #ifdef SUPPORT_PCRE2_16
9261 #undef BITS
9262 #define BITS 16
9263 if (test_mode == PCRE16_MODE)
9264 {
9265 CREATECONTEXTS;
9266 CONTEXTTESTS;
9267 }
9268 #endif
9269
9270 #ifdef SUPPORT_PCRE2_32
9271 #undef BITS
9272 #define BITS 32
9273 if (test_mode == PCRE32_MODE)
9274 {
9275 CREATECONTEXTS;
9276 CONTEXTTESTS;
9277 }
9278 #endif
9279
9280 /* Set a default parentheses nest limit that is large enough to run the
9281 standard tests (this also exercises the function). */
9282
9283 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
9284
9285 /* Handle command line modifier settings, sending any error messages to
9286 stderr. We need to know the mode before modifying the context, and it is tidier
9287 to do them all in the same way. */
9288
9289 outfile = stderr;
9290 if ((arg_pattern != NULL &&
9291 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
9292 (arg_subject != NULL &&
9293 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
9294 {
9295 yield = 1;
9296 goto EXIT;
9297 }
9298
9299 /* Sort out the input and output files, defaulting to stdin/stdout. */
9300
9301 infile = stdin;
9302 outfile = stdout;
9303
9304 if (argc > 1 && strcmp(argv[op], "-") != 0)
9305 {
9306 infile = fopen(argv[op], INPUT_MODE);
9307 if (infile == NULL)
9308 {
9309 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9310 yield = 1;
9311 goto EXIT;
9312 }
9313 }
9314
9315 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9316 if (INTERACTIVE(infile)) using_history();
9317 #endif
9318
9319 if (argc > 2)
9320 {
9321 outfile = fopen(argv[op+1], OUTPUT_MODE);
9322 if (outfile == NULL)
9323 {
9324 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9325 yield = 1;
9326 goto EXIT;
9327 }
9328 }
9329
9330 /* Output a heading line unless quiet, then process input lines. */
9331
9332 if (!quiet) print_version(outfile);
9333
9334 SET(compiled_code, NULL);
9335
9336 #ifdef SUPPORT_PCRE2_8
9337 preg.re_pcre2_code = NULL;
9338 preg.re_match_data = NULL;
9339 #endif
9340
9341 while (notdone)
9342 {
9343 uint8_t *p;
9344 int rc = PR_OK;
9345 BOOL expectdata = TEST(compiled_code, !=, NULL);
9346 #ifdef SUPPORT_PCRE2_8
9347 expectdata |= preg.re_pcre2_code != NULL;
9348 #endif
9349
9350 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
9351 break;
9352 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9353 fflush(outfile);
9354 p = buffer;
9355
9356 /* If we have a pattern set up for testing, or we are skipping after a
9357 compile failure, a blank line terminates this test. */
9358
9359 if (expectdata || skipping)
9360 {
9361 while (isspace(*p)) p++;
9362 if (*p == 0)
9363 {
9364 #ifdef SUPPORT_PCRE2_8
9365 if (preg.re_pcre2_code != NULL)
9366 {
9367 regfree(&preg);
9368 preg.re_pcre2_code = NULL;
9369 preg.re_match_data = NULL;
9370 }
9371 #endif /* SUPPORT_PCRE2_8 */
9372 if (TEST(compiled_code, !=, NULL))
9373 {
9374 SUB1(pcre2_code_free, compiled_code);
9375 SET(compiled_code, NULL);
9376 }
9377 skipping = FALSE;
9378 setlocale(LC_CTYPE, "C");
9379 }
9380
9381 /* Otherwise, if we are not skipping, and the line is not a data comment
9382 line starting with "\=", process a data line. */
9383
9384 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9385 {
9386 rc = process_data();
9387 }
9388 }
9389
9390 /* We do not have a pattern set up for testing. Lines starting with # are
9391 either comments or special commands. Blank lines are ignored. Otherwise, the
9392 line must start with a valid delimiter. It is then processed as a pattern
9393 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9394 valgrind, make the unused part of the buffer undefined, to catch overruns. */
9395
9396 else if (*p == '#')
9397 {
9398 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9399 rc = process_command();
9400 }
9401
9402 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9403 {
9404 rc = process_pattern();
9405 dfa_matched = 0;
9406 }
9407
9408 else
9409 {
9410 while (isspace(*p)) p++;
9411 if (*p != 0)
9412 {
9413 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9414 *buffer);
9415 rc = PR_SKIP;
9416 }
9417 }
9418
9419 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9420 else if (rc == PR_ABEND)
9421 {
9422 fprintf(outfile, "** pcre2test run abandoned\n");
9423 yield = 1;
9424 goto EXIT;
9425 }
9426 }
9427
9428 /* Finish off a normal run. */
9429
9430 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9431
9432 if (showtotaltimes)
9433 {
9434 const char *pad = "";
9435 fprintf(outfile, "--------------------------------------\n");
9436 if (timeit > 0)
9437 {
9438 fprintf(outfile, "Total compile time %.4f milliseconds\n",
9439 (((double)total_compile_time * 1000.0) / (double)timeit) /
9440 (double)CLOCKS_PER_SEC);
9441 if (total_jit_compile_time > 0)
9442 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
9443 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9444 (double)CLOCKS_PER_SEC);
9445 pad = " ";
9446 }
9447 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9448 (((double)total_match_time * 1000.0) / (double)timeitm) /
9449 (double)CLOCKS_PER_SEC);
9450 }
9451
9452
9453 EXIT:
9454
9455 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9456 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9457 #endif
9458
9459 if (infile != NULL && infile != stdin) fclose(infile);
9460 if (outfile != NULL && outfile != stdout) fclose(outfile);
9461
9462 free(buffer);
9463 free(dbuffer);
9464 free(pbuffer8);
9465 free(dfa_workspace);
9466 free(tables3);
9467 PCRE2_MAKETABLES_FREE(general_context, (void *)locale_tables);
9468 PCRE2_MATCH_DATA_FREE(match_data);
9469 SUB1(pcre2_code_free, compiled_code);
9470
9471 while(patstacknext-- > 0)
9472 {
9473 SET(compiled_code, patstack[patstacknext]);
9474 SUB1(pcre2_code_free, compiled_code);
9475 }
9476
9477 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9478 if (jit_stack != NULL)
9479 {
9480 PCRE2_JIT_STACK_FREE(jit_stack);
9481 }
9482
/* FREECONTEXTS releases the eight contexts for one code unit width: the
general context and its copy, then the working and default compile contexts,
the working and default match contexts, and the default and working convert
contexts. BITS must be #defined at the point of use. The match data block is
not freed here.

The macro is expanded for every compiled-in width, whichever mode was run, so
it is also applied to context variables that were never assigned.
NOTE(review): this relies on the free functions accepting NULL - confirm
against the API documentation.

The definition has no trailing semicolon and is wrapped in do { } while (0),
so "FREECONTEXTS;" is exactly one statement with no stray empty statement. */

#define FREECONTEXTS \
  do { \
    G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
    G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
    G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
    G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
    G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
    G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
    G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
    G(pcre2_convert_context_free_,BITS)(G(con_context,BITS)); \
  } while (0)
9492
9493 #ifdef SUPPORT_PCRE2_8
9494 #undef BITS
9495 #define BITS 8
9496 if (preg.re_pcre2_code != NULL) regfree(&preg);
9497 FREECONTEXTS;
9498 #endif
9499
9500 #ifdef SUPPORT_PCRE2_16
9501 #undef BITS
9502 #define BITS 16
9503 free(pbuffer16);
9504 FREECONTEXTS;
9505 #endif
9506
9507 #ifdef SUPPORT_PCRE2_32
9508 #undef BITS
9509 #define BITS 32
9510 free(pbuffer32);
9511 FREECONTEXTS;
9512 #endif
9513
9514 #if defined(__VMS)
9515 yield = SS$_NORMAL; /* Return values via DCL symbols */
9516 #endif
9517
9518 return yield;
9519 }
9520
9521 /* End of pcre2test.c */
9522