1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Both libreadline and libedit are optionally supported. The user-supplied
82 original patch uses readline/readline.h for libedit, but in at least one system
83 it is installed as editline/readline.h, so the configuration code now looks for
84 that first, falling back to readline/readline.h. */
85
86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
87 #if defined(SUPPORT_LIBREADLINE)
88 #include <readline/readline.h>
89 #include <readline/history.h>
90 #else
91 #if defined(HAVE_EDITLINE_READLINE_H)
92 #include <editline/readline.h>
93 #else
94 #include <readline/readline.h>
95 #endif
96 #endif
97 #endif
98
/* Put the test for interactive input into a macro so that it can be changed if
required for different environments. INTERACTIVE(f) takes a stdio stream and
yields whatever isatty() returns for that stream's file descriptor, as obtained
from fileno(). */

#define INTERACTIVE(f) isatty(fileno(f))
103
104
105 /* ---------------------- System-specific definitions ---------------------- */
106
107 /* A number of things vary for Windows builds. Originally, pcretest opened its
108 input and output without "b"; then I was told that "b" was needed in some
109 environments, so it was added for release 5.0 to both the input and output. (It
110 makes no difference on Unix-like systems.) Later I was told that it is wrong
111 for the input on Windows. I've now abstracted the modes into macros that are
112 set here, to make it easier to fiddle with them, and removed "b" from the input
113 mode under Windows. The BINARY versions are used when saving/restoring compiled
114 patterns. */
115
/* The BINARY modes, which are used when saving/restoring compiled patterns,
have the same value on every system, so they are set once here rather than in
each branch of the system tests below. */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"

#if defined(_WIN32) || defined(WIN32)

/* Windows: no "b" on the input mode, but "b" on the output mode. */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"

/* Windows calls these functions _isatty and _fileno, I'm told, though in some
environments the plain names seem to be already defined, hence the #ifndefs. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else  /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "w"
#else                          /* Everything else uses "b" for both */
#define INPUT_MODE          "rb"
#define OUTPUT_MODE         "wb"
#endif

#endif  /* Windows or not */
155
#ifdef __VMS
#include <ssdef.h>
/* Prototype only: the definition of vms_setsymbol() is not in this part of the
file. NOTE(review): the meaning of the three arguments cannot be determined
from here - check the definition before relying on them. */
void vms_setsymbol( char *, char *, int );
#endif
160
161 /* ------------------End of system-specific definitions -------------------- */
162
163 /* Glueing macros that are used in several places below. */
164
/* Two-level token pasting. glue() joins its arguments directly with ##; G()
goes through glue() so that its own arguments are macro-expanded before they
are joined. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
167
168 /* Miscellaneous parameters and manifests */
169
/* Supply CLOCKS_PER_SEC when the C library has not: take it from CLK_TCK if
that exists, otherwise fall back to 100. */

#if defined CLOCKS_PER_SEC
/* Already available; nothing to do. */
#elif defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
177
#define CFAIL_UNSET UINT32_MAX  /* Unset value for cfail fields */
#define DFA_WS_DIMENSION 1000   /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15    /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef  /* For initializing ovector */
#define LOCALESIZE 32           /* Size of locale name */
#define LOOPREPEAT 500000       /* Default loop count for timing */
#define PATSTACKSIZE 20         /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE 100     /* Field for reading 8-bit replacement */
#define VERSION_SIZE 64         /* Size of buffer for the version strings */

/* Make sure the buffer into which replacement strings are copied is big enough
to hold them as 32-bit code units. REPLACE_MODSIZE counts 8-bit units, each of
which may need four bytes once widened, so the requirement is checked at
compile time instead of being left to whoever next edits either number. */

#define REPLACE_BUFFSIZE 1024   /* This is a byte value */

#if REPLACE_BUFFSIZE < 4*REPLACE_MODSIZE
#error "REPLACE_BUFFSIZE cannot hold REPLACE_MODSIZE 32-bit code units"
#endif

/* Execution modes. The values are the code unit widths in bits. */

#define PCRE8_MODE   8
#define PCRE16_MODE 16
#define PCRE32_MODE 32

/* Processing returns */

enum { PR_OK, PR_SKIP, PR_ABEND };
202
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */
207
/* Locale-independent printability test; the range differs for EBCDIC. The
argument is evaluated twice. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK asks isprint() only when locale tables are in use AND the value is
below 256. isprint() has undefined behaviour for an argument that is neither
EOF nor representable as unsigned char, and this program also handles 16-bit
and 32-bit code units, whose values can exceed that range. Anything else is
decided by PRINTABLE. The argument is evaluated more than once.
NOTE(review): this assumes callers pass an unsigned value; a negative int would
still reach isprint() - confirm against the call sites. */

#define PRINTOK(c) \
  ((locale_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
215
216 /* We have to include some of the library source files because we need
217 to use some of the macros, internal structure definitions, and other internal
218 values - pcre2test has "inside information" compared to an application program
219 that strictly follows the PCRE2 API.
220
221 Before including pcre2_internal.h we define PRIV so that it does not get
222 defined therein. This ensures that PRIV names in the included files do not
223 clash with those in the libraries. Also, although pcre2_internal.h does itself
224 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
225 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
226 for building the library. */
227
228 #define PRIV(name) name
229 #define PCRE2_CODE_UNIT_WIDTH 0
230 #include "pcre2.h"
231 #include "pcre2posix.h"
232 #include "pcre2_internal.h"
233
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
237
238 #define PCRE2_PCRE2TEST
239 #include "pcre2_tables.c"
240 #include "pcre2_ucd.c"
241
242 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
243 check needed for overflow depends on whether long ints are in fact longer than
244 ints. They are defined not to be shorter. */
245
/* U32OVERFLOW(x) tests a value obtained from strtoul(), S32OVERFLOW(x) one
obtained from strtol(). When long is wider than 32 bits the value is simply
compared with the 32-bit limits. When long is itself 32 bits, the only
available test is equality with the limit values. The argument is
parenthesized so that an expression such as "a | b" is tested as a whole; it is
evaluated more than once by S32OVERFLOW. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
257
258 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
259 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
260 defined. We can now include it for each supported code unit width. Because
261 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
262 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
263 while including these files, and then restore it to a no-op. Because LINK_SIZE
264 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
265 these inclusions should not be changed. */
266
267 #undef PCRE2_SUFFIX
268 #undef PCRE2_CODE_UNIT_WIDTH
269
270 #ifdef SUPPORT_PCRE2_8
271 #define PCRE2_CODE_UNIT_WIDTH 8
272 #define PCRE2_SUFFIX(a) G(a,8)
273 #include "pcre2_intmodedep.h"
274 #include "pcre2_printint.c"
275 #undef PCRE2_CODE_UNIT_WIDTH
276 #undef PCRE2_SUFFIX
277 #endif /* SUPPORT_PCRE2_8 */
278
279 #ifdef SUPPORT_PCRE2_16
280 #define PCRE2_CODE_UNIT_WIDTH 16
281 #define PCRE2_SUFFIX(a) G(a,16)
282 #include "pcre2_intmodedep.h"
283 #include "pcre2_printint.c"
284 #undef PCRE2_CODE_UNIT_WIDTH
285 #undef PCRE2_SUFFIX
286 #endif /* SUPPORT_PCRE2_16 */
287
288 #ifdef SUPPORT_PCRE2_32
289 #define PCRE2_CODE_UNIT_WIDTH 32
290 #define PCRE2_SUFFIX(a) G(a,32)
291 #include "pcre2_intmodedep.h"
292 #include "pcre2_printint.c"
293 #undef PCRE2_CODE_UNIT_WIDTH
294 #undef PCRE2_SUFFIX
295 #endif /* SUPPORT_PCRE2_32 */
296
297 #define PCRE2_SUFFIX(a) a
298
299 /* We need to be able to check input text for UTF-8 validity, whatever code
300 widths are actually available, because the input to pcre2test is always in
301 8-bit code units. So we include the UTF validity checking function for 8-bit
302 code units. */
303
304 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
305
306 #define PCRE2_CODE_UNIT_WIDTH 8
307 #undef PCRE2_SPTR
308 #define PCRE2_SPTR PCRE2_SPTR8
309 #include "pcre2_valid_utf.c"
310 #undef PCRE2_CODE_UNIT_WIDTH
311 #undef PCRE2_SPTR
312
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
support, there must be 16- or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available. */
318
/* Each branch defines the same set of names, bound to the 8-, 16-, or 32-bit
form of the library: the default test mode, the code unit type used for the
version strings, the config function, the JIT stack type, and the three real
context structure types. The first enabled width wins. */

#if defined SUPPORT_PCRE2_8
#define DEFAULT_TEST_MODE PCRE8_MODE
#define VERSION_TYPE PCRE2_UCHAR8
#define PCRE2_CONFIG pcre2_config_8
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8

#elif defined SUPPORT_PCRE2_16
#define DEFAULT_TEST_MODE PCRE16_MODE
#define VERSION_TYPE PCRE2_UCHAR16
#define PCRE2_CONFIG pcre2_config_16
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16

#elif defined SUPPORT_PCRE2_32
#define DEFAULT_TEST_MODE PCRE32_MODE
#define VERSION_TYPE PCRE2_UCHAR32
#define PCRE2_CONFIG pcre2_config_32
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
347
348 /* ------------- Structure and table for handling #-commands ------------- */
349
/* One entry of the #-command table. */

typedef struct cmdstruct {
  const char *name;    /* Command name, as written after the '#' */
  int value;           /* The matching CMD_xxx value */
} cmdstruct;

/* Command identifiers. CMD_UNKNOWN has no entry in cmdlist. */

enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
  CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }};

/* Number of entries in cmdlist. The expansion is parenthesized so that it
behaves as a single value wherever it is used, for example after a cast or as
the right-hand operand of % or /. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
370
371 /* ------------- Structures and tables for handling modifiers -------------- */
372
373 /* Table of names for newline types. Must be kept in step with the definitions
374 of PCRE2_NEWLINE_xx in pcre2.h. */
375
static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF"
};
378
/* Modifier types and applicability. The two kinds of value share one
enumeration. In the modifier table further down, the values from MOD_CTC to
MOD_PNDP appear in the "which" column and say where a modifier may be used; the
values from MOD_CTL to MOD_STR appear in the "type" column and say what kind of
value the modifier carries. */

enum { MOD_CTC,    /* Applies to a compile context */
       MOD_CTM,    /* Applies to a match context */
       MOD_PAT,    /* Applies to a pattern */
       MOD_PATP,   /* Ditto, OK for Perl test */
       MOD_DAT,    /* Applies to a data line */
       MOD_PD,     /* Applies to a pattern or a data line */
       MOD_PDP,    /* As MOD_PD, OK for Perl test */
       MOD_PND,    /* As MOD_PD, but not for a default pattern */
       MOD_PNDP,   /* As MOD_PND, OK for Perl test */
       MOD_CTL,    /* Is a control bit */
       MOD_BSR,    /* Is a BSR value */
       MOD_IN2,    /* Is one or two unsigned integers */
       MOD_INS,    /* Is a signed integer */
       MOD_INT,    /* Is an unsigned integer */
       MOD_IND,    /* Is an unsigned integer, but no value => default */
       MOD_NL,     /* Is a newline value */
       MOD_NN,     /* Is a number or a name; more than one may occur */
       MOD_OPT,    /* Is an option bit */
       MOD_SIZ,    /* Is a PCRE2_SIZE value */
       MOD_STR };  /* Is a string */
401
402 /* Control bits. Some apply to compiling, some to matching, but some can be set
403 either on a pattern or a data line, so they must all be distinct. There are now
404 so many of them that they are split into two fields. */
405
/* First control word: one bit per control, written as a shift so that the bit
number is visible at a glance. */

#define CTL_AFTERTEXT        (1u <<  0)
#define CTL_ALLAFTERTEXT     (1u <<  1)
#define CTL_ALLCAPTURES      (1u <<  2)
#define CTL_ALLUSEDTEXT      (1u <<  3)
#define CTL_ALTGLOBAL        (1u <<  4)
#define CTL_BINCODE          (1u <<  5)
#define CTL_CALLOUT_CAPTURE  (1u <<  6)
#define CTL_CALLOUT_INFO     (1u <<  7)
#define CTL_CALLOUT_NONE     (1u <<  8)
#define CTL_DFA              (1u <<  9)
#define CTL_EXPAND           (1u << 10)
#define CTL_FINDLIMITS       (1u << 11)
#define CTL_FULLBINCODE      (1u << 12)
#define CTL_GETALL           (1u << 13)
#define CTL_GLOBAL           (1u << 14)
#define CTL_HEXPAT           (1u << 15)
#define CTL_INFO             (1u << 16)
#define CTL_JITFAST          (1u << 17)
#define CTL_JITVERIFY        (1u << 18)
#define CTL_MARK             (1u << 19)
#define CTL_MEMORY           (1u << 20)
#define CTL_NULLCONTEXT      (1u << 21)
#define CTL_POSIX            (1u << 22)
#define CTL_POSIX_NOSUB      (1u << 23)
#define CTL_PUSH             (1u << 24)
#define CTL_PUSHCOPY         (1u << 25)
#define CTL_STARTCHAR        (1u << 26)
#define CTL_ZERO_TERMINATE   (1u << 27)
/* Bits 28 and 29 are spare */
#define CTL_NL_SET           (1u << 30)  /* Informational */
#define CTL_BSR_SET          (1u << 31)  /* Informational */

/* Second control word */

#define CTL2_SUBSTITUTE_EXTENDED         (1u << 0)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  (1u << 1)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    (1u << 2)
#define CTL2_SUBSTITUTE_UNSET_EMPTY      (1u << 3)

/* Combinations */

#define CTL_DEBUG    (CTL_FULLBINCODE | CTL_INFO)  /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG | CTL_BINCODE | CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL | CTL_GLOBAL)

/* These are all the controls that may be set either on a pattern or on a
data line, one mask for each control word. */

#define CTL_ALLPD \
  (CTL_AFTERTEXT | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | CTL_ALLUSEDTEXT | \
   CTL_ALTGLOBAL | CTL_GLOBAL | CTL_MARK | CTL_MEMORY | CTL_STARTCHAR)

#define CTL2_ALLPD \
  (CTL2_SUBSTITUTE_EXTENDED | CTL2_SUBSTITUTE_OVERFLOW_LENGTH | \
   CTL2_SUBSTITUTE_UNKNOWN_UNSET | CTL2_SUBSTITUTE_UNSET_EMPTY)
469
470 /* Structures for holding modifier information for patterns and subject strings
471 (data). Fields containing modifiers that can be set either for a pattern or a
472 subject must be at the start and in the same order in both cases so that the
473 same offset in the big table below works for both. */
474
475 typedef struct patctl { /* Structure for pattern modifiers. */
476 uint32_t options; /* Must be in same position as datctl */
477 uint32_t control; /* Must be in same position as datctl */
478 uint32_t control2; /* Must be in same position as datctl */
479 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
480 uint32_t jit;
481 uint32_t stackguard_test;
482 uint32_t tables_id;
483 uint32_t regerror_buffsize;
484 uint8_t locale[LOCALESIZE];
485 } patctl;
486
487 #define MAXCPYGET 10
488 #define LENCPYGET 64
489
490 typedef struct datctl { /* Structure for data line modifiers. */
491 uint32_t options; /* Must be in same position as patctl */
492 uint32_t control; /* Must be in same position as patctl */
493 uint32_t control2; /* Must be in same position as patctl */
494 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
495 uint32_t cfail[2];
496 int32_t callout_data;
497 int32_t copy_numbers[MAXCPYGET];
498 int32_t get_numbers[MAXCPYGET];
499 uint32_t jitstack;
500 uint32_t oveccount;
501 uint32_t offset;
502 uint8_t copy_names[LENCPYGET];
503 uint8_t get_names[LENCPYGET];
504 } datctl;
505
506 /* Ids for which context to modify. */
507
enum { CTX_PAT,            /* Active pattern context */
       CTX_POPPAT,         /* Ditto, for a popped pattern */
       CTX_DEFPAT,         /* Default pattern context */
       CTX_DAT,            /* Active data (match) context */
       CTX_DEFDAT };       /* Default data (match) context */

/* Macros to simplify the big table below. Each one yields the byte offset of a
named field within one particular structure. */

#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)  /* Compile context */
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)    /* Match context */
#define PO(name) offsetof(patctl, name)                      /* Pattern controls */
#define PD(name) PO(name)   /* Same as PO; the table uses it for fields */
                            /* that patctl and datctl have in common */
#define DO(name) offsetof(datctl, name)                      /* Data controls */
521
522 /* Table of all long-form modifiers. Must be in collating sequence of modifier
523 name because it is searched by binary chop. */
524
525 typedef struct modstruct {
526 const char *name;
527 uint16_t which;
528 uint16_t type;
529 uint32_t value;
530 PCRE2_SIZE offset;
531 } modstruct;
532
533 static modstruct modlist[] = {
534 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
535 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
536 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
537 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
538 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
539 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
540 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
541 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
542 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
543 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
544 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
545 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
546 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
547 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
548 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
549 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
550 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
551 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
552 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
553 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
554 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
555 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
556 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
557 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
558 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
559 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
560 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
561 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
562 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
563 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
564 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
565 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
566 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
567 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
568 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
569 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
570 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
571 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
572 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
573 { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
574 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
575 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
576 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
577 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
578 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
579 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
580 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
581 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
582 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
583 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
584 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
585 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
586 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
587 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
588 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
589 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
590 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
591 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
592 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
593 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
594 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
595 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
596 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
597 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
598 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
599 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
600 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
601 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
602 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
603 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
604 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
605 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
606 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
607 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
608 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
609 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
610 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
611 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
612 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
613 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
614 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
615 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
616 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
617 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
618 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
619 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
620 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
621 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
622 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
623 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
624 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
625 };
626
627 #define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct)
628
/* Controls and options that are supported for use with the POSIX interface.
There is one mask for the option bits and one for each of the two control
words, separately for compiling and for matching. No bit of the second control
word is supported in either case. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
  PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
  PCRE2_UNGREEDY)

#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
  PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)

/* Control bits that are not ignored with 'push'. Again there is one mask for
each control word, and the second one is empty. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
  CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)

/* Controls that apply only at compile time with 'push'. */

#define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2 (0)

/* Controls that are forbidden with #pop or #popcopy. All of them are in the
first control word. */

#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
  CTL_PUSHCOPY)
663
/* Pattern controls that are mutually exclusive. At present these are all in
the first control word. Each element is the OR of two control bits that must
not both be set. Note that CTL_POSIX_NOSUB is always accompanied by CTL_POSIX,
so it doesn't need its own entries. */

static uint32_t exclusive_pat_controls[] = {
  CTL_POSIX | CTL_HEXPAT,
  CTL_POSIX | CTL_PUSH,
  CTL_POSIX | CTL_PUSHCOPY,
  CTL_EXPAND | CTL_HEXPAT };

/* Data controls that are mutually exclusive. At present these are all in the
first control word. The elements have the same form as in the pattern table
above. */

static uint32_t exclusive_dat_controls[] = {
  CTL_ALLUSEDTEXT | CTL_STARTCHAR,
  CTL_FINDLIMITS | CTL_NULLCONTEXT };
679
680 /* Table of single-character abbreviated modifiers. The index field is
681 initialized to -1, but the first time the modifier is encountered, it is filled
682 in with the index of the full entry in modlist, to save repeated searching when
683 processing multiple test items. This short list is searched serially, so its
684 order does not matter. */
685
/* One entry of the single-character modifier table. */

typedef struct c1modstruct {
  const char *fullname;   /* Long-form name of the modifier */
  uint32_t    onechar;    /* The single-character abbreviation */
  int         index;      /* Starts as -1; later holds the modlist index */
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",      'B',           -1 },
  { "info",         'I',           -1 },
  { "global",       'g',           -1 },
  { "caseless",     'i',           -1 },
  { "multiline",    'm',           -1 },
  { "dotall",       's',           -1 },
  { "extended",     'x',           -1 }
};

/* Number of entries in c1modlist. Parenthesized so that the expansion is a
single value wherever it is used (after a cast, or next to %, / or *). */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modlist[0]))
703
704 /* Table of arguments for the -C command line option. Use macros to make the
705 table itself easier to read. */
706
707 #if defined SUPPORT_PCRE2_8
708 #define SUPPORT_8 1
709 #endif
710 #if defined SUPPORT_PCRE2_16
711 #define SUPPORT_16 1
712 #endif
713 #if defined SUPPORT_PCRE2_32
714 #define SUPPORT_32 1
715 #endif
716
717 #ifndef SUPPORT_8
718 #define SUPPORT_8 0
719 #endif
720 #ifndef SUPPORT_16
721 #define SUPPORT_16 0
722 #endif
723 #ifndef SUPPORT_32
724 #define SUPPORT_32 0
725 #endif
726
727 #ifdef EBCDIC
728 #define SUPPORT_EBCDIC 1
729 #define EBCDIC_NL CHAR_LF
730 #else
731 #define SUPPORT_EBCDIC 0
732 #define EBCDIC_NL 0
733 #endif
734
735 #ifdef NEVER_BACKSLASH_C
736 #define BACKSLASH_C 0
737 #else
738 #define BACKSLASH_C 1
739 #endif
740
741 typedef struct coptstruct {
742 const char *name;
743 uint32_t type;
744 uint32_t value;
745 } coptstruct;
746
747 enum { CONF_BSR,
748 CONF_FIX,
749 CONF_FIZ,
750 CONF_INT,
751 CONF_NL
752 };
753
754 static coptstruct coptlist[] = {
755 { "backslash-C", CONF_FIX, BACKSLASH_C },
756 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
757 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
758 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
759 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
760 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
761 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
762 { "pcre2-16", CONF_FIX, SUPPORT_16 },
763 { "pcre2-32", CONF_FIX, SUPPORT_32 },
764 { "pcre2-8", CONF_FIX, SUPPORT_8 },
765 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
766 };
767
768 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct)
769
770 #undef SUPPORT_8
771 #undef SUPPORT_16
772 #undef SUPPORT_32
773 #undef SUPPORT_EBCDIC
774
775
776 /* ----------------------- Static variables ------------------------ */
777
778 static FILE *infile;
779 static FILE *outfile;
780
781 static const void *last_callout_mark;
782 static PCRE2_JIT_STACK *jit_stack = NULL;
783 static size_t jit_stack_size = 0;
784
785 static BOOL first_callout;
786 static BOOL jit_was_used;
787 static BOOL restrict_for_perl_test = FALSE;
788 static BOOL show_memory = FALSE;
789
790 static int code_unit_size; /* Bytes */
791 static int jitrc; /* Return from JIT compile */
792 static int test_mode = DEFAULT_TEST_MODE;
793 static int timeit = 0;
794 static int timeitm = 0;
795
/* Accumulated clock totals for compiling, JIT compiling and matching. They
are given internal linkage like the rest of the state in this section, so
that they cannot clash with identically named symbols in other objects. */

static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
799
800 static uint32_t dfa_matched;
801 static uint32_t forbid_utf = 0;
802 static uint32_t maxlookbehind;
803 static uint32_t max_oveccount;
804 static uint32_t callout_count;
805
806 static uint16_t local_newline_default = 0;
807
808 static VERSION_TYPE jittarget[VERSION_SIZE];
809 static VERSION_TYPE version[VERSION_SIZE];
810 static VERSION_TYPE uversion[VERSION_SIZE];
811
812 static patctl def_patctl;
813 static patctl pat_patctl;
814 static datctl def_datctl;
815 static datctl dat_datctl;
816
817 static void *patstack[PATSTACKSIZE];
818 static int patstacknext = 0;
819
820 #ifdef SUPPORT_PCRE2_8
821 static regex_t preg = { NULL, NULL, 0, 0, 0 };
822 #endif
823
824 static int *dfa_workspace = NULL;
825 static const uint8_t *locale_tables = NULL;
826 static uint8_t locale_name[32];
827
/* Buffers are needed for building 16-bit and 32-bit strings. 8-bit strings
need no rebuilding, but pbuffer8 follows the same naming scheme so that it can
be used in the mode-selecting macros. Every input line is read into "buffer",
which has the same size as pbuffer8. A pattern line is always copied into
pbuffer8 for use in callouts, even when it is actually compiled from pbuffer16
or pbuffer32. */

static size_t   pbuffer8_size = 50000;     /* Initial size, bytes */
static uint8_t *pbuffer8      = NULL;
static uint8_t *buffer        = NULL;

/* Processed data lines are put into dbuffer. In the non-8-bit modes it is
cast as needed, and it grows as necessary for long data lines. */

static size_t   dbuffer_size = 1u << 14;   /* Initial size, bytes */
static uint8_t *dbuffer      = NULL;
843
844
845 /* ---------------- Mode-dependent variables -------------------*/
846
/* 8-bit library objects. Each name ends in the code unit width so that the
mode-selecting macros can form it by pasting "8" onto a common stem. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif
854
/* 16-bit library objects, named with a "16" suffix for the mode-selecting
macros, plus the buffer that holds 16-bit strings. */

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE                pbuffer16_size = 0;   /* Set only when needed */
static uint16_t                 *pbuffer16 = NULL;
#endif
864
/* 32-bit library objects, named with a "32" suffix for the mode-selecting
macros, plus the buffer that holds 32-bit strings. */

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE                pbuffer32_size = 0;   /* Set only when needed */
static uint32_t                 *pbuffer32 = NULL;
#endif
874
875
876 /* ---------------- Macros that work in all modes ----------------- */
877
/* Shorthands built on the mode-selecting macros: CAST8VAR casts the selected
variable to uint8_t *, SET assigns to it, and SETPLUS adds to it. */

#define CAST8VAR(var)       CASTVAR(uint8_t *, var)
#define SET(var,value)      SETOP(var,value,=)
#define SETPLUS(var,value)  SETOP(var,value,+=)
/* Length of an 8-bit string. The argument is parenthesized so that the cast
applies to the whole argument expression, not just to its first operand. */

#define strlen8(x) strlen((char *)(x))
882
883
884 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
885
886 /* Define macros for variables and functions that must be selected dynamically
887 depending on the mode setting (8, 16, 32). These are dependent on which modes
888 are supported. */
889
890 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
891 defined (SUPPORT_PCRE2_32)) >= 2
892
893 /* ----- All three modes supported ----- */
894
895 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
896
/* Value-yielding selectors. Each one picks the 8-bit, 16-bit or 32-bit
variant according to test_mode; any mode other than 8-bit or 16-bit selects
the 32-bit variant.

CASTFLD   field "fld" of the object whose name stem is "obj", cast to "type"
CASTVAR   the variable whose name stem is "var", cast to "type"
CODE_UNIT code unit number "idx" of the string at "ptr", as a uint32_t */

#define CASTFLD(type,obj,fld) ( \
  (test_mode == PCRE8_MODE)?  (type)(G(obj,8)->fld) : \
  (test_mode == PCRE16_MODE)? (type)(G(obj,16)->fld) : \
                              (type)(G(obj,32)->fld))

#define CASTVAR(type,var) ( \
  (test_mode == PCRE8_MODE)?  (type)G(var,8) : \
  (test_mode == PCRE16_MODE)? (type)G(var,16) : \
                              (type)G(var,32))

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == PCRE8_MODE)?  (uint32_t)(((PCRE2_SPTR8)(ptr))[idx]) : \
  (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(ptr))[idx]) : \
                              (uint32_t)(((PCRE2_SPTR32)(ptr))[idx]))
908
/* DATCTXCPY copies a whole match context and PATCTXCPY a whole compile
context, from the object with name stem "src" to the one with name stem "dst",
for the width selected by test_mode (32-bit when neither 8-bit nor 16-bit).
FLD yields field "fld" of the selected object, without a cast. */

#define DATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_match_context_32))

#define FLD(obj,fld) ( \
  (test_mode == PCRE8_MODE)?  G(obj,8)->fld : \
  (test_mode == PCRE16_MODE)? G(obj,16)->fld : \
                              G(obj,32)->fld)

#define PATCTXCPY(dst,src) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(dst,8), G(src,8), sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(dst,16), G(src,16), sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(dst,32), G(src,32), sizeof(pcre2_compile_context_32))
925
/* Call the pcharsN function for the current mode. The pointer "p" is cast to
the mode's string type before "offset" is added, so the offset counts code
units. PCHARS stores the function's result in "lv"; PCHARSV discards it. In
these two macros the 8-bit function is the fallback. */

#define PCHARS(lv, p, offset, len, utf, file) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, file); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, file); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, file)

#define PCHARSV(p, offset, len, utf, file) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, file); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, file); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, file)
941
/* Enumerate the callouts of the current mode's compiled_codeN. "callback" is
cast to that mode's enumeration callback type, "data" is passed through, and
the function's result is stored in "rc". */

#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))callback,data); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_callout_enumerate_16(compiled_code16, \
      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))callback,data); \
  else \
    rc = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))callback,data)
952
/* Copy a compiled pattern with pcre2_code_copy_N. FROM_VOID copies "src" as
given and stores the copy in the variable with name stem "code"; TO_VOID
copies the variable with name stem "code" and stores the copy, cast to void *,
in "dst". */

#define PCRE2_CODE_COPY_FROM_VOID(code,src) \
  if (test_mode == PCRE8_MODE) \
    G(code,8) = pcre2_code_copy_8(src); \
  else if (test_mode == PCRE16_MODE) \
    G(code,16) = pcre2_code_copy_16(src); \
  else \
    G(code,32) = pcre2_code_copy_32(src)

#define PCRE2_CODE_COPY_TO_VOID(dst,code) \
  if (test_mode == PCRE8_MODE) \
    dst = (void *)pcre2_code_copy_8(G(code,8)); \
  else if (test_mode == PCRE16_MODE) \
    dst = (void *)pcre2_code_copy_16(G(code,16)); \
  else \
    dst = (void *)pcre2_code_copy_32(G(code,32))
968
/* PCRE2_COMPILE compiles the pattern buffer with name stem "pat" and stores
the result in the variable with name stem "code". PCRE2_DFA_MATCH runs
pcre2_dfa_match_N and stores its result in "rc"; "code" and "mdata" are name
stems, "subj" is cast to the mode's string type, and the other arguments are
passed through unchanged. */

#define PCRE2_COMPILE(code,pat,len,opts,errptr,erroff,cctx) \
  if (test_mode == PCRE8_MODE) \
    G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errptr,erroff,cctx); \
  else if (test_mode == PCRE16_MODE) \
    G(code,16) = pcre2_compile_16(G(pat,16),len,opts,errptr,erroff,cctx); \
  else \
    G(code,32) = pcre2_compile_32(G(pat,32),len,opts,errptr,erroff,cctx)

#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,ws,wscount) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),mctx,ws,wscount); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),mctx,ws,wscount); \
  else \
    rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),mctx,ws,wscount)
984
/* PCRE2_GET_ERROR_MESSAGE fetches the text for "errcode" into the buffer with
name stem "buf", passing the matching <buf>N_size variable as its size.
PCRE2_GET_OVECTOR_COUNT and PCRE2_GET_STARTCHAR query the match data block
with name stem "mdata". Each stores the function's result in "rc". */

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_error_message_8(errcode,G(buf,8),G(G(buf,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_error_message_16(errcode,G(buf,16),G(G(buf,16),_size)); \
  else \
    rc = pcre2_get_error_message_32(errcode,G(buf,32),G(G(buf,32),_size))

#define PCRE2_GET_OVECTOR_COUNT(rc,mdata) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_ovector_count_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_ovector_count_16(G(mdata,16)); \
  else \
    rc = pcre2_get_ovector_count_32(G(mdata,32))

#define PCRE2_GET_STARTCHAR(rc,mdata) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_get_startchar_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_get_startchar_16(G(mdata,16)); \
  else \
    rc = pcre2_get_startchar_32(G(mdata,32))
1008
/* JIT dispatchers. "code", "ctx" and "mdata" are name stems that have the
code unit width pasted onto them; "subj" is cast to the mode's string type;
everything else is passed through unchanged. PCRE2_JIT_COMPILE and
PCRE2_JIT_MATCH store the function's result in "rc". */

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_compile_8(G(code,8),opts); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_compile_16(G(code,16),opts); \
  else \
    rc = pcre2_jit_compile_32(G(code,32),opts)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(ctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(ctx,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(ctx,32))

#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),mctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),mctx); \
  else \
    rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),mctx)
1026
/* Create a JIT stack with the current mode's function and store it, cast to
PCRE2_JIT_STACK *, in "a". Like the other statement macros here, the expansion
does not end with a semicolon: the caller supplies it, which keeps the macro
usable as the body of an if that has an else. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)
1034
/* Assign a JIT stack to the match context with name stem "a", casting "b" to
the mode's JIT callback type and passing "c" through. The expansion does not
end with a semicolon, so the macro can be the body of an if that has an
else. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)
1042
/* Free the JIT stack "a" with the current mode's function, after casting it
to that mode's stack type. The expansion does not end with a semicolon, so the
macro can be the body of an if that has an else. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1050
/* PCRE2_MAKETABLES stores in "tables" the result of pcre2_maketables_N(NULL).
PCRE2_MATCH runs pcre2_match_N and stores its result in "rc"; "code" and
"mdata" are name stems, "subj" is cast to the mode's string type, and the
remaining arguments are passed through unchanged. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == PCRE8_MODE) \
    tables = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    tables = pcre2_maketables_16(NULL); \
  else \
    tables = pcre2_maketables_32(NULL)

#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),mctx); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),mctx); \
  else \
    rc = pcre2_match_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),mctx)
1063
/* Match data block management. "mdata" and "code" are name stems. The two
create macros store the new block in the variable formed from "mdata"; the
free macro passes that variable to pcre2_match_data_free_N. */

#define PCRE2_MATCH_DATA_CREATE(mdata,count,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_8(count,gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_16(count,gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_32(count,gctx)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == PCRE8_MODE) \
    G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),gctx); \
  else if (test_mode == PCRE16_MODE) \
    G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),gctx); \
  else \
    G(mdata,32) = pcre2_match_data_create_from_pattern_32(G(code,32),gctx)

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(mdata,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(mdata,16)); \
  else \
    pcre2_match_data_free_32(G(mdata,32))
1087
/* PCRE2_PATTERN_INFO calls pcre2_pattern_info_N on the compiled pattern with
name stem "code" and stores the result in "rc". PCRE2_PRINTINT calls
pcre2_printint_N on the current mode's compiled_codeN, writing to outfile,
with "arg" as the final argument. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_pattern_info_8(G(code,8),what,where); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_pattern_info_16(G(code,16),what,where); \
  else \
    rc = pcre2_pattern_info_32(G(code,32),what,where)

#define PCRE2_PRINTINT(arg) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8,outfile,arg); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16,outfile,arg); \
  else \
    pcre2_printint_32(compiled_code32,outfile,arg)
1103
/* Serialization dispatchers. "codes" is cast to the mode's pointer-to-code-
pointer type (const for encoding), "gctx" is a name stem, and the remaining
arguments are passed through unchanged. The decode and encode macros store
the function's result in "rc". */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes, \
      G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes, \
      G(gctx,16)); \
  else \
    rc = pcre2_serialize_decode_32((pcre2_code_32 **)codes,count,bytes, \
      G(gctx,32))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count, \
      bytesptr,sizeptr,G(gctx,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count, \
      bytesptr,sizeptr,G(gctx,16)); \
  else \
    rc = pcre2_serialize_encode_32((const pcre2_code_32 **)codes,count, \
      bytesptr,sizeptr,G(gctx,32))

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(bytes); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(bytes); \
  else \
    pcre2_serialize_free_32(bytes)
1127
/* Store in "r" the result of the current mode's
pcre2_serialize_get_number_of_codes_N function applied to "a". The last line
carries neither a semicolon nor a line continuation: the caller supplies the
semicolon, and the definition ends here regardless of what follows it. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1135
/* Set the callout function in the match context with name stem "a". "b" is
cast to the mode's callout function type and "c" is passed through. The
expansion does not end with a semicolon, so the macro can be the body of an if
that has an else. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8), \
      (int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16), \
      (int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32), \
      (int (*)(pcre2_callout_block_32 *, void *))b,c)
1143
/* Compile context setters. "ctx" is a name stem; the remaining arguments are
passed to the current mode's function unchanged. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(ctx,8),tables); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(ctx,16),tables); \
  else \
    pcre2_set_character_tables_32(G(ctx,32),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(ctx,8),guard,data); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(ctx,16),guard,data); \
  else \
    pcre2_set_compile_recursion_guard_32(G(ctx,32),guard,data)
1159
/* Limit setters. Each passes "limit" to the current mode's pcre2_set_xxx_N
function for the context whose name stem is "ctx". */

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_match_limit_32(G(ctx,32),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(ctx,16),limit); \
  else \
    pcre2_set_max_pattern_length_32(G(ctx,32),limit)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_offset_limit_32(G(ctx,32),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_parens_nest_limit_32(G(ctx,32),limit)

#define PCRE2_SET_RECURSION_LIMIT(ctx,limit) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_recursion_limit_8(G(ctx,8),limit); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_recursion_limit_16(G(ctx,16),limit); \
  else \
    pcre2_set_recursion_limit_32(G(ctx,32),limit)
1199
/* Run pcre2_substitute_N and store its result in "rc". "code", "mdata" and
"mctx" are name stems; "subj" and "repl" are cast to the mode's string type
and "outbuf" to a pointer to its code unit type; the remaining arguments are
passed through unchanged. */

#define PCRE2_SUBSTITUTE(rc,code,subj,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlen) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subj,len,start,opts, \
      G(mdata,8),G(mctx,8),(PCRE2_SPTR8)repl,rlen, \
      (PCRE2_UCHAR8 *)outbuf,outlen); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subj,len,start,opts, \
      G(mdata,16),G(mctx,16),(PCRE2_SPTR16)repl,rlen, \
      (PCRE2_UCHAR16 *)outbuf,outlen); \
  else \
    rc = pcre2_substitute_32(G(code,32),(PCRE2_SPTR32)subj,len,start,opts, \
      G(mdata,32),G(mctx,32),(PCRE2_SPTR32)repl,rlen, \
      (PCRE2_UCHAR32 *)outbuf,outlen)
1210
/* Substring copy and free dispatchers. "mdata" and "name" are name stems;
"buf" is cast to a pointer to the mode's code unit type; the other arguments
are passed through unchanged. The copy macros store the function's result in
"rc". */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 *)buf,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 *)buf,lenptr); \
  else \
    rc = pcre2_substring_copy_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 *)buf,lenptr)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 *)buf,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 *)buf,lenptr); \
  else \
    rc = pcre2_substring_copy_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 *)buf,lenptr)

#define PCRE2_SUBSTRING_FREE(buf) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)buf); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)buf); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)buf)
1232
/* Substring get dispatchers. "mdata" and "name" are name stems; "bufptr" is
cast to a pointer to a pointer to the mode's code unit type; the other
arguments are passed through unchanged. The function's result is stored in
"rc". */

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufptr,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
      (PCRE2_UCHAR8 **)bufptr,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
      (PCRE2_UCHAR16 **)bufptr,lenptr); \
  else \
    rc = pcre2_substring_get_byname_32(G(mdata,32),G(name,32), \
      (PCRE2_UCHAR32 **)bufptr,lenptr)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufptr,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
      (PCRE2_UCHAR8 **)bufptr,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
      (PCRE2_UCHAR16 **)bufptr,lenptr); \
  else \
    rc = pcre2_substring_get_bynumber_32(G(mdata,32),num, \
      (PCRE2_UCHAR32 **)bufptr,lenptr)
1248
/* Substring length dispatchers. "mdata" and "name" are name stems; "num" and
"lenptr" are passed through unchanged. The function's result is stored in
"rc". */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenptr); \
  else \
    rc = pcre2_substring_length_byname_32(G(mdata,32),G(name,32),lenptr)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenptr); \
  else \
    rc = pcre2_substring_length_bynumber_32(G(mdata,32),num,lenptr)
1264
/* Substring list dispatchers and name-to-number lookup. "mdata", "code" and
"name" are name stems; "listptr" and "list" are cast to the mode's list
types; "lensptr" is passed through unchanged. The get and lookup macros store
the function's result in "rc". */

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listptr,lensptr) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_list_get_8(G(mdata,8), \
      (PCRE2_UCHAR8 ***)listptr,lensptr); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_list_get_16(G(mdata,16), \
      (PCRE2_UCHAR16 ***)listptr,lensptr); \
  else \
    rc = pcre2_substring_list_get_32(G(mdata,32), \
      (PCRE2_UCHAR32 ***)listptr,lensptr)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)list); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)list); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == PCRE8_MODE) \
    rc = pcre2_substring_number_from_name_8(G(code,8),G(name,8)); \
  else if (test_mode == PCRE16_MODE) \
    rc = pcre2_substring_number_from_name_16(G(code,16),G(name,16)); \
  else \
    rc = pcre2_substring_number_from_name_32(G(code,32),G(name,32))
1288
/* Generic accessors for the variable whose name stem is "var" (or the object
whose name stem is "obj"), in the width selected by test_mode; 32-bit is the
fallback.

PTR        the variable, cast to void *
SETFLD     assign "value" to field "fld" of the object
SETFLDVEC  assign "value" to element "idx" of vector field "fld"
SETOP      apply the assignment operator "op" with right-hand side "value" */

#define PTR(var) ( \
  (test_mode == PCRE8_MODE)?  (void *)G(var,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(var,16) : \
                              (void *)G(var,32))

#define SETFLD(obj,fld,value) \
  if (test_mode == PCRE8_MODE) \
    G(obj,8)->fld = value; \
  else if (test_mode == PCRE16_MODE) \
    G(obj,16)->fld = value; \
  else \
    G(obj,32)->fld = value

#define SETFLDVEC(obj,fld,idx,value) \
  if (test_mode == PCRE8_MODE) \
    G(obj,8)->fld[idx] = value; \
  else if (test_mode == PCRE16_MODE) \
    G(obj,16)->fld[idx] = value; \
  else \
    G(obj,32)->fld[idx] = value

#define SETOP(var,value,op) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) op value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) op value; \
  else \
    G(var,32) op value
1308
/* SETCASTPTR assigns "value", cast to a pointer to the mode's code unit type,
to the variable whose name stem is "var". STRLEN yields, as an int, the length
of the string at "p" as measured by strlen, strlen16 or strlen32 according to
the mode. */

#define SETCASTPTR(var,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) = (uint8_t *)(value); \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) = (uint16_t *)(value); \
  else \
    G(var,32) = (uint32_t *)(value)

#define STRLEN(p) ( \
  (test_mode == PCRE8_MODE)?  ((int)strlen((char *)p)) : \
  (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
                              ((int)strlen32((PCRE2_SPTR32)p)))
1320
/* SUB1 and SUB2 call the function whose name stem is "fn" with one or two
arguments, each given as a name stem. TEST and TESTFLD compare the selected
variable (or field "fld" of the selected object) with "value" using the
operator "op"; they yield 0 when test_mode is none of the three modes. */

#define SUB1(fn,arg) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg,16)); \
  else \
    G(fn,32)(G(arg,32))

#define SUB2(fn,arg1,arg2) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg1,8),G(arg2,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg1,16),G(arg2,16)); \
  else \
    G(fn,32)(G(arg1,32),G(arg2,32))

#define TEST(var,op,value) ( \
  (test_mode == PCRE8_MODE  && G(var,8) op (value)) || \
  (test_mode == PCRE16_MODE && G(var,16) op (value)) || \
  (test_mode == PCRE32_MODE && G(var,32) op (value)))

#define TESTFLD(obj,fld,op,value) ( \
  (test_mode == PCRE8_MODE  && G(obj,8)->fld op (value)) || \
  (test_mode == PCRE16_MODE && G(obj,16)->fld op (value)) || \
  (test_mode == PCRE32_MODE && G(obj,32)->fld op (value)))
1340
1341
1342
1343 /* ----- Two out of three modes are supported ----- */
1344
1345 #else
1346
1347 /* We can use some macro trickery to make a single set of definitions work in
1348 the three different cases. */
1349
1350 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1351
/* BITONE is the wider and BITTWO the narrower of the two supported code unit
widths. When 32-bit is supported together with either other width, BITONE is
32 and BITTWO is 16 if 16-bit is supported, otherwise 8. In every other case
the pair is 16 and 8. */

#if defined(SUPPORT_PCRE2_32) && \
    (defined(SUPPORT_PCRE2_16) || defined(SUPPORT_PCRE2_8))
#define BITONE 32
#if defined(SUPPORT_PCRE2_16)
#define BITTWO 16
#else
#define BITTWO 8
#endif

#else
#define BITONE 16
#define BITTWO 8
#endif
1368
1369
1370 /* ----- Common macros for two-mode cases ----- */
1371
/* Two-mode value selectors. The BITONE variant is used when test_mode is the
BITONE mode, and the BITTWO variant otherwise.

CASTFLD   field "fld" of the object whose name stem is "obj", cast to "type"
CASTVAR   the variable whose name stem is "var", cast to "type"
CODE_UNIT code unit number "idx" of the string at "ptr", as a uint32_t */

#define CASTFLD(type,obj,fld) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(obj,BITONE)->fld) : \
    (type)(G(obj,BITTWO)->fld))

#define CASTVAR(type,var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : \
    (type)G(var,BITTWO))

#define CODE_UNIT(ptr,idx) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(ptr))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(ptr))[idx]))
1384
/* Two-mode context copies and field access. DATCTXCPY copies a whole match
context and PATCTXCPY a whole compile context from the object with name stem
"src" to the one with name stem "dst"; FLD yields field "fld" of the selected
object without a cast. */

#define DATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE), G(src,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO), G(src,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

#define FLD(obj,fld) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(obj,BITONE)->fld : G(obj,BITTWO)->fld)

#define PATCTXCPY(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(dst,BITONE), G(src,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(dst,BITTWO), G(src,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1399
/* Two-mode forms of PCHARS and PCHARSV. The pointer "p" is cast to the
selected mode's string type before "offset" is added. PCHARS stores the
result of the pcharsN function in "lv"; PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, file) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, file); \
  else \
    lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, file)

#define PCHARSV(p, offset, len, utf, file) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, file); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, file)
1411
/* Enumerate the callouts of the selected mode's compiled_codeN, casting "b"
to that mode's enumeration callback type and storing the result in "a". The
function stem carries its trailing underscore explicitly, like every other
dispatcher in this section, so that pasting the width onto it directly yields
pcre2_callout_enumerate_NN. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1419
/* Two-mode code copy and compile dispatchers. FROM_VOID copies "src" as given
into the variable with name stem "code"; TO_VOID copies that variable and
stores the copy, cast to void *, in "dst". PCRE2_COMPILE compiles the pattern
buffer with name stem "pat" into the variable with name stem "code". */

#define PCRE2_CODE_COPY_FROM_VOID(code,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(code,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

#define PCRE2_CODE_COPY_TO_VOID(dst,code) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(code,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(code,BITTWO))

#define PCRE2_COMPILE(code,pat,len,opts,errptr,erroff,cctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pat,BITONE),len,opts, \
      errptr,erroff,cctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pat,BITTWO),len,opts, \
      errptr,erroff,cctx)
1437
/* Two-mode DFA match and query dispatchers. "code", "mdata" and "buf" are
name stems; "subj" is cast to the selected mode's string type; the remaining
arguments are passed through unchanged. PCRE2_GET_ERROR_MESSAGE passes the
matching <buf>N_size variable as the buffer size. Each macro stores the
function's result in "rc". */

#define PCRE2_DFA_MATCH(rc,code,subj,len,start,opts,mdata,mctx,ws,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx,ws,wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx,ws,wscount)

#define PCRE2_GET_ERROR_MESSAGE(rc,errcode,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errcode,G(buf,BITONE), \
      G(G(buf,BITONE),_size)); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errcode,G(buf,BITTWO), \
      G(G(buf,BITTWO),_size))

#define PCRE2_GET_OVECTOR_COUNT(rc,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    rc = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

#define PCRE2_GET_STARTCHAR(rc,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    rc = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))
1463
/* Two-mode JIT dispatchers. "code", "ctx" and "mdata" are name stems; "subj"
is cast to the selected mode's string type; the remaining arguments are passed
through unchanged. PCRE2_JIT_COMPILE and PCRE2_JIT_MATCH store the function's
result in "rc". */

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),opts); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),opts)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(ctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(ctx,BITTWO))

#define PCRE2_JIT_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx)
1483
/* Create a JIT stack with the selected mode's function and store it, cast to
PCRE2_JIT_STACK *, in "a". The last line carries neither a semicolon nor a
line continuation: the caller supplies the semicolon, and the definition ends
here regardless of what follows it. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d)
1489
/* Assign a JIT stack to the match context with name stem "a", casting "b" to
the selected mode's JIT callback type and passing "c" through. The expansion
does not end with a semicolon, so the macro can be the body of an if that has
an else. */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE), \
      (G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))b,c)
1495
/* Free the JIT stack "a" with the selected mode's function, after casting it
to that mode's stack type. The expansion does not end with a semicolon, so the
macro can be the body of an if that has an else. */

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a)
1501
/* Two-mode forms of PCRE2_MAKETABLES, which stores the result of
pcre2_maketables_N(NULL) in "tables", and PCRE2_MATCH, which stores the result
of pcre2_match_N in "rc". "code" and "mdata" are name stems and "subj" is cast
to the selected mode's string type. */

#define PCRE2_MAKETABLES(tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    tables = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    tables = G(pcre2_maketables_,BITTWO)(NULL)

#define PCRE2_MATCH(rc,code,subj,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subj,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subj,len,start,opts, \
      G(mdata,BITTWO),mctx)
1515
/* Two-mode match data block management. "mdata" and "code" are name stems.
The create macros store the new block in the variable formed from "mdata";
the free macro passes that variable to the selected mode's free function. */

#define PCRE2_MATCH_DATA_CREATE(mdata,count,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(count,gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(count,gctx)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)( \
      G(code,BITONE),gctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)( \
      G(code,BITTWO),gctx)

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))
1533
/* Two-mode forms of PCRE2_PATTERN_INFO, which queries the compiled pattern
with name stem "code" and stores the result in "rc", and PCRE2_PRINTINT, which
calls pcre2_printint_N on the selected mode's compiled_codeN, writing to
outfile, with "arg" as the final argument. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)
1545
/* Two-mode serialization dispatchers. "codes" is cast to the selected mode's
pointer-to-code-pointer type (const for encoding), "gctx" is a name stem, and
the remaining arguments are passed through unchanged. Every macro except the
free macro stores the function's result in "rc". */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(gctx,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesptr,sizeptr,gctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (const G(pcre2_code_,BITONE) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (const G(pcre2_code_,BITTWO) **)codes,count,bytesptr,sizeptr, \
      G(gctx,BITTWO))

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1569
/* Set the callout function in the match context with name stem "a". "b" is
cast to the selected mode's callout function type and "c" is passed through.
The expansion does not end with a semicolon, so the macro can be the body of
an if that has an else. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c)
1577
/* Two-mode compile context setters. "ctx" is a name stem; the remaining
arguments are passed to the selected mode's function unchanged. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO),guard,data)
1589
/* Two-mode limit setters. Each passes "limit" to the selected mode's
pcre2_set_xxx_N function for the context whose name stem is "ctx". */

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_RECURSION_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_recursion_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_recursion_limit_,BITTWO)(G(ctx,BITTWO),limit)
1619
/* Two-mode dispatch for pcre2_substitute(): a receives the result of the
width-specific function selected by test_mode. The names b, g and h have the
width pasted on; c and i are cast to that width's PCRE2_SPTR type and k to a
pointer to that width's PCRE2_UCHAR type. The remaining arguments are passed
through unchanged. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
      (G(PCRE2_UCHAR,BITONE) *)k,l) : \
    G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
      (G(PCRE2_UCHAR,BITTWO) *)k,l)
1629
/* Two-mode dispatch for the substring extraction functions. In every macro
test_mode selects which of the two width-specific functions is called; only
that call is evaluated. Where a macro has a result argument (a), the value
returned by the selected function is assigned to it. Buffer and list arguments
are cast to the code unit pointer type of the selected width. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE), \
      (G(PCRE2_UCHAR,BITONE) *)d,e) : \
    G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c, \
      (G(PCRE2_UCHAR,BITONE) *)d,e) : \
    G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c, \
      (G(PCRE2_UCHAR,BITTWO) *)d,e)

#define PCRE2_SUBSTRING_FREE(a) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a) : \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a))

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE), \
      (G(PCRE2_UCHAR,BITONE) **)d,e) : \
    G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c, \
      (G(PCRE2_UCHAR,BITONE) **)d,e) : \
    G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c, \
      (G(PCRE2_UCHAR,BITTWO) **)d,e)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d) : \
    G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d) : \
    G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)c,d) : \
    G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a) : \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a))

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)) : \
    G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1698
/* PTR(x) is an expression that yields, as a void pointer, the width-suffixed
variant of x that corresponds to the current test_mode. */

#define PTR(x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \
  (void *)G(x,BITTWO))

/* SETFLD assigns z to field y of the structure pointed to by the selected
width-suffixed variant of x. */

#define SETFLD(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \
  else G(x,BITTWO)->y = z

/* SETFLDVEC is like SETFLD, but field y is indexed and element v is the one
assigned. */

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \
  else G(x,BITTWO)->y[v] = z

/* SETOP applies the operator token z, with y as its right-hand operand, to
the selected width-suffixed variant of x. */

#define SETOP(x,y,z) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \
  else G(x,BITTWO) z y

/* SETCASTPTR assigns y to the selected width-suffixed variant of x, after
casting it to a pointer to the fixed-width unsigned type whose name is formed
by pasting "uint", the width and "_t" together. */

#define SETCASTPTR(x,y) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \
  else \
    G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y)

/* STRLEN(p) is an expression: it calls the function whose name is "strlen"
with the selected width pasted on, passing p cast to that width's PCRE2_SPTR
type. */

#define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \
  G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p))

/* SUB1 calls the width-suffixed function a with the width-suffixed variable b
as its only argument. */

#define SUB1(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO))
1730
/* SUB2 calls the width-suffixed function a with the width-suffixed variables
b and c as its two arguments, choosing the width from test_mode. It is the
two-argument counterpart of SUB1 and, like it, expands without a trailing
semicolon. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1736
/* TEST(x,r,y) is an expression that applies the operator token r between the
width-suffixed variant of x that matches test_mode and y. Because each
comparison is guarded by its own test_mode check, only the variant for the
active mode is examined; if test_mode matches neither BITONE nor BITTWO the
whole expression is false. */

#define TEST(x,r,y) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y)))

/* TESTFLD is like TEST, but the left-hand operand is field f of the structure
pointed to by the width-suffixed variant of x. */

#define TESTFLD(x,f,r,y) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y)))
1744
1745
1746 #endif /* Two out of three modes */
1747
1748 /* ----- End of cases where more than one mode is supported ----- */
1749
1750
1751 /* ----- Only 8-bit mode is supported ----- */
1752
1753 #elif defined SUPPORT_PCRE2_8
/* With only one code unit width compiled in there is nothing to select at run
time, so none of these macros looks at test_mode: each expands directly to the
8-bit function, type or width-suffixed variable (G(x,8) pastes "8" onto x). */

/* Access to width-suffixed variables and structure fields. */

#define CASTFLD(t,a,b) (t)(G(a,8)->b)
#define CASTVAR(t,x) (t)G(x,8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))

/* Character output: PCHARS stores the value returned by pchars8() in lv,
PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

/* Library function wrappers. PCRE2_CALLOUT_ENUMERATE and PCRE2_PRINTINT
operate on compiled_code8 directly rather than on a macro argument. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)

/* The buffer is the variable named b with "8" appended, and its length is
taken from the variable named b with "8_size" appended. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)

/* NOTE(review): the three JIT stack macros below end in ';', unlike most of
their neighbours, so "MACRO(...);" expands to a statement followed by an empty
statement. Their callers are not visible from this section - confirm that none
is used as an unbraced if/else arm before changing them. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);

/* PCRE2_MAKETABLES always passes NULL as the general context. */

#define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_8(a)

/* Context setters: a names the context variable, without its width suffix. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)

/* Substitution and substring extraction: the first argument receives the
function's return value, except in the two "free" macros. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
    (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
/* 8-bit-only form: a receives the value returned by
pcre2_substring_number_from_name_8() for the variables named b and c with "8"
appended. The expansion has no trailing semicolon, matching the two-mode form
of this macro, so the caller's own ';' ends the statement and the macro can be
the only statement of an if/else arm. */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_8(G(b,8),G(c,8))
/* 8-bit-only forms of the generic helpers: each expands directly to an
operation on the variable or function whose name has "8" appended, with no
test_mode check. In SETOP and TEST the z and r arguments are operator tokens
placed between the two operands. STRLEN casts p to char * and uses the
standard strlen(), converting the result to int. */

#define PTR(x) (void *)G(x,8)
#define SETFLD(x,y,z) G(x,8)->y = z
#define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
#define SETOP(x,y,z) G(x,8) z y
#define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
#define STRLEN(p) (int)strlen((char *)p)
#define SUB1(a,b) G(a,8)(G(b,8))
#define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
#define TEST(x,r,y) (G(x,8) r (y))
#define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
1844
1845
1846 /* ----- Only 16-bit mode is supported ----- */
1847
1848 #elif defined SUPPORT_PCRE2_16
/* With only one code unit width compiled in there is nothing to select at run
time, so none of these macros looks at test_mode: each expands directly to the
16-bit function, type or width-suffixed variable (G(x,16) pastes "16" onto
x). */

/* Access to width-suffixed variables and structure fields. */

#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))

/* Character output: PCHARS stores the value returned by pchars16() in lv,
PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)

/* Library function wrappers. PCRE2_CALLOUT_ENUMERATE and PCRE2_PRINTINT
operate on compiled_code16 directly rather than on a macro argument. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)

/* The buffer is the variable named b with "16" appended, and its length is
taken from the variable named b with "16_size" appended. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)

/* NOTE(review): the three JIT stack macros below end in ';', unlike most of
their neighbours, so "MACRO(...);" expands to a statement followed by an empty
statement. Their callers are not visible from this section - confirm that none
is used as an unbraced if/else arm before changing them. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);

/* PCRE2_MAKETABLES always passes NULL as the general context. */

#define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_16(a)
/* 16-bit-only context setters, substitution and substring extraction. Each
macro expands directly to the 16-bit library function, with no test_mode
check; names passed as a context, code, match data or name argument have "16"
pasted on by G(). Where a macro has a result argument (a), it receives the
function's return value.

None of these expansions ends in a semicolon: the caller's own ';' terminates
the statement, so every macro here can be the only statement of an if/else
arm. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
    (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_16(G(b,16),G(c,16))
/* 16-bit-only forms of the generic helpers: each expands directly to an
operation on the variable or function whose name has "16" appended, with no
test_mode check. In SETOP and TEST the z and r arguments are operator tokens
placed between the two operands. STRLEN calls strlen16() on p cast to
PCRE2_SPTR16 and converts the result to int. */

#define PTR(x) (void *)G(x,16)
#define SETFLD(x,y,z) G(x,16)->y = z
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
#define SETOP(x,y,z) G(x,16) z y
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
#define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
#define TEST(x,r,y) (G(x,16) r (y))
#define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
1939
1940
1941 /* ----- Only 32-bit mode is supported ----- */
1942
1943 #elif defined SUPPORT_PCRE2_32
/* With only one code unit width compiled in there is nothing to select at run
time, so none of these macros looks at test_mode: each expands directly to the
32-bit function, type or width-suffixed variable (G(x,32) pastes "32" onto
x). */

/* Access to width-suffixed variables and structure fields. */

#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))

/* Character output: PCHARS stores the value returned by pchars32() in lv,
PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
#define PCHARSV(p, offset, len, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)

/* Library function wrappers. PCRE2_CALLOUT_ENUMERATE and PCRE2_PRINTINT
operate on compiled_code32 directly rather than on a macro argument. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  a = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)

/* The buffer is the variable named b with "32" appended, and its length is
taken from the variable named b with "32_size" appended. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

/* NOTE(review): the three JIT stack macros below end in ';', unlike most of
their neighbours, so "MACRO(...);" expands to a statement followed by an empty
statement. Their callers are not visible from this section - confirm that none
is used as an unbraced if/else arm before changing them. */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);

/* PCRE2_MAKETABLES always passes NULL as the general context. */

#define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_32(a)
/* 32-bit-only context setters, substitution and substring extraction. Each
macro expands directly to the 32-bit library function, with no test_mode
check; names passed as a context, code, match data or name argument have "32"
pasted on by G(). Where a macro has a result argument (a), it receives the
function's return value.

None of these expansions ends in a semicolon: the caller's own ';' terminates
the statement, so every macro here can be the only statement of an if/else
arm. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
/* 32-bit-only forms of the generic helpers: each expands directly to an
operation on the variable or function whose name has "32" appended, with no
test_mode check. In SETOP and TEST the z and r arguments are operator tokens
placed between the two operands. STRLEN calls strlen32() on p cast to
PCRE2_SPTR32 and converts the result to int. */

#define PTR(x) (void *)G(x,32)
#define SETFLD(x,y,z) G(x,32)->y = z
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y))
#define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2034
2035 #endif
2036
2037 /* ----- End of mode-specific function call macros ----- */
2038
2039
2040
2041
2042 /*************************************************
2043 * Alternate character tables *
2044 *************************************************/
2045
2046 /* By default, the "tables" pointer in the compile context when calling
2047 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2048 library. However, the tables modifier can be used to select alternate sets of
2049 tables, for different kinds of testing. Note that the locale modifier also
2050 adjusts the tables. */
2051
2052 /* This is the set of tables distributed as default with PCRE2. It recognizes
2053 only ASCII characters. */
2054
static const uint8_t tables1[] = {

/* Bytes 0-255: lower casing table. 'A'-'Z' map to 'a'-'z'; every other value
maps to itself. Sixteen entries per row; the comment gives the first index. */

/*   0 */   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
/*  16 */  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/*  32 */  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
/*  48 */  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
/*  64 */  64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
/*  80 */ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
/*  96 */  96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
/* 112 */ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
/* 128 */ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
/* 144 */ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
/* 160 */ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
/* 176 */ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
/* 192 */ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
/* 208 */ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
/* 224 */ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
/* 240 */ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,

/* Bytes 256-511: case flipping table. 'A'-'Z' and 'a'-'z' are exchanged;
every other value maps to itself. */

/*   0 */   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
/*  16 */  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/*  32 */  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
/*  48 */  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
/*  64 */  64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
/*  80 */ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
/*  96 */  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
/* 112 */  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
/* 128 */ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
/* 144 */ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
/* 160 */ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
/* 176 */ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
/* 192 */ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
/* 208 */ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
/* 224 */ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
/* 240 */ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,

/* Bytes 512-831: ten character class bit maps of 32 bytes each, with the bits
running from the least significant end of each byte. Each map is written as
two rows: the bits for characters 0-127, then those for characters 128-255
(all clear in this ASCII-only set). Other classes are built from combinations
of these. */

/* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* Bytes 832-1087: character type table, one byte of flag bits per character:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 248-255 */
};
2223
2224 /* This is a set of tables that came originally from a Windows user. It seems
2225 to be at least an approximation of ISO 8859. In particular, there are
2226 characters greater than 128 that are marked as spaces, letters, etc. */
2227
static const uint8_t tables2[] = {
/* 1088 bytes made up of four consecutive sub-tables of 256, 256, 320 and 256
bytes. NOTE(review): the sizes match the lower-case, flip-case, class-bitmap
and character-type sections of a PCRE2 character table - confirm against the
code that consumes this array. */

/* Bytes 0-255: lower-casing map. Entries 65-90 hold 97-122 and entries
192-222 hold 224-254, except for entry 215 which holds itself. Every other
entry holds its own index. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Bytes 256-511: case-flipping map. As above for entries 65-90 and 192-222,
and additionally entries 97-122 hold 65-90 and entries 224-254 hold 192-222,
except for entry 247 which holds itself. */
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Bytes 512-831: 320 bytes. NOTE(review): presumably ten 32-byte character
class bitmaps - confirm which class each one represents. */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Bytes 832-1087: one byte per character value. NOTE(review): presumably
character-type flag bits - confirm the meaning of the individual bits. */
128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2366
2367
2368
2369 /*************************************************
2370 * Local memory functions *
2371 *************************************************/
2372
2373 /* Alternative memory functions, to test functionality. */
2374
my_malloc(size_t size,void * data)2375 static void *my_malloc(size_t size, void *data)
2376 {
2377 void *block = malloc(size);
2378 (void)data;
2379 if (show_memory)
2380 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2381 return block;
2382 }
2383
my_free(void * block,void * data)2384 static void my_free(void *block, void *data)
2385 {
2386 (void)data;
2387 if (show_memory)
2388 fprintf(outfile, "free %p\n", block);
2389 free(block);
2390 }
2391
2392 /* For recursion malloc/free, to test stacking calls */
2393
2394 #ifdef HEAP_MATCH_RECURSE
my_stack_malloc(size_t size,void * data)2395 static void *my_stack_malloc(size_t size, void *data)
2396 {
2397 void *block = malloc(size);
2398 (void)data;
2399 if (show_memory)
2400 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2401 return block;
2402 }
2403
my_stack_free(void * block,void * data)2404 static void my_stack_free(void *block, void *data)
2405 {
2406 (void)data;
2407 if (show_memory)
2408 fprintf(outfile, "stack_free %p\n", block);
2409 free(block);
2410 }
2411 #endif /* HEAP_MATCH_RECURSE */
2412
2413
2414 /*************************************************
2415 * Callback function for stack guard *
2416 *************************************************/
2417
2418 /* This is set up to be called from pcre2_compile() when the stackguard=n
2419 modifier sets a value greater than zero. The test we do is whether the
2420 parenthesis nesting depth is greater than the value set by the modifier.
2421
2422 Argument: the current parenthesis nesting depth
2423 Returns: non-zero to kill the compilation
2424 */
2425
2426 static int
stack_guard(uint32_t depth,void * user_data)2427 stack_guard(uint32_t depth, void *user_data)
2428 {
2429 (void)user_data;
2430 return depth > pat_patctl.stackguard_test;
2431 }
2432
2433
2434 /*************************************************
2435 * JIT memory callback *
2436 *************************************************/
2437
2438 static PCRE2_JIT_STACK*
jit_callback(void * arg)2439 jit_callback(void *arg)
2440 {
2441 jit_was_used = TRUE;
2442 return (PCRE2_JIT_STACK *)arg;
2443 }
2444
2445
2446 /*************************************************
2447 * Convert UTF-8 character to code point *
2448 *************************************************/
2449
2450 /* This function reads one or more bytes that represent a UTF-8 character,
2451 and returns the codepoint of that character. Note that the function supports
2452 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2453 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2454 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2455 checking, and also for generating 32-bit non-UTF data values above the UTF
2456 limit.
2457
2458 Argument:
2459 utf8bytes a pointer to the byte vector
2460 vptr a pointer to an int to receive the value
2461
2462 Returns: > 0 => the number of bytes consumed
2463 -6 to 0 => malformed UTF-8 character at offset = (-return)
2464 */
2465
2466 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2467 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2468 {
2469 uint32_t c = *utf8bytes++;
2470 uint32_t d = c;
2471 int i, j, s;
2472
2473 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2474 {
2475 if ((d & 0x80) == 0) break;
2476 d <<= 1;
2477 }
2478
2479 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2480 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2481
2482 /* i now has a value in the range 1-5 */
2483
2484 s = 6*i;
2485 d = (c & utf8_table3[i]) << s;
2486
2487 for (j = 0; j < i; j++)
2488 {
2489 c = *utf8bytes++;
2490 if ((c & 0xc0) != 0x80) return -(j+1);
2491 s -= 6;
2492 d |= (c & 0x3f) << s;
2493 }
2494
2495 /* Check that encoding was the correct unique one */
2496
2497 for (j = 0; j < utf8_table1_size; j++)
2498 if (d <= (uint32_t)utf8_table1[j]) break;
2499 if (j != i) return -(i+1);
2500
2501 /* Valid value */
2502
2503 *vptr = d;
2504 return i+1;
2505 }
2506
2507
2508
2509 /*************************************************
2510 * Print one character *
2511 *************************************************/
2512
2513 /* Print a single character either literally, or as a hex escape, and count how
2514 many printed characters are used.
2515
2516 Arguments:
2517 c the character
2518 utf TRUE in UTF mode
2519 f the FILE to print to, or NULL just to count characters
2520
2521 Returns: number of characters written
2522 */
2523
2524 static int
pchar(uint32_t c,BOOL utf,FILE * f)2525 pchar(uint32_t c, BOOL utf, FILE *f)
2526 {
2527 int n = 0;
2528 if (PRINTOK(c))
2529 {
2530 if (f != NULL) fprintf(f, "%c", c);
2531 return 1;
2532 }
2533
2534 if (c < 0x100)
2535 {
2536 if (utf)
2537 {
2538 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2539 return 6;
2540 }
2541 else
2542 {
2543 if (f != NULL) fprintf(f, "\\x%02x", c);
2544 return 4;
2545 }
2546 }
2547
2548 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2549 return n >= 0 ? n : 0;
2550 }
2551
2552
2553
2554 #ifdef SUPPORT_PCRE2_16
2555 /*************************************************
2556 * Find length of 0-terminated 16-bit string *
2557 *************************************************/
2558
/* Count the 16-bit code units that precede the terminating zero. The result
is converted directly to size_t; it no longer passes through int, which
truncated very large lengths. */

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2565 #endif /* SUPPORT_PCRE2_16 */
2566
2567
2568
2569 #ifdef SUPPORT_PCRE2_32
2570 /*************************************************
2571 * Find length of 0-terminated 32-bit string *
2572 *************************************************/
2573
/* Count the 32-bit code units that precede the terminating zero. The result
is converted directly to size_t; it no longer passes through int, which
truncated very large lengths. */

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2580 #endif /* SUPPORT_PCRE2_32 */
2581
2582
2583 #ifdef SUPPORT_PCRE2_8
2584 /*************************************************
2585 * Print 8-bit character string *
2586 *************************************************/
2587
2588 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2589 For printing *MARK strings, a negative length is given. If handed a NULL file,
2590 just counts chars without printing (because pchar() does that). */
2591
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
int total = 0;

/* A negative length means that the real length is in the code unit just
before the string. */

int remaining = (length < 0)? p[-1] : length;

while (remaining > 0)
  {
  uint32_t ch = 0;
  int used = utf? utf82ord(p, &ch) : 0;

  /* Fall back to a single byte when not in UTF mode, when the bytes do not
  decode, or when the decoded character would run over the end. */

  if (used <= 0 || used > remaining)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }

return total;
}
2617 #endif
2618
2619
2620 #ifdef SUPPORT_PCRE2_16
2621 /*************************************************
2622 * Print 16-bit character string *
2623 *************************************************/
2624
2625 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2626 For printing *MARK strings, a negative length is given. If handed a NULL file,
2627 just counts chars without printing. */
2628
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;

/* A negative length means that the real length is in the code unit just
before the string. */

int remaining = (length < 0)? p[-1] : length;

while (remaining > 0)
  {
  uint32_t ch = *p++ & 0xffff;
  remaining--;

  /* In UTF mode, a high surrogate that is followed by a low surrogate is
  combined with it into one code point. */

  if (utf && remaining > 0 && (ch & 0xfc00) == 0xd800)
    {
    uint32_t low = *p & 0xffff;
    if ((low & 0xfc00) == 0xdc00)
      {
      ch = ((ch & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      remaining--;
      }
    }

  total += pchar(ch, utf, f);
  }

return total;
}
2650 #endif /* SUPPORT_PCRE2_16 */
2651
2652
2653
2654 #ifdef SUPPORT_PCRE2_32
2655 /*************************************************
2656 * Print 32-bit character string *
2657 *************************************************/
2658
2659 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
For printing *MARK strings, a negative length is given. If handed a NULL file,
2661 just counts chars without printing. */
2662
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;

/* A negative length means that the real length is in the code unit just
before the string. */

int remaining = (length < 0)? (int)p[-1] : length;

(void)(utf);  /* Avoid compiler warning */

/* Each 32-bit unit is one character, in or out of UTF mode. */

for (; remaining > 0; remaining--)
  {
  uint32_t ch = *p++;
  total += pchar(ch, utf, f);
  }

return total;
}
2675 #endif /* SUPPORT_PCRE2_32 */
2676
2677
2678
2679
2680 #ifdef SUPPORT_PCRE2_8
2681 /*************************************************
2682 * Convert character value to UTF-8 *
2683 *************************************************/
2684
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             OR -1 if the value is greater than 0x7fffffff
*/
2694
2695 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)2696 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
2697 {
2698 register int i, j;
2699 if (cvalue > 0x7fffffffu)
2700 return -1;
2701 for (i = 0; i < utf8_table1_size; i++)
2702 if (cvalue <= (uint32_t)utf8_table1[i]) break;
2703 utf8bytes += i;
2704 for (j = i; j > 0; j--)
2705 {
2706 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
2707 cvalue >>= 6;
2708 }
2709 *utf8bytes = utf8_table2[i] | cvalue;
2710 return i + 1;
2711 }
2712 #endif /* SUPPORT_PCRE2_8 */
2713
2714
2715
2716 #ifdef SUPPORT_PCRE2_16
2717 /*************************************************
2718 * Convert pattern to 16-bit *
2719 *************************************************/
2720
2721 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
2722 all the input bytes are ASCII, the space needed for a 16-bit string is exactly
2723 double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
2724 more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
2725 possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
2726 UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
2727 repeated re-sizing.
2728
2729 Note that this function does not object to surrogate values. This is
2730 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
2731 for the purpose of testing that they are correctly faulted.
2732
2733 Arguments:
2734 p points to a byte string
2735 utf non-zero if converting to UTF-16
2736 lenptr points to number of bytes in the string (excluding trailing zero)
2737
Returns:     0 on success, with the length updated to the number of 16-bit
               data items used (excluding the trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a value > 0xffff is encountered when not in UTF mode
               (this cannot happen at present, because input that is not in
               UTF mode is copied one byte at a time)
2743 */
2744
2745 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)2746 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2747 {
2748 uint16_t *pp;
2749 PCRE2_SIZE len = *lenptr;
2750
2751 if (pbuffer16_size < 2*len + 2)
2752 {
2753 if (pbuffer16 != NULL) free(pbuffer16);
2754 pbuffer16_size = 2*len + 2;
2755 if (pbuffer16_size < 256) pbuffer16_size = 256;
2756 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
2757 if (pbuffer16 == NULL)
2758 {
2759 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
2760 (unsigned long int)pbuffer16_size);
2761 exit(1);
2762 }
2763 }
2764
2765 pp = pbuffer16;
2766 if (!utf)
2767 {
2768 for (; len > 0; len--) *pp++ = *p++;
2769 }
2770 else while (len > 0)
2771 {
2772 uint32_t c;
2773 int chlen = utf82ord(p, &c);
2774 if (chlen <= 0) return -1;
2775 if (c > 0x10ffff) return -2;
2776 p += chlen;
2777 len -= chlen;
2778 if (c < 0x10000) *pp++ = c; else
2779 {
2780 if (!utf) return -3;
2781 c -= 0x10000;
2782 *pp++ = 0xD800 | (c >> 10);
2783 *pp++ = 0xDC00 | (c & 0x3ff);
2784 }
2785 }
2786
2787 *pp = 0;
2788 *lenptr = pp - pbuffer16;
2789 return 0;
2790 }
2791 #endif
2792
2793
2794
2795 #ifdef SUPPORT_PCRE2_32
2796 /*************************************************
2797 * Convert pattern to 32-bit *
2798 *************************************************/
2799
2800 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
2801 all the input bytes are ASCII, the space needed for a 32-bit string is exactly
2802 four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
2803 more than four times, because the number of characters must be less than the
2804 number of bytes. The result is always left in pbuffer32. Impose a minimum size
2805 to save repeated re-sizing.
2806
2807 Note that this function does not object to surrogate values. This is
2808 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2809 for the purpose of testing that they are correctly faulted.
2810
2811 Arguments:
2812 p points to a byte string
2813 utf true if UTF-8 (to be converted to UTF-32)
2814 lenptr points to number of bytes in the string (excluding trailing zero)
2815
2816 Returns: 0 on success, with the length updated to the number of 32-bit
2817 data items used (excluding the trailing zero)
2818 OR -1 if a UTF-8 string is malformed
2819 OR -2 if a value > 0x10ffff is encountered in UTF mode
2820 */
2821
2822 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)2823 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2824 {
2825 uint32_t *pp;
2826 PCRE2_SIZE len = *lenptr;
2827
2828 if (pbuffer32_size < 4*len + 4)
2829 {
2830 if (pbuffer32 != NULL) free(pbuffer32);
2831 pbuffer32_size = 4*len + 4;
2832 if (pbuffer32_size < 256) pbuffer32_size = 256;
2833 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
2834 if (pbuffer32 == NULL)
2835 {
2836 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
2837 (unsigned long int)pbuffer32_size);
2838 exit(1);
2839 }
2840 }
2841
2842 pp = pbuffer32;
2843 if (!utf)
2844 {
2845 for (; len > 0; len--) *pp++ = *p++;
2846 }
2847 else while (len > 0)
2848 {
2849 uint32_t c;
2850 int chlen = utf82ord(p, &c);
2851 if (chlen <= 0) return -1;
2852 if (utf && c > 0x10ffff) return -2;
2853 p += chlen;
2854 len -= chlen;
2855 *pp++ = c;
2856 }
2857
2858 *pp = 0;
2859 *lenptr = pp - pbuffer32;
2860 return 0;
2861 }
2862 #endif /* SUPPORT_PCRE2_32 */
2863
2864
2865
2866 /*************************************************
2867 * Move back by so many characters *
2868 *************************************************/
2869
2870 /* Given a code unit offset in a subject string, move backwards by a number of
2871 characters, and return the resulting offset.
2872
2873 Arguments:
2874 subject pointer to the string
2875 offset start offset
2876 count count to move back by
2877 utf TRUE if in UTF mode
2878
2879 Returns: a possibly changed offset
2880 */
2881
2882 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)2883 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
2884 {
2885 if (!utf || test_mode == PCRE32_MODE)
2886 return (count >= offset)? 0 : (offset - count);
2887
2888 else if (test_mode == PCRE8_MODE)
2889 {
2890 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
2891 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
2892 {
2893 pp--;
2894 while ((*pp & 0xc0) == 0x80) pp--;
2895 }
2896 return pp - (PCRE2_SPTR8)subject;
2897 }
2898
2899 else /* 16-bit mode */
2900 {
2901 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
2902 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
2903 {
2904 pp--;
2905 if ((*pp & 0xfc00) == 0xdc00) pp--;
2906 }
2907 return pp - (PCRE2_SPTR16)subject;
2908 }
2909 }
2910
2911
2912
2913 /*************************************************
2914 * Expand input buffers *
2915 *************************************************/
2916
2917 /* This function doubles the size of the input buffer and the buffer for
2918 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
2919 the new ones.
2920
2921 Arguments: none
2922 Returns: nothing (aborts if malloc() fails)
2923 */
2924
2925 static void
expand_input_buffers(void)2926 expand_input_buffers(void)
2927 {
2928 int new_pbuffer8_size = 2*pbuffer8_size;
2929 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
2930 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
2931
2932 if (new_buffer == NULL || new_pbuffer8 == NULL)
2933 {
2934 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
2935 exit(1);
2936 }
2937
2938 memcpy(new_buffer, buffer, pbuffer8_size);
2939 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
2940
2941 pbuffer8_size = new_pbuffer8_size;
2942
2943 free(buffer);
2944 free(pbuffer8);
2945
2946 buffer = new_buffer;
2947 pbuffer8 = new_pbuffer8;
2948 }
2949
2950
2951
2952 /*************************************************
2953 * Read or extend an input line *
2954 *************************************************/
2955
2956 /* Input lines are read into buffer, but both patterns and data lines can be
2957 continued over multiple input lines. In addition, if the buffer fills up, we
2958 want to automatically expand it so as to be able to handle extremely large
2959 lines that are needed for certain stress tests, although this is less likely
2960 now that there are repetition features for both patterns and data. When the
2961 input buffer is expanded, the other two buffers must also be expanded likewise,
2962 and the contents of pbuffer, which are a copy of the input for callouts, must
2963 be preserved (for when expansion happens for a data line). This is not the most
2964 optimal way of handling this, but hey, this is just a test program!
2965
2966 Arguments:
2967 f the file to read
2968 start where in buffer to start (this *must* be within buffer)
2969 prompt for stdin or readline()
2970
2971 Returns: pointer to the start of new data
2972 could be a copy of start, or could be moved
2973 NULL if no data read and EOF reached
2974 */
2975
2976 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)2977 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
2978 {
2979 uint8_t *here = start;
2980
2981 for (;;)
2982 {
2983 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
2984
2985 if (rlen > 1000)
2986 {
2987 size_t dlen;
2988
2989 /* If libreadline or libedit support is required, use readline() to read a
2990 line if the input is a terminal. Note that readline() removes the trailing
2991 newline, so we must put it back again, to be compatible with fgets(). */
2992
2993 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
2994 if (INTERACTIVE(f))
2995 {
2996 size_t len;
2997 char *s = readline(prompt);
2998 if (s == NULL) return (here == start)? NULL : start;
2999 len = strlen(s);
3000 if (len > 0) add_history(s);
3001 if (len > rlen - 1) len = rlen - 1;
3002 memcpy(here, s, len);
3003 here[len] = '\n';
3004 here[len+1] = 0;
3005 free(s);
3006 }
3007 else
3008 #endif
3009
3010 /* Read the next line by normal means, prompting if the file is a tty. */
3011
3012 {
3013 if (INTERACTIVE(f)) printf("%s", prompt);
3014 if (fgets((char *)here, rlen, f) == NULL)
3015 return (here == start)? NULL : start;
3016 }
3017
3018 dlen = strlen((char *)here);
3019 here += dlen;
3020
3021 /* Check for end of line reached. Take care not to read data from before
3022 start (dlen will be zero for a file starting with a binary zero). */
3023
3024 if (here > start && here[-1] == '\n') return start;
3025
3026 /* If we have not read a newline when reading a file, we have either filled
3027 the buffer or reached the end of the file. We can detect the former by
3028 checking that the string fills the buffer, and the latter by feof(). If
3029 neither of these is true, it means we read a binary zero which has caused
3030 strlen() to give a short length. This is a hard error because pcre2test
3031 expects to work with C strings. */
3032
3033 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3034 {
3035 fprintf(outfile, "** Binary zero encountered in input\n");
3036 fprintf(outfile, "** pcre2test run abandoned\n");
3037 exit(1);
3038 }
3039 }
3040
3041 else
3042 {
3043 size_t start_offset = start - buffer;
3044 size_t here_offset = here - buffer;
3045 expand_input_buffers();
3046 start = buffer + start_offset;
3047 here = buffer + here_offset;
3048 }
3049 }
3050
3051 /* Control never gets here */
3052 }
3053
3054
3055
3056 /*************************************************
3057 * Case-independent strncmp() function *
3058 *************************************************/
3059
3060 /*
3061 Arguments:
3062 s first string
3063 t second string
3064 n number of characters to compare
3065
3066 Returns: < 0, = 0, or > 0, according to the comparison
3067 */
3068
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare at most n characters, ignoring case. As with strncmp(), comparison
stops at a terminating zero, so nothing beyond the end of a short string is
examined. A count that is zero or negative compares nothing. */

while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;
  if (*s == 0) break;      /* Both strings have ended */
  s++;
  t++;
  }
return 0;
}
3079
3080
3081
3082 /*************************************************
3083 * Scan the main modifier list *
3084 *************************************************/
3085
3086 /* This function searches the modifier list for a long modifier name.
3087
3088 Argument:
3089 p start of the name
  len         length of the name
3091
3092 Returns: an index in the modifier list, or -1 on failure
3093 */
3094
3095 static int
scan_modifiers(const uint8_t * p,unsigned int len)3096 scan_modifiers(const uint8_t *p, unsigned int len)
3097 {
3098 int bot = 0;
3099 int top = MODLISTCOUNT;
3100
3101 while (top > bot)
3102 {
3103 int mid = (bot + top)/2;
3104 unsigned int mlen = strlen(modlist[mid].name);
3105 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3106 if (c == 0)
3107 {
3108 if (len == mlen) return mid;
3109 c = (int)len - (int)mlen;
3110 }
3111 if (c > 0) bot = mid + 1; else top = mid;
3112 }
3113
3114 return -1;
3115
3116 }
3117
3118
3119
3120 /*************************************************
*      Check a modifier and find its field       *
3122 *************************************************/
3123
3124 /* This function is called when a modifier has been identified. We check that
3125 it is allowed here and find the field that is to be changed.
3126
3127 Arguments:
3128 m the modifier list entry
3129 ctx CTX_PAT => pattern context
3130 CTX_POPPAT => pattern context for popped pattern
3131 CTX_DEFPAT => default pattern context
3132 CTX_DAT => data context
3133 CTX_DEFDAT => default data context
3134 pctl point to pattern control block
3135 dctl point to data control block
3136 c a single character or 0
3137
3138 Returns: a field pointer or NULL
3139 */
3140
3141 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3142 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3143 {
3144 void *field = NULL;
3145 PCRE2_SIZE offset = m->offset;
3146
3147 if (restrict_for_perl_test) switch(m->which)
3148 {
3149 case MOD_PNDP:
3150 case MOD_PATP:
3151 case MOD_PDP:
3152 break;
3153
3154 default:
3155 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3156 m->name);
3157 return NULL;
3158 }
3159
3160 switch (m->which)
3161 {
3162 case MOD_CTC: /* Compile context modifier */
3163 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3164 else if (ctx == CTX_PAT) field = PTR(pat_context);
3165 break;
3166
3167 case MOD_CTM: /* Match context modifier */
3168 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3169 else if (ctx == CTX_DAT) field = PTR(dat_context);
3170 break;
3171
3172 case MOD_DAT: /* Data line modifier */
3173 if (dctl != NULL) field = dctl;
3174 break;
3175
3176 case MOD_PAT: /* Pattern modifier */
3177 case MOD_PATP: /* Allowed for Perl test */
3178 if (pctl != NULL) field = pctl;
3179 break;
3180
3181 case MOD_PD: /* Pattern or data line modifier */
3182 case MOD_PDP: /* Ditto, allowed for Perl test */
3183 case MOD_PND: /* Ditto, but not default pattern */
3184 case MOD_PNDP: /* Ditto, allowed for Perl test */
3185 if (dctl != NULL) field = dctl;
3186 else if (pctl != NULL && (m->which == MOD_PD || ctx != CTX_DEFPAT))
3187 field = pctl;
3188 break;
3189 }
3190
3191 if (field == NULL)
3192 {
3193 if (c == 0)
3194 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3195 else
3196 fprintf(outfile, "** /%c is not valid here\n", c);
3197 return NULL;
3198 }
3199
3200 return (char *)field + offset;
3201 }
3202
3203
3204
3205 /*************************************************
3206 * Decode a modifier list *
3207 *************************************************/
3208
3209 /* A pointer to a control block is NULL when called in cases when that block is
3210 not relevant. They are never all relevant in one call. At least one of patctl
3211 and datctl is NULL. The second argument specifies which context to use for
3212 modifiers that apply to contexts.
3213
3214 Arguments:
3215 p point to modifier string
3216 ctx CTX_PAT => pattern context
3217 CTX_POPPAT => pattern context for popped pattern
3218 CTX_DEFPAT => default pattern context
3219 CTX_DAT => data context
3220 CTX_DEFDAT => default data context
3221 pctl point to pattern control block
3222 dctl point to data control block
3223
3224 Returns: TRUE if successful decode, FALSE otherwise
3225 */
3226
3227 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3228 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3229 {
3230 uint8_t *ep, *pp;
3231 long li;
3232 unsigned long uli;
3233 BOOL first = TRUE;
3234
3235 for (;;)
3236 {
3237 void *field;
3238 modstruct *m;
3239 BOOL off = FALSE;
3240 unsigned int i, len;
3241 int index;
3242 char *endptr;
3243
3244 /* Skip white space and commas. */
3245
3246 while (isspace(*p) || *p == ',') p++;
3247 if (*p == 0) break;
3248
3249 /* Find the end of the item; lose trailing whitespace at end of line. */
3250
3251 for (ep = p; *ep != 0 && *ep != ','; ep++);
3252 if (*ep == 0)
3253 {
3254 while (ep > p && isspace(ep[-1])) ep--;
3255 *ep = 0;
3256 }
3257
3258 /* Remember if the first character is '-'. */
3259
3260 if (*p == '-')
3261 {
3262 off = TRUE;
3263 p++;
3264 }
3265
3266 /* Find the length of a full-length modifier name, and scan for it. */
3267
3268 pp = p;
3269 while (pp < ep && *pp != '=') pp++;
3270 index = scan_modifiers(p, pp - p);
3271
3272 /* If the first modifier is unrecognized, try to interpret it as a sequence
3273 of single-character abbreviated modifiers. None of these modifiers have any
3274 associated data. They just set options or control bits. */
3275
3276 if (index < 0)
3277 {
3278 uint32_t cc;
3279 uint8_t *mp = p;
3280
3281 if (!first)
3282 {
3283 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3284 if (ep - p == 1)
3285 fprintf(outfile, "** Single-character modifiers must come first\n");
3286 return FALSE;
3287 }
3288
3289 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3290 {
3291 for (i = 0; i < C1MODLISTCOUNT; i++)
3292 if (cc == c1modlist[i].onechar) break;
3293
3294 if (i >= C1MODLISTCOUNT)
3295 {
3296 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3297 *p, (int)(ep-mp), mp);
3298 return FALSE;
3299 }
3300
3301 if (c1modlist[i].index >= 0)
3302 {
3303 index = c1modlist[i].index;
3304 }
3305
3306 else
3307 {
3308 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3309 strlen(c1modlist[i].fullname));
3310 if (index < 0)
3311 {
3312 fprintf(outfile, "** Internal error: single-character equivalent "
3313 "modifier '%s' not found\n", c1modlist[i].fullname);
3314 return FALSE;
3315 }
3316 c1modlist[i].index = index; /* Cache for next time */
3317 }
3318
3319 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3320 if (field == NULL) return FALSE;
3321 *((uint32_t *)field) |= modlist[index].value;
3322 }
3323
3324 continue; /* With tne next (fullname) modifier */
3325 }
3326
3327 /* We have a match on a full-name modifier. Check for the existence of data
3328 when needed. */
3329
3330 m = modlist + index; /* Save typing */
3331 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3332 (m->type != MOD_IND || *pp == '='))
3333 {
3334 if (*pp++ != '=')
3335 {
3336 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3337 return FALSE;
3338 }
3339 if (off)
3340 {
3341 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3342 return FALSE;
3343 }
3344 }
3345
3346 /* These on/off types have no data. */
3347
3348 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3349 {
3350 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3351 return FALSE;
3352 }
3353
3354 /* Set the data length for those types that have data. Then find the field
3355 that is to be set. If check_modifier() returns NULL, it has already output an
3356 error message. */
3357
3358 len = ep - pp;
3359 field = check_modifier(m, ctx, pctl, dctl, 0);
3360 if (field == NULL) return FALSE;
3361
3362 /* Process according to data type. */
3363
3364 switch (m->type)
3365 {
3366 case MOD_CTL:
3367 case MOD_OPT:
3368 if (off) *((uint32_t *)field) &= ~m->value;
3369 else *((uint32_t *)field) |= m->value;
3370 break;
3371
3372 case MOD_BSR:
3373 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3374 {
3375 #ifdef BSR_ANYCRLF
3376 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3377 #else
3378 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3379 #endif
3380 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET;
3381 else dctl->control &= ~CTL_BSR_SET;
3382 }
3383 else
3384 {
3385 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3386 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3387 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3388 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3389 else goto INVALID_VALUE;
3390 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET;
3391 else dctl->control |= CTL_BSR_SET;
3392 }
3393 pp = ep;
3394 break;
3395
3396 case MOD_IN2: /* One or two unsigned integers */
3397 if (!isdigit(*pp)) goto INVALID_VALUE;
3398 uli = strtoul((const char *)pp, &endptr, 10);
3399 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3400 ((uint32_t *)field)[0] = (uint32_t)uli;
3401 if (*endptr == ':')
3402 {
3403 uli = strtoul((const char *)endptr+1, &endptr, 10);
3404 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3405 ((uint32_t *)field)[1] = (uint32_t)uli;
3406 }
3407 else ((uint32_t *)field)[1] = 0;
3408 pp = (uint8_t *)endptr;
3409 break;
3410
3411 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3412 less than ULONG_MAX. So first test for overflowing the long int, and then
3413 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3414
3415 case MOD_SIZ: /* PCRE2_SIZE value */
3416 if (!isdigit(*pp)) goto INVALID_VALUE;
3417 uli = strtoul((const char *)pp, &endptr, 10);
3418 if (uli == ULONG_MAX) goto INVALID_VALUE;
3419 #if ULONG_MAX > PCRE2_SIZE_MAX
3420 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3421 #endif
3422 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3423 pp = (uint8_t *)endptr;
3424 break;
3425
3426 case MOD_IND: /* Unsigned integer with default */
3427 if (len == 0)
3428 {
3429 *((uint32_t *)field) = (uint32_t)(m->value);
3430 break;
3431 }
3432 /* Fall through */
3433
3434 case MOD_INT: /* Unsigned integer */
3435 if (!isdigit(*pp)) goto INVALID_VALUE;
3436 uli = strtoul((const char *)pp, &endptr, 10);
3437 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3438 *((uint32_t *)field) = (uint32_t)uli;
3439 pp = (uint8_t *)endptr;
3440 break;
3441
3442 case MOD_INS: /* Signed integer */
3443 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3444 li = strtol((const char *)pp, &endptr, 10);
3445 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3446 *((int32_t *)field) = (int32_t)li;
3447 pp = (uint8_t *)endptr;
3448 break;
3449
3450 case MOD_NL:
3451 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3452 if (len == strlen(newlines[i]) &&
3453 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3454 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3455 if (i == 0)
3456 {
3457 *((uint16_t *)field) = NEWLINE_DEFAULT;
3458 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET;
3459 else dctl->control &= ~CTL_NL_SET;
3460 }
3461 else
3462 {
3463 *((uint16_t *)field) = i;
3464 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET;
3465 else dctl->control |= CTL_NL_SET;
3466 }
3467 pp = ep;
3468 break;
3469
3470 case MOD_NN: /* Name or (signed) number; may be several */
3471 if (isdigit(*pp) || *pp == '-')
3472 {
3473 int ct = MAXCPYGET - 1;
3474 int32_t value;
3475 li = strtol((const char *)pp, &endptr, 10);
3476 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3477 value = (int32_t)li;
3478 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3479 if (value >= 0) /* Add new number */
3480 {
3481 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3482 field = (char *)field + sizeof(int32_t);
3483 if (ct <= 0)
3484 {
3485 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3486 return FALSE;
3487 }
3488 }
3489 *((int32_t *)field) = value;
3490 if (ct > 0) ((int32_t *)field)[1] = -1;
3491 pp = (uint8_t *)endptr;
3492 }
3493
3494 /* Multiple strings are put end to end. */
3495
3496 else
3497 {
3498 char *nn = (char *)field;
3499 if (len > 0) /* Add new name */
3500 {
3501 while (*nn != 0) nn += strlen(nn) + 1;
3502 if (nn + len + 1 - (char *)field > LENCPYGET)
3503 {
3504 fprintf(outfile, "** Too many named '%s' modifiers\n", m->name);
3505 return FALSE;
3506 }
3507 memcpy(nn, pp, len);
3508 }
3509 nn[len] = 0 ;
3510 nn[len+1] = 0;
3511 pp = ep;
3512 }
3513 break;
3514
3515 case MOD_STR:
3516 if (len + 1 > m->value)
3517 {
3518 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3519 m->name, m->value - 1);
3520 return FALSE;
3521 }
3522 memcpy(field, pp, len);
3523 ((uint8_t *)field)[len] = 0;
3524 pp = ep;
3525 break;
3526 }
3527
3528 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3529 {
3530 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3531 return FALSE;
3532 }
3533
3534 p = pp;
3535 first = FALSE;
3536
3537 if (ctx == CTX_POPPAT &&
3538 (pctl->options != 0 ||
3539 pctl->tables_id != 0 ||
3540 pctl->locale[0] != 0 ||
3541 (pctl->control & NOTPOP_CONTROLS) != 0))
3542 {
3543 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3544 return FALSE;
3545 }
3546 }
3547
3548 return TRUE;
3549
3550 INVALID_VALUE:
3551 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3552 return FALSE;
3553 }
3554
3555
3556 /*************************************************
3557 * Get info from a pattern *
3558 *************************************************/
3559
3560 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
3561 pattern.
3562
3563 Arguments:
3564 what code for the required information
3565 where where to put the answer
3566 unsetok PCRE2_ERROR_UNSET is an "expected" result
3567
3568 Returns: the return from pcre2_pattern_info()
3569 */
3570
3571 static int
pattern_info(int what,void * where,BOOL unsetok)3572 pattern_info(int what, void *where, BOOL unsetok)
3573 {
3574 int rc;
3575 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
3576 if (rc >= 0) return 0;
3577 if (rc != PCRE2_ERROR_UNSET || !unsetok)
3578 {
3579 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
3580 what);
3581 if (rc == PCRE2_ERROR_BADMODE)
3582 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3583 "%d-bit mode\n", test_mode,
3584 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
3585 }
3586 return rc;
3587 }
3588
3589
3590
3591 #ifdef SUPPORT_PCRE2_8
3592 /*************************************************
3593 * Show something in a list *
3594 *************************************************/
3595
3596 /* This function just helps to keep the code that uses it tidier. It's used for
3597 various lists of things where there needs to be introductory text before the
3598 first item. As these calls are all in the POSIX-support code, they happen only
3599 when 8-bit mode is supported. */
3600
3601 static void
prmsg(const char ** msg,const char * s)3602 prmsg(const char **msg, const char *s)
3603 {
3604 fprintf(outfile, "%s %s", *msg, s);
3605 *msg = "";
3606 }
3607 #endif /* SUPPORT_PCRE2_8 */
3608
3609
3610
3611 /*************************************************
3612 * Show control bits *
3613 *************************************************/
3614
3615 /* Called for mutually exclusive controls and for unsupported POSIX controls.
3616 Because the bits are unique, this can be used for both pattern and data control
3617 words.
3618
3619 Arguments:
3620 controls control bits
3621 controls2 more control bits
3622 before text to print before
3623
3624 Returns: nothing
3625 */
3626
3627 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)3628 show_controls(uint32_t controls, uint32_t controls2, const char *before)
3629 {
3630 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3631 before,
3632 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
3633 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
3634 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
3635 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
3636 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
3637 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
3638 ((controls & CTL_BSR_SET) != 0)? " bsr" : "",
3639 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
3640 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
3641 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
3642 ((controls & CTL_DFA) != 0)? " dfa" : "",
3643 ((controls & CTL_EXPAND) != 0)? " expand" : "",
3644 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
3645 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
3646 ((controls & CTL_GETALL) != 0)? " getall" : "",
3647 ((controls & CTL_GLOBAL) != 0)? " global" : "",
3648 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
3649 ((controls & CTL_INFO) != 0)? " info" : "",
3650 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
3651 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
3652 ((controls & CTL_MARK) != 0)? " mark" : "",
3653 ((controls & CTL_MEMORY) != 0)? " memory" : "",
3654 ((controls & CTL_NL_SET) != 0)? " newline" : "",
3655 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
3656 ((controls & CTL_POSIX) != 0)? " posix" : "",
3657 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
3658 ((controls & CTL_PUSH) != 0)? " push" : "",
3659 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
3660 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
3661 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
3662 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
3663 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
3664 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
3665 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
3666 }
3667
3668
3669
3670 /*************************************************
3671 * Show compile options *
3672 *************************************************/
3673
3674 /* Called from show_pattern_info() and for unsupported POSIX options.
3675
3676 Arguments:
3677 options an options word
3678 before text to print before
3679 after text to print after
3680
3681 Returns: nothing
3682 */
3683
3684 static void
show_compile_options(uint32_t options,const char * before,const char * after)3685 show_compile_options(uint32_t options, const char *before, const char *after)
3686 {
3687 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
3688 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
3689 before,
3690 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
3691 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
3692 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
3693 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
3694 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3695 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
3696 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
3697 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3698 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
3699 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
3700 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
3701 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
3702 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
3703 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
3704 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
3705 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
3706 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
3707 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3708 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
3709 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
3710 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3711 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3712 ((options & PCRE2_UCP) != 0)? " ucp" : "",
3713 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
3714 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
3715 ((options & PCRE2_UTF) != 0)? " utf" : "",
3716 after);
3717 }
3718
3719
3720
3721 #ifdef SUPPORT_PCRE2_8
3722 /*************************************************
3723 * Show match options *
3724 *************************************************/
3725
3726 /* Called for unsupported POSIX options. */
3727
3728 static void
show_match_options(uint32_t options)3729 show_match_options(uint32_t options)
3730 {
3731 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s",
3732 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
3733 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
3734 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
3735 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
3736 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
3737 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
3738 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
3739 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
3740 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
3741 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
3742 }
3743 #endif /* SUPPORT_PCRE2_8 */
3744
3745
3746
3747 /*************************************************
3748 * Show memory usage info for a pattern *
3749 *************************************************/
3750
3751 static void
show_memory_info(void)3752 show_memory_info(void)
3753 {
3754 uint32_t name_count, name_entry_size;
3755 size_t size, cblock_size;
3756
3757 /* One of the test_mode values will always be true, but to stop a compiler
3758 warning we must initialize cblock_size. */
3759
3760 cblock_size = 0;
3761 #ifdef SUPPORT_PCRE2_8
3762 if (test_mode == 8) cblock_size = sizeof(pcre2_real_code_8);
3763 #endif
3764 #ifdef SUPPORT_PCRE2_16
3765 if (test_mode == 16) cblock_size = sizeof(pcre2_real_code_16);
3766 #endif
3767 #ifdef SUPPORT_PCRE2_32
3768 if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
3769 #endif
3770
3771 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
3772 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
3773 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
3774 fprintf(outfile, "Memory allocation (code space): %d\n",
3775 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
3776 if (pat_patctl.jit != 0)
3777 {
3778 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
3779 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
3780 }
3781 }
3782
3783
3784
3785 /*************************************************
3786 * Callback function for callout enumeration *
3787 *************************************************/
3788
/* The only differences in the callout enumeration block for different code
3790 unit widths are that the pointers to the subject, the most recent MARK, and a
3791 callout argument string point to strings of the appropriate width. Casts can be
3792 used to deal with this.
3793
3794 Argument:
3795 cb pointer to enumerate block
3796 callout_data user data
3797
3798 Returns: 0
3799 */
3800
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
int shown_length;

(void)callout_data;  /* Not currently displayed */

fprintf(outfile, "Callout ");

/* A callout is identified either by a number or by a delimited string. */

if (cb->callout_string == NULL)
  fprintf(outfile, "%d ", cb->callout_number);
else
  {
  uint32_t k = 0;

  /* The opening delimiter is the code unit just before the string. */

  uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
  fprintf(outfile, "%c", delimiter);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);

  /* Look the opening delimiter up in the zero-terminated list of start
  delimiters; when it is listed, close with the end delimiter at the same
  index, otherwise close with the opening delimiter itself. */

  while (callout_start_delims[k] != 0 && delimiter != callout_start_delims[k])
    k++;
  if (callout_start_delims[k] != 0) delimiter = callout_end_delims[k];

  fprintf(outfile, "%c ", delimiter);
  }

/* Show the pattern item that follows the callout; a zero item length is
displayed as a single code unit. */

shown_length = (int)((cb->next_item_length == 0)? 1 : cb->next_item_length);
fprintf(outfile, "%.*s\n", shown_length, pbuffer8 + cb->pattern_position);

return 0;
}
3832
3833
3834
3835 /*************************************************
3836 * Show information about a pattern *
3837 *************************************************/
3838
3839 /* This function is called after a pattern has been compiled if any of the
3840 information-requesting controls have been set.
3841
3842 Arguments: none
3843
3844 Returns: PR_OK continue processing next line
3845 PR_SKIP skip to a blank line
3846 PR_ABEND abort the pcre2test run
3847 */
3848
3849 static int
show_pattern_info(void)3850 show_pattern_info(void)
3851 {
3852 uint32_t compile_options, overall_options;
3853
3854 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
3855 {
3856 fprintf(outfile, "------------------------------------------------------------------\n");
3857 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
3858 }
3859
3860 if ((pat_patctl.control & CTL_INFO) != 0)
3861 {
3862 void *nametable;
3863 uint8_t *start_bits;
3864 BOOL match_limit_set, recursion_limit_set;
3865 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
3866 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
3867 match_limit, minlength, nameentrysize, namecount, newline_convention,
3868 recursion_limit;
3869
3870 /* These info requests may return PCRE2_ERROR_UNSET. */
3871
3872 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
3873 {
3874 case 0:
3875 match_limit_set = TRUE;
3876 break;
3877
3878 case PCRE2_ERROR_UNSET:
3879 match_limit_set = FALSE;
3880 break;
3881
3882 default:
3883 return PR_ABEND;
3884 }
3885
3886 switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
3887 {
3888 case 0:
3889 recursion_limit_set = TRUE;
3890 break;
3891
3892 case PCRE2_ERROR_UNSET:
3893 recursion_limit_set = FALSE;
3894 break;
3895
3896 default:
3897 return PR_ABEND;
3898 }
3899
3900 /* These info requests should always succeed. */
3901
3902 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
3903 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
3904 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
3905 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
3906 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
3907 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
3908 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
3909 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
3910 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
3911 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
3912 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
3913 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
3914 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
3915 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
3916 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
3917 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
3918 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
3919 != 0)
3920 return PR_ABEND;
3921
3922 fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);
3923
3924 if (backrefmax > 0)
3925 fprintf(outfile, "Max back reference = %d\n", backrefmax);
3926
3927 if (maxlookbehind > 0)
3928 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3929
3930 if (match_limit_set)
3931 fprintf(outfile, "Match limit = %u\n", match_limit);
3932
3933 if (recursion_limit_set)
3934 fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
3935
3936 if (namecount > 0)
3937 {
3938 fprintf(outfile, "Named capturing subpatterns:\n");
3939 for (; namecount > 0; namecount--)
3940 {
3941 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
3942 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
3943 fprintf(outfile, " ");
3944 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
3945 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3946 #ifdef SUPPORT_PCRE2_32
3947 if (test_mode == PCRE32_MODE)
3948 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
3949 #endif
3950 #ifdef SUPPORT_PCRE2_16
3951 if (test_mode == PCRE16_MODE)
3952 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
3953 #endif
3954 #ifdef SUPPORT_PCRE2_8
3955 if (test_mode == PCRE8_MODE)
3956 fprintf(outfile, "%3d\n", (int)(
3957 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
3958 #endif
3959 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
3960 }
3961 }
3962
3963 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3964 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
3965 if (match_empty) fprintf(outfile, "May match empty string\n");
3966
3967 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
3968 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
3969
3970 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
3971 cluttering up the verification output of non-UTF test files. */
3972
3973 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
3974 {
3975 compile_options &= ~PCRE2_NEVER_UTF;
3976 overall_options &= ~PCRE2_NEVER_UTF;
3977 }
3978
3979 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
3980 {
3981 compile_options &= ~PCRE2_NEVER_UCP;
3982 overall_options &= ~PCRE2_NEVER_UCP;
3983 }
3984
3985 if ((compile_options|overall_options) != 0)
3986 {
3987 if (compile_options == overall_options)
3988 show_compile_options(compile_options, "Options:", "\n");
3989 else
3990 {
3991 show_compile_options(compile_options, "Compile options:", "\n");
3992 show_compile_options(overall_options, "Overall options:", "\n");
3993 }
3994 }
3995
3996 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3997
3998 if ((pat_patctl.control & CTL_BSR_SET) != 0 ||
3999 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4000 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4001 "any Unicode newline" : "CR, LF, or CRLF");
4002
4003 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4004 {
4005 switch (newline_convention)
4006 {
4007 case PCRE2_NEWLINE_CR:
4008 fprintf(outfile, "Forced newline is CR\n");
4009 break;
4010
4011 case PCRE2_NEWLINE_LF:
4012 fprintf(outfile, "Forced newline is LF\n");
4013 break;
4014
4015 case PCRE2_NEWLINE_CRLF:
4016 fprintf(outfile, "Forced newline is CRLF\n");
4017 break;
4018
4019 case PCRE2_NEWLINE_ANYCRLF:
4020 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4021 break;
4022
4023 case PCRE2_NEWLINE_ANY:
4024 fprintf(outfile, "Forced newline is any Unicode newline\n");
4025 break;
4026
4027 default:
4028 break;
4029 }
4030 }
4031
4032 if (first_ctype == 2)
4033 {
4034 fprintf(outfile, "First code unit at start or follows newline\n");
4035 }
4036 else if (first_ctype == 1)
4037 {
4038 const char *caseless =
4039 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4040 "" : " (caseless)";
4041 if (PRINTOK(first_cunit))
4042 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4043 else
4044 {
4045 fprintf(outfile, "First code unit = ");
4046 pchar(first_cunit, FALSE, outfile);
4047 fprintf(outfile, "%s\n", caseless);
4048 }
4049 }
4050 else if (start_bits != NULL)
4051 {
4052 int i;
4053 int c = 24;
4054 fprintf(outfile, "Starting code units: ");
4055 for (i = 0; i < 256; i++)
4056 {
4057 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4058 {
4059 if (c > 75)
4060 {
4061 fprintf(outfile, "\n ");
4062 c = 2;
4063 }
4064 if (PRINTOK(i) && i != ' ')
4065 {
4066 fprintf(outfile, "%c ", i);
4067 c += 2;
4068 }
4069 else
4070 {
4071 fprintf(outfile, "\\x%02x ", i);
4072 c += 5;
4073 }
4074 }
4075 }
4076 fprintf(outfile, "\n");
4077 }
4078
4079 if (last_ctype != 0)
4080 {
4081 const char *caseless =
4082 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4083 "" : " (caseless)";
4084 if (PRINTOK(last_cunit))
4085 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4086 else
4087 {
4088 fprintf(outfile, "Last code unit = ");
4089 pchar(last_cunit, FALSE, outfile);
4090 fprintf(outfile, "%s\n", caseless);
4091 }
4092 }
4093
4094 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4095
4096 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4097 {
4098 if (FLD(compiled_code, executable_jit) != NULL)
4099 fprintf(outfile, "JIT compilation was successful\n");
4100 else
4101 {
4102 #ifdef SUPPORT_JIT
4103 int len;
4104 fprintf(outfile, "JIT compilation was not successful");
4105 if (jitrc != 0)
4106 {
4107 fprintf(outfile, " (");
4108 PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer);
4109 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4110 fprintf(outfile, ")");
4111 }
4112 fprintf(outfile, "\n");
4113 #else
4114 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4115 #endif
4116 }
4117 }
4118 }
4119
4120 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4121 {
4122 int errorcode;
4123 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4124 if (errorcode != 0)
4125 {
4126 int len;
4127 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4128 if (errorcode < 0)
4129 {
4130 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4131 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4132 }
4133 fprintf(outfile, "\n");
4134 return PR_SKIP;
4135 }
4136 }
4137
4138 return PR_OK;
4139 }
4140
4141
4142
4143 /*************************************************
4144 * Handle serialization error *
4145 *************************************************/
4146
4147 /* Print an error message after a serialization failure.
4148
4149 Arguments:
4150 rc the error code
4151 msg an initial message for what failed
4152
4153 Returns: nothing
4154 */
4155
4156 static void
serial_error(int rc,const char * msg)4157 serial_error(int rc, const char *msg)
4158 {
4159 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4160 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
4161 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
4162 fprintf(outfile, "\n");
4163 }
4164
4165
4166
4167 /*************************************************
4168 * Open file for save/load commands *
4169 *************************************************/
4170
4171 /* This function decodes the file name and opens the file.
4172
4173 Arguments:
4174 buffptr point after the #command
4175 mode open mode
4176 fptr points to the FILE variable
4177
4178 Returns: PR_OK or PR_ABEND
4179 */
4180
4181 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr)4182 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4183 {
4184 char *endf;
4185 char *filename = (char *)buffptr;
4186 while (isspace(*filename)) filename++;
4187 endf = filename + strlen8(filename);
4188 while (endf > filename && isspace(endf[-1])) endf--;
4189
4190 if (endf == filename)
4191 {
4192 fprintf(outfile, "** File name expected after #save\n");
4193 return PR_ABEND;
4194 }
4195
4196 *endf = 0;
4197 *fptr = fopen((const char *)filename, mode);
4198 if (*fptr == NULL)
4199 {
4200 fprintf(outfile, "** Failed to open '%s'\n", filename);
4201 return PR_ABEND;
4202 }
4203
4204 return PR_OK;
4205 }
4206
4207
4208
4209 /*************************************************
4210 * Process command line *
4211 *************************************************/
4212
4213 /* This function is called for lines beginning with # and a character that is
4214 not ! or whitespace, when encountered between tests, which means that there is
4215 no compiled pattern (compiled_code is NULL). The line is in buffer.
4216
4217 Arguments: none
4218
4219 Returns: PR_OK continue processing next line
4220 PR_SKIP skip to a blank line
4221 PR_ABEND abort the pcre2test run
4222 */
4223
4224 static int
process_command(void)4225 process_command(void)
4226 {
4227 FILE *f;
4228 PCRE2_SIZE serial_size;
4229 size_t i;
4230 int rc, cmd, cmdlen;
4231 uint16_t first_listed_newline;
4232 const char *cmdname;
4233 uint8_t *argptr, *serial;
4234
4235 if (restrict_for_perl_test)
4236 {
4237 fprintf(outfile, "** #-commands are not allowed after #perltest\n");
4238 return PR_ABEND;
4239 }
4240
4241 cmd = CMD_UNKNOWN;
4242 cmdlen = 0;
4243
4244 for (i = 0; i < cmdlistcount; i++)
4245 {
4246 cmdname = cmdlist[i].name;
4247 cmdlen = strlen(cmdname);
4248 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4249 isspace(buffer[cmdlen+1]))
4250 {
4251 cmd = cmdlist[i].value;
4252 break;
4253 }
4254 }
4255
4256 argptr = buffer + cmdlen + 1;
4257
4258 switch(cmd)
4259 {
4260 case CMD_UNKNOWN:
4261 fprintf(outfile, "** Unknown command: %s", buffer);
4262 break;
4263
4264 case CMD_FORBID_UTF:
4265 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4266 break;
4267
4268 case CMD_PERLTEST:
4269 restrict_for_perl_test = TRUE;
4270 break;
4271
4272 /* Set default pattern modifiers */
4273
4274 case CMD_PATTERN:
4275 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4276 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4277 def_patctl.jit = 7;
4278 break;
4279
4280 /* Set default subject modifiers */
4281
4282 case CMD_SUBJECT:
4283 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4284 break;
4285
4286 /* Check the default newline, and if not one of those listed, set up the
4287 first one to be forced. An empty list unsets. */
4288
4289 case CMD_NEWLINE_DEFAULT:
4290 local_newline_default = 0; /* Unset */
4291 first_listed_newline = 0;
4292 for (;;)
4293 {
4294 while (isspace(*argptr)) argptr++;
4295 if (*argptr == 0) break;
4296 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4297 {
4298 size_t nlen = strlen(newlines[i]);
4299 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4300 isspace(argptr[nlen]))
4301 {
4302 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4303 if (first_listed_newline == 0) first_listed_newline = i;
4304 }
4305 }
4306 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4307 }
4308 local_newline_default = first_listed_newline;
4309 break;
4310
4311 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4312 the compiled pattern (e.g. to give information) are permitted. The default
4313 pattern modifiers are ignored. */
4314
4315 case CMD_POP:
4316 case CMD_POPCOPY:
4317 if (patstacknext <= 0)
4318 {
4319 fprintf(outfile, "** Can't pop off an empty stack\n");
4320 return PR_SKIP;
4321 }
4322 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4323 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4324 return PR_SKIP;
4325
4326 if (cmd == CMD_POP)
4327 {
4328 SET(compiled_code, patstack[--patstacknext]);
4329 }
4330 else
4331 {
4332 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4333 }
4334
4335 if (pat_patctl.jit != 0)
4336 {
4337 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4338 }
4339 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4340 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4341 {
4342 rc = show_pattern_info();
4343 if (rc != PR_OK) return rc;
4344 }
4345 break;
4346
4347 /* Save the stack of compiled patterns to a file, then empty the stack. */
4348
4349 case CMD_SAVE:
4350 if (patstacknext <= 0)
4351 {
4352 fprintf(outfile, "** No stacked patterns to save\n");
4353 return PR_OK;
4354 }
4355
4356 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4357 if (rc != PR_OK) return rc;
4358
4359 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4360 general_context);
4361 if (rc < 0)
4362 {
4363 serial_error(rc, "Serialization");
4364 break;
4365 }
4366
4367 /* Write the length at the start of the file to make it straightforward to
4368 get the right memory when re-loading. This saves having to read the file size
4369 in different operating systems. To allow for different endianness (even
4370 though reloading with the opposite endianness does not work), write the
4371 length byte-by-byte. */
4372
4373 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4374 if (fwrite(serial, 1, serial_size, f) != serial_size)
4375 {
4376 fprintf(outfile, "** Wrong return from fwrite()\n");
4377 return PR_ABEND;
4378 }
4379
4380 fclose(f);
4381 PCRE2_SERIALIZE_FREE(serial);
4382 while(patstacknext > 0)
4383 {
4384 SET(compiled_code, patstack[--patstacknext]);
4385 SUB1(pcre2_code_free, compiled_code);
4386 }
4387 SET(compiled_code, NULL);
4388 break;
4389
4390 /* Load a set of compiled patterns from a file onto the stack */
4391
4392 case CMD_LOAD:
4393 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4394 if (rc != PR_OK) return rc;
4395
4396 serial_size = 0;
4397 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4398
4399 serial = malloc(serial_size);
4400 if (serial == NULL)
4401 {
4402 fprintf(outfile, "** Failed to get memory (size %lu) for #load\n",
4403 (unsigned long int)serial_size);
4404 return PR_ABEND;
4405 }
4406
4407 if (fread(serial, 1, serial_size, f) != serial_size)
4408 {
4409 fprintf(outfile, "** Wrong return from fread()\n");
4410 return PR_ABEND;
4411 }
4412 fclose(f);
4413
4414 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
4415 if (rc < 0) serial_error(rc, "Get number of codes"); else
4416 {
4417 if (rc + patstacknext > PATSTACKSIZE)
4418 {
4419 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
4420 rc, (rc == 1)? "" : "s");
4421 rc = PATSTACKSIZE - patstacknext;
4422 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
4423 (rc == 1)? "" : "s");
4424 }
4425 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
4426 general_context);
4427 if (rc < 0) serial_error(rc, "Deserialization");
4428 else patstacknext += rc;
4429 }
4430
4431 free(serial);
4432 break;
4433 }
4434
4435 return PR_OK;
4436 }
4437
4438
4439
4440 /*************************************************
4441 * Process pattern line *
4442 *************************************************/
4443
4444 /* This function is called when the input buffer contains the start of a
4445 pattern. The first character is known to be a valid delimiter. The pattern is
4446 read, modifiers are interpreted, and a suitable local context is set up for
4447 this test. The pattern is then compiled.
4448
4449 Arguments: none
4450
4451 Returns: PR_OK continue processing next line
4452 PR_SKIP skip to a blank line
4453 PR_ABEND abort the pcre2test run
4454 */
4455
4456 static int
process_pattern(void)4457 process_pattern(void)
4458 {
4459 BOOL utf;
4460 uint32_t k;
4461 uint8_t *p = buffer;
4462 const uint8_t *use_tables;
4463 unsigned int delimiter = *p++;
4464 int errorcode;
4465 void *use_pat_context;
4466 PCRE2_SIZE patlen;
4467 PCRE2_SIZE erroroffset;
4468
4469 /* Initialize the context and pattern/data controls for this test from the
4470 defaults. */
4471
4472 PATCTXCPY(pat_context, default_pat_context);
4473 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
4474
4475 /* Find the end of the pattern, reading more lines if necessary. */
4476
4477 for(;;)
4478 {
4479 while (*p != 0)
4480 {
4481 if (*p == '\\' && p[1] != 0) p++;
4482 else if (*p == delimiter) break;
4483 p++;
4484 }
4485 if (*p != 0) break;
4486 if ((p = extend_inputline(infile, p, " > ")) == NULL)
4487 {
4488 fprintf(outfile, "** Unexpected EOF\n");
4489 return PR_ABEND;
4490 }
4491 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
4492 }
4493
4494 /* If the first character after the delimiter is backslash, make the pattern
4495 end with backslash. This is purely to provide a way of testing for the error
4496 message when a pattern ends with backslash. */
4497
4498 if (p[1] == '\\') *p++ = '\\';
4499
4500 /* Terminate the pattern at the delimiter, and compute the length. */
4501
4502 *p++ = 0;
4503 patlen = p - buffer - 2;
4504
4505 /* Look for modifiers and options after the final delimiter. */
4506
4507 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
4508 utf = (pat_patctl.options & PCRE2_UTF) != 0;
4509
4510 /* Check for mutually exclusive modifiers. At present, these are all in the
4511 first control word. */
4512
4513 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
4514 {
4515 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
4516 if (c != 0 && c != (c & (~c+1)))
4517 {
4518 show_controls(c, 0, "** Not allowed together:");
4519 fprintf(outfile, "\n");
4520 return PR_SKIP;
4521 }
4522 }
4523
4524 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
4525 specified. */
4526
4527 if (pat_patctl.jit == 0 &&
4528 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
4529 pat_patctl.jit = 7;
4530
4531 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
4532 in callouts. Convert from hex if requested (literal strings in quotes may be
4533 present within the hexadecimal pairs). The result must necessarily be fewer
4534 characters so will always fit in pbuffer8. */
4535
4536 if ((pat_patctl.control & CTL_HEXPAT) != 0)
4537 {
4538 uint8_t *pp, *pt;
4539 uint32_t c, d;
4540
4541 pt = pbuffer8;
4542 for (pp = buffer + 1; *pp != 0; pp++)
4543 {
4544 if (isspace(*pp)) continue;
4545 c = *pp++;
4546
4547 /* Handle a literal substring */
4548
4549 if (c == '\'' || c == '"')
4550 {
4551 for (;; pp++)
4552 {
4553 d = *pp;
4554 if (d == 0)
4555 {
4556 fprintf(outfile, "** Missing closing quote in hex pattern\n");
4557 return PR_SKIP;
4558 }
4559 if (d == c) break;
4560 *pt++ = d;
4561 }
4562 }
4563
4564 /* Expect a hex pair */
4565
4566 else
4567 {
4568 if (!isxdigit(c))
4569 {
4570 fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: "
4571 "quote missing?\n", c);
4572 return PR_SKIP;
4573 }
4574 if (*pp == 0)
4575 {
4576 fprintf(outfile, "** Odd number of digits in hex pattern\n");
4577 return PR_SKIP;
4578 }
4579 d = *pp;
4580 if (!isxdigit(d))
4581 {
4582 fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: "
4583 "quote missing?\n", d);
4584 return PR_SKIP;
4585 }
4586 c = toupper(c);
4587 d = toupper(d);
4588 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
4589 (isdigit(d)? (d - '0') : (d - 'A' + 10));
4590 }
4591 }
4592 *pt = 0;
4593 patlen = pt - pbuffer8;
4594 }
4595
4596 /* If not a hex string, process for repetition expansion if requested. */
4597
4598 else if ((pat_patctl.control & CTL_EXPAND) != 0)
4599 {
4600 uint8_t *pp, *pt;
4601
4602 pt = pbuffer8;
4603 for (pp = buffer + 1; *pp != 0; pp++)
4604 {
4605 uint8_t *pc = pp;
4606 uint32_t count = 1;
4607 size_t length = 1;
4608
4609 /* Check for replication syntax; if not found, the defaults just set will
4610 prevail and one character will be copied. */
4611
4612 if (pp[0] == '\\' && pp[1] == '[')
4613 {
4614 uint8_t *pe;
4615 for (pe = pp + 2; *pe != 0; pe++)
4616 {
4617 if (pe[0] == ']' && pe[1] == '{')
4618 {
4619 uint32_t clen = pe - pc - 2;
4620 uint32_t i = 0;
4621 unsigned long uli;
4622 char *endptr;
4623
4624 pe += 2;
4625 uli = strtoul((const char *)pe, &endptr, 10);
4626 if (U32OVERFLOW(uli))
4627 {
4628 fprintf(outfile, "** Pattern repeat count too large\n");
4629 return PR_SKIP;
4630 }
4631
4632 i = (uint32_t)uli;
4633 pe = (uint8_t *)endptr;
4634 if (*pe == '}')
4635 {
4636 if (i == 0)
4637 {
4638 fprintf(outfile, "** Zero repeat not allowed\n");
4639 return PR_SKIP;
4640 }
4641 pc += 2;
4642 count = i;
4643 length = clen;
4644 pp = pe;
4645 break;
4646 }
4647 }
4648 }
4649 }
4650
4651 /* Add to output. If the buffer is too small expand it. The function for
4652 expanding buffers always keeps buffer and pbuffer8 in step as far as their
4653 size goes. */
4654
4655 while (pt + count * length > pbuffer8 + pbuffer8_size)
4656 {
4657 size_t pc_offset = pc - buffer;
4658 size_t pp_offset = pp - buffer;
4659 size_t pt_offset = pt - pbuffer8;
4660 expand_input_buffers();
4661 pc = buffer + pc_offset;
4662 pp = buffer + pp_offset;
4663 pt = pbuffer8 + pt_offset;
4664 }
4665
4666 for (; count > 0; count--)
4667 {
4668 memcpy(pt, pc, length);
4669 pt += length;
4670 }
4671 }
4672
4673 *pt = 0;
4674 patlen = pt - pbuffer8;
4675
4676 if ((pat_patctl.control & CTL_INFO) != 0)
4677 fprintf(outfile, "Expanded: %s\n", pbuffer8);
4678 }
4679
4680 /* Neither hex nor expanded, just copy the input verbatim. */
4681
4682 else
4683 {
4684 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
4685 }
4686
4687 /* Sort out character tables */
4688
4689 if (pat_patctl.locale[0] != 0)
4690 {
4691 if (pat_patctl.tables_id != 0)
4692 {
4693 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
4694 return PR_SKIP;
4695 }
4696 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
4697 {
4698 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
4699 return PR_SKIP;
4700 }
4701 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
4702 {
4703 strcpy((char *)locale_name, (char *)pat_patctl.locale);
4704 if (locale_tables != NULL) free((void *)locale_tables);
4705 PCRE2_MAKETABLES(locale_tables);
4706 }
4707 use_tables = locale_tables;
4708 }
4709
4710 else switch (pat_patctl.tables_id)
4711 {
4712 case 0: use_tables = NULL; break;
4713 case 1: use_tables = tables1; break;
4714 case 2: use_tables = tables2; break;
4715 default:
4716 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
4717 return PR_SKIP;
4718 }
4719
4720 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
4721
4722 /* Set up for the stackguard test. */
4723
4724 if (pat_patctl.stackguard_test != 0)
4725 {
4726 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
4727 }
4728
4729 /* Handle compiling via the POSIX interface, which doesn't support the
4730 timing, showing, or debugging options, nor the ability to pass over
4731 local character tables. Neither does it have 16-bit or 32-bit support. */
4732
4733 if ((pat_patctl.control & CTL_POSIX) != 0)
4734 {
4735 #ifdef SUPPORT_PCRE2_8
4736 int rc;
4737 int cflags = 0;
4738 const char *msg = "** Ignored with POSIX interface:";
4739 #endif
4740
4741 if (test_mode != 8)
4742 {
4743 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
4744 return PR_SKIP;
4745 }
4746
4747 #ifdef SUPPORT_PCRE2_8
4748 /* Check for features that the POSIX interface does not support. */
4749
4750 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
4751 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
4752 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
4753 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
4754 if (timeit > 0) prmsg(&msg, "timing");
4755 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
4756
4757 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
4758 {
4759 show_compile_options(
4760 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
4761 msg = "";
4762 }
4763 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4764 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
4765 {
4766 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
4767 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
4768 msg = "";
4769 }
4770
4771 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
4772
4773 if (msg[0] == 0) fprintf(outfile, "\n");
4774
4775 /* Translate PCRE2 options to POSIX options and then compile. */
4776
4777 if (utf) cflags |= REG_UTF;
4778 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
4779 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
4780 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
4781 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
4782 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
4783 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
4784
4785 rc = regcomp(&preg, (char *)pbuffer8, cflags);
4786
4787 /* Compiling failed */
4788
4789 if (rc != 0)
4790 {
4791 size_t bsize, usize;
4792 int psize;
4793
4794 preg.re_pcre2_code = NULL; /* In case something was left in there */
4795 preg.re_match_data = NULL;
4796
4797 bsize = (pat_patctl.regerror_buffsize != 0)?
4798 pat_patctl.regerror_buffsize : pbuffer8_size;
4799 if (bsize + 8 < pbuffer8_size)
4800 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
4801 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
4802
4803 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
4804 versions of snprintf() put a zero byte at the end, but others do not.
4805 Therefore, we print a maximum of one less than the size of the buffer. */
4806
4807 psize = (int)bsize - 1;
4808 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
4809 if (usize > bsize)
4810 {
4811 fprintf(outfile, "** regerror() message truncated\n");
4812 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
4813 fprintf(outfile, "** regerror() buffer overflow\n");
4814 }
4815 return PR_SKIP;
4816 }
4817
4818 /* Compiling succeeded. Check that the values in the preg block are sensible.
4819 It can happen that pcre2test is accidentally linked with a different POSIX
4820 library which succeeds, but of course puts different things into preg. In
4821 this situation, calling regfree() may cause a segfault (or invalid free() in
4822 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
4823 calling of regfree() on exit. */
4824
4825 if (preg.re_pcre2_code == NULL ||
4826 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
4827 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
4828 preg.re_match_data == NULL ||
4829 preg.re_cflags != cflags)
4830 {
4831 fprintf(outfile,
4832 "** The regcomp() function returned zero (success), but the values set\n"
4833 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
4834 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
4835 "** some other POSIX regex library.\n**\n");
4836 preg.re_pcre2_code = NULL;
4837 return PR_ABEND;
4838 }
4839
4840 return PR_OK;
4841 #endif /* SUPPORT_PCRE2_8 */
4842 }
4843
4844 /* Handle compiling via the native interface. Controls that act later are
4845 ignored with "push". Replacements are locked out. */
4846
4847 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY)) != 0)
4848 {
4849 if (pat_patctl.replacement[0] != 0)
4850 {
4851 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
4852 return PR_OK;
4853 }
4854 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
4855 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
4856 {
4857 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
4858 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
4859 "** Ignored when compiled pattern is stacked with 'push':");
4860 fprintf(outfile, "\n");
4861 }
4862 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
4863 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
4864 {
4865 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
4866 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
4867 "** Applies only to compile when pattern is stacked with 'push':");
4868 fprintf(outfile, "\n");
4869 }
4870 }
4871
4872 /* Convert the input in non-8-bit modes. */
4873
4874 errorcode = 0;
4875
4876 #ifdef SUPPORT_PCRE2_16
4877 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
4878 #endif
4879
4880 #ifdef SUPPORT_PCRE2_32
4881 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
4882 #endif
4883
4884 switch(errorcode)
4885 {
4886 case -1:
4887 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
4888 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
4889 return PR_SKIP;
4890
4891 case -2:
4892 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
4893 "cannot be converted to UTF\n");
4894 return PR_SKIP;
4895
4896 case -3:
4897 fprintf(outfile, "** Failed: character value greater than 0xffff "
4898 "cannot be converted to 16-bit in non-UTF mode\n");
4899 return PR_SKIP;
4900
4901 default:
4902 break;
4903 }
4904
4905 /* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
4906 default, however, we pass a zero-terminated pattern. The length is passed only
4907 if we had a hex pattern. */
4908
4909 if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
4910
4911 /* If #newline_default has been used and the library was not compiled with an
4912 appropriate default newline setting, local_newline_default will be non-zero. We
4913 use this if there is no explicit newline modifier. */
4914
4915 if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
4916 {
4917 SETFLD(pat_context, newline_convention, local_newline_default);
4918 }
4919
4920 /* The nullcontext modifier is used to test calling pcre2_compile() with a NULL
4921 context. */
4922
4923 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
4924 NULL : PTR(pat_context);
4925
4926 /* Compile many times when timing. */
4927
4928 if (timeit > 0)
4929 {
4930 register int i;
4931 clock_t time_taken = 0;
4932 for (i = 0; i < timeit; i++)
4933 {
4934 clock_t start_time = clock();
4935 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
4936 pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context);
4937 time_taken += clock() - start_time;
4938 if (TEST(compiled_code, !=, NULL))
4939 { SUB1(pcre2_code_free, compiled_code); }
4940 }
4941 total_compile_time += time_taken;
4942 fprintf(outfile, "Compile time %.4f milliseconds\n",
4943 (((double)time_taken * 1000.0) / (double)timeit) /
4944 (double)CLOCKS_PER_SEC);
4945 }
4946
4947 /* A final compile that is used "for real". */
4948
4949 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
4950 &errorcode, &erroroffset, use_pat_context);
4951
4952 /* Compilation failed; go back for another re, skipping to blank line
4953 if non-interactive. */
4954
4955 if (TEST(compiled_code, ==, NULL))
4956 {
4957 int len;
4958 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
4959 (int)erroroffset);
4960 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4961 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4962 fprintf(outfile, "\n");
4963 return PR_SKIP;
4964 }
4965
4966 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
4967 locked out at compile time, but we must also check for occurrences of \P, \p,
4968 and \X, which are only supported when Unicode is supported. */
4969
4970 if (forbid_utf != 0)
4971 {
4972 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
4973 {
4974 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
4975 "#forbid_utf command\n");
4976 return PR_SKIP;
4977 }
4978 }
4979
4980 /* Remember the maximum lookbehind, for partial matching. */
4981
4982 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
4983 return PR_ABEND;
4984
4985 /* Call the JIT compiler if requested. When timing, we must free and recompile
4986 the pattern each time because that is the only way to free the JIT compiled
4987 code. We know that compilation will always succeed. */
4988
4989 if (pat_patctl.jit != 0)
4990 {
4991 if (timeit > 0)
4992 {
4993 register int i;
4994 clock_t time_taken = 0;
4995 for (i = 0; i < timeit; i++)
4996 {
4997 clock_t start_time;
4998 SUB1(pcre2_code_free, compiled_code);
4999 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5000 pat_patctl.options|forbid_utf, &errorcode, &erroroffset,
5001 use_pat_context);
5002 start_time = clock();
5003 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5004 time_taken += clock() - start_time;
5005 }
5006 total_jit_compile_time += time_taken;
5007 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5008 (((double)time_taken * 1000.0) / (double)timeit) /
5009 (double)CLOCKS_PER_SEC);
5010 }
5011 else
5012 {
5013 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5014 }
5015 }
5016
5017 /* If an explicit newline modifier was given, set the information flag in the
5018 pattern so that it is preserved over push/pop. */
5019
5020 if ((pat_patctl.control & CTL_NL_SET) != 0)
5021 {
5022 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5023 }
5024
5025 /* Output code size and other information if requested. */
5026
5027 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5028 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5029 {
5030 int rc = show_pattern_info();
5031 if (rc != PR_OK) return rc;
5032 }
5033
5034 /* The "push" control requests that the compiled pattern be remembered on a
5035 stack. This is mainly for testing the serialization functionality. */
5036
5037 if ((pat_patctl.control & CTL_PUSH) != 0)
5038 {
5039 if (patstacknext >= PATSTACKSIZE)
5040 {
5041 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5042 return PR_ABEND;
5043 }
5044 patstack[patstacknext++] = PTR(compiled_code);
5045 SET(compiled_code, NULL);
5046 }
5047
5048 /* The "pushcopy" control is similar, but pushes a copy of the pattern. This
5049 tests the pcre2_code_copy() function. */
5050
5051 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5052 {
5053 if (patstacknext >= PATSTACKSIZE)
5054 {
5055 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5056 return PR_ABEND;
5057 }
5058 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5059 }
5060
5061 return PR_OK;
5062 }
5063
5064
5065
5066 /*************************************************
5067 * Check match or recursion limit *
5068 *************************************************/
5069
5070 static int
check_match_limit(uint8_t * pp,size_t ulen,int errnumber,const char * msg)5071 check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg)
5072 {
5073 int capcount;
5074 uint32_t min = 0;
5075 uint32_t mid = 64;
5076 uint32_t max = UINT32_MAX;
5077
5078 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5079 PCRE2_SET_RECURSION_LIMIT(dat_context, max);
5080
5081 for (;;)
5082 {
5083 if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5084 {
5085 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5086 }
5087 else
5088 {
5089 PCRE2_SET_RECURSION_LIMIT(dat_context, mid);
5090 }
5091
5092 if ((pat_patctl.control & CTL_JITFAST) != 0)
5093 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5094 dat_datctl.options, match_data, PTR(dat_context));
5095 else
5096 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5097 dat_datctl.options, match_data, PTR(dat_context));
5098
5099 if (capcount == errnumber)
5100 {
5101 min = mid;
5102 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5103 }
5104 else if (capcount >= 0 ||
5105 capcount == PCRE2_ERROR_NOMATCH ||
5106 capcount == PCRE2_ERROR_PARTIAL)
5107 {
5108 if (mid == min + 1)
5109 {
5110 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5111 break;
5112 }
5113 max = mid;
5114 mid = (min + mid)/2;
5115 }
5116 else break; /* Some other error */
5117 }
5118
5119 return capcount;
5120 }
5121
5122
5123
5124 /*************************************************
5125 * Callout function *
5126 *************************************************/
5127
5128 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
5129 we are in the match. Yield zero unless more callouts than the fail count, or
5130 the callout data is not zero. The only differences in the callout block for
5131 different code unit widths are that the pointers to the subject, the most
5132 recent MARK, and a callout argument string point to strings of the appropriate
5133 width. Casts can be used to deal with this.
5134
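Arguments:
  cb                a pointer to a callout block
  callout_data_ptr  if not NULL, a pointer to an int32_t callout data value

Returns:            the callout data value if it is not zero; otherwise 1 if
                    the callout number is the callout fail number and the
                    fail count has been reached, and 0 in all other cases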
5137 */
5138
5139 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)5140 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
5141 {
5142 uint32_t i, pre_start, post_start, subject_length;
5143 PCRE2_SIZE current_position;
5144 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5145 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
5146
5147 /* This FILE is used for echoing the subject. This is done only once in simple
5148 cases. */
5149
5150 FILE *f = (first_callout || callout_capture || cb->callout_string != NULL)?
5151 outfile : NULL;
5152
5153 /* For a callout with a string argument, show the string first because there
5154 isn't a tidy way to fit it in the rest of the data. */
5155
5156 if (cb->callout_string != NULL)
5157 {
5158 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
5159 fprintf(outfile, "Callout (%lu): %c",
5160 (unsigned long int)cb->callout_string_offset, delimiter);
5161 PCHARSV(cb->callout_string, 0,
5162 cb->callout_string_length, utf, outfile);
5163 for (i = 0; callout_start_delims[i] != 0; i++)
5164 if (delimiter == callout_start_delims[i])
5165 {
5166 delimiter = callout_end_delims[i];
5167 break;
5168 }
5169 fprintf(outfile, "%c", delimiter);
5170 if (!callout_capture) fprintf(outfile, "\n");
5171 }
5172
5173 /* Show captured strings if required */
5174
5175 if (callout_capture)
5176 {
5177 if (cb->callout_string == NULL)
5178 fprintf(outfile, "Callout %d:", cb->callout_number);
5179 fprintf(outfile, " last capture = %d\n", cb->capture_last);
5180 for (i = 0; i < cb->capture_top * 2; i += 2)
5181 {
5182 fprintf(outfile, "%2d: ", i/2);
5183 if (cb->offset_vector[i] == PCRE2_UNSET)
5184 fprintf(outfile, "<unset>");
5185 else
5186 {
5187 PCHARSV(cb->subject, cb->offset_vector[i],
5188 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5189 }
5190 fprintf(outfile, "\n");
5191 }
5192 }
5193
5194 /* Re-print the subject in canonical form (with escapes for non-printing
5195 characters), the first time, or if giving full details. On subsequent calls in
5196 the same match, we use PCHARS() just to find the printed lengths of the
5197 substrings. */
5198
5199 if (f != NULL) fprintf(f, "--->");
5200
5201 /* The subject before the match start. */
5202
5203 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5204
5205 /* If a lookbehind is involved, the current position may be earlier than the
5206 match start. If so, use the match start instead. */
5207
5208 current_position = (cb->current_position >= cb->start_match)?
5209 cb->current_position : cb->start_match;
5210
5211 /* The subject between the match start and the current position. */
5212
5213 PCHARS(post_start, cb->subject, cb->start_match,
5214 current_position - cb->start_match, utf, f);
5215
5216 /* Print from the current position to the end. */
5217
5218 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
5219 utf, f);
5220
5221 /* Calculate the total subject printed length (no print). */
5222
5223 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
5224
5225 if (f != NULL) fprintf(f, "\n");
5226
5227 /* For automatic callouts, show the pattern offset. Otherwise, for a numerical
5228 callout whose number has not already been shown with captured strings, show the
5229 number here. A callout with a string argument has been displayed above. */
5230
5231 if (cb->callout_number == 255)
5232 {
5233 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
5234 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
5235 }
5236 else
5237 {
5238 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
5239 else fprintf(outfile, "%3d ", cb->callout_number);
5240 }
5241
5242 /* Now show position indicators */
5243
5244 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
5245 fprintf(outfile, "^");
5246
5247 if (post_start > 0)
5248 {
5249 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
5250 fprintf(outfile, "^");
5251 }
5252
5253 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
5254 fprintf(outfile, " ");
5255
5256 fprintf(outfile, "%.*s",
5257 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
5258 pbuffer8 + cb->pattern_position);
5259
5260 fprintf(outfile, "\n");
5261 first_callout = FALSE;
5262
5263 if (cb->mark != last_callout_mark)
5264 {
5265 if (cb->mark == NULL)
5266 fprintf(outfile, "Latest Mark: <unset>\n");
5267 else
5268 {
5269 fprintf(outfile, "Latest Mark: ");
5270 PCHARSV(cb->mark, 0, -1, utf, outfile);
5271 putc('\n', outfile);
5272 }
5273 last_callout_mark = cb->mark;
5274 }
5275
5276 if (callout_data_ptr != NULL)
5277 {
5278 int callout_data = *((int32_t *)callout_data_ptr);
5279 if (callout_data != 0)
5280 {
5281 fprintf(outfile, "Callout data = %d\n", callout_data);
5282 return callout_data;
5283 }
5284 }
5285
5286 return (cb->callout_number != dat_datctl.cfail[0])? 0 :
5287 (++callout_count >= dat_datctl.cfail[1])? 1 : 0;
5288 }
5289
5290
5291
5292 /*************************************************
5293 * Handle *MARK and copy/get tests *
5294 *************************************************/
5295
5296 /* This function is called after complete and partial matches. It runs the
5297 tests for substring extraction.
5298
5299 Arguments:
5300 utf TRUE for utf
5301 capcount return from pcre2_match()
5302
5303 Returns: nothing
5304 */
5305
5306 static void
copy_and_get(BOOL utf,int capcount)5307 copy_and_get(BOOL utf, int capcount)
5308 {
5309 int i;
5310 uint8_t *nptr;
5311
5312 /* Test copy strings by number */
5313
5314 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
5315 {
5316 int rc;
5317 PCRE2_SIZE length, length2;
5318 uint32_t copybuffer[256];
5319 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
5320 length = sizeof(copybuffer)/code_unit_size;
5321 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
5322 if (rc < 0)
5323 {
5324 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
5325 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5326 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5327 fprintf(outfile, "\n");
5328 }
5329 else
5330 {
5331 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
5332 if (rc < 0)
5333 {
5334 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
5335 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5336 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5337 fprintf(outfile, "\n");
5338 }
5339 else if (length2 != length)
5340 {
5341 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5342 (unsigned long int)length, (unsigned long int)length2);
5343 }
5344 fprintf(outfile, "%2dC ", n);
5345 PCHARSV(copybuffer, 0, length, utf, outfile);
5346 fprintf(outfile, " (%lu)\n", (unsigned long)length);
5347 }
5348 }
5349
5350 /* Test copy strings by name */
5351
5352 nptr = dat_datctl.copy_names;
5353 for (;;)
5354 {
5355 int rc;
5356 int groupnumber;
5357 PCRE2_SIZE length, length2;
5358 uint32_t copybuffer[256];
5359 int namelen = strlen((const char *)nptr);
5360 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5361 PCRE2_SIZE cnl = namelen;
5362 #endif
5363 if (namelen == 0) break;
5364
5365 #ifdef SUPPORT_PCRE2_8
5366 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5367 #endif
5368 #ifdef SUPPORT_PCRE2_16
5369 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5370 #endif
5371 #ifdef SUPPORT_PCRE2_32
5372 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5373 #endif
5374
5375 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5376 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5377 fprintf(outfile, "Number not found for group '%s'\n", nptr);
5378
5379 length = sizeof(copybuffer)/code_unit_size;
5380 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
5381 if (rc < 0)
5382 {
5383 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
5384 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5385 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5386 fprintf(outfile, "\n");
5387 }
5388 else
5389 {
5390 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
5391 if (rc < 0)
5392 {
5393 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
5394 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5395 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5396 fprintf(outfile, "\n");
5397 }
5398 else if (length2 != length)
5399 {
5400 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
5401 (unsigned long int)length, (unsigned long int)length2);
5402 }
5403 fprintf(outfile, " C ");
5404 PCHARSV(copybuffer, 0, length, utf, outfile);
5405 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5406 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5407 else fprintf(outfile, " (non-unique)\n");
5408 }
5409 nptr += namelen + 1;
5410 }
5411
5412 /* Test get strings by number */
5413
5414 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
5415 {
5416 int rc;
5417 PCRE2_SIZE length;
5418 void *gotbuffer;
5419 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
5420 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
5421 if (rc < 0)
5422 {
5423 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
5424 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5425 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5426 fprintf(outfile, "\n");
5427 }
5428 else
5429 {
5430 fprintf(outfile, "%2dG ", n);
5431 PCHARSV(gotbuffer, 0, length, utf, outfile);
5432 fprintf(outfile, " (%lu)\n", (unsigned long)length);
5433 PCRE2_SUBSTRING_FREE(gotbuffer);
5434 }
5435 }
5436
5437 /* Test get strings by name */
5438
5439 nptr = dat_datctl.get_names;
5440 for (;;)
5441 {
5442 PCRE2_SIZE length;
5443 void *gotbuffer;
5444 int rc;
5445 int groupnumber;
5446 int namelen = strlen((const char *)nptr);
5447 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
5448 PCRE2_SIZE cnl = namelen;
5449 #endif
5450 if (namelen == 0) break;
5451
5452 #ifdef SUPPORT_PCRE2_8
5453 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
5454 #endif
5455 #ifdef SUPPORT_PCRE2_16
5456 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5457 #endif
5458 #ifdef SUPPORT_PCRE2_32
5459 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5460 #endif
5461
5462 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
5463 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
5464 fprintf(outfile, "Number not found for group '%s'\n", nptr);
5465
5466 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
5467 if (rc < 0)
5468 {
5469 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
5470 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5471 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5472 fprintf(outfile, "\n");
5473 }
5474 else
5475 {
5476 fprintf(outfile, " G ");
5477 PCHARSV(gotbuffer, 0, length, utf, outfile);
5478 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
5479 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
5480 else fprintf(outfile, " (non-unique)\n");
5481 PCRE2_SUBSTRING_FREE(gotbuffer);
5482 }
5483 nptr += namelen + 1;
5484 }
5485
5486 /* Test getting the complete list of captured strings. */
5487
5488 if ((dat_datctl.control & CTL_GETALL) != 0)
5489 {
5490 int rc;
5491 void **stringlist;
5492 PCRE2_SIZE *lengths;
5493 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
5494 if (rc < 0)
5495 {
5496 fprintf(outfile, "get substring list failed (%d): ", rc);
5497 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
5498 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
5499 fprintf(outfile, "\n");
5500 }
5501 else
5502 {
5503 for (i = 0; i < capcount; i++)
5504 {
5505 fprintf(outfile, "%2dL ", i);
5506 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
5507 putc('\n', outfile);
5508 }
5509 if (stringlist[i] != NULL)
5510 fprintf(outfile, "string list not terminated by NULL\n");
5511 PCRE2_SUBSTRING_LIST_FREE(stringlist);
5512 }
5513 }
5514 }
5515
5516
5517
5518 /*************************************************
5519 * Process a data line *
5520 *************************************************/
5521
5522 /* The line is in buffer; it will not be empty.
5523
5524 Arguments: none
5525
5526 Returns: PR_OK continue processing next line
5527 PR_SKIP skip to a blank line
5528 PR_ABEND abort the pcre2test run
5529 */
5530
5531 static int
process_data(void)5532 process_data(void)
5533 {
5534 PCRE2_SIZE len, ulen;
5535 uint32_t gmatched;
5536 uint32_t c, k;
5537 uint32_t g_notempty = 0;
5538 uint8_t *p, *pp, *start_rep;
5539 size_t needlen;
5540 void *use_dat_context;
5541 BOOL utf;
5542
5543 #ifdef SUPPORT_PCRE2_8
5544 uint8_t *q8 = NULL;
5545 #endif
5546 #ifdef SUPPORT_PCRE2_16
5547 uint16_t *q16 = NULL;
5548 #endif
5549 #ifdef SUPPORT_PCRE2_32
5550 uint32_t *q32 = NULL;
5551 #endif
5552
5553 /* Copy the default context and data control blocks to the active ones. Then
5554 copy from the pattern the controls that can be set in either the pattern or the
5555 data. This allows them to be overridden in the data line. We do not do this for
5556 options because those that are common apply separately to compiling and
5557 matching. */
5558
5559 DATCTXCPY(dat_context, default_dat_context);
5560 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
5561 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
5562 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
5563 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
5564
5565 /* Initialize for scanning the data line. */
5566
5567 #ifdef SUPPORT_PCRE2_8
5568 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
5569 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
5570 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
5571 #else
5572 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5573 #endif
5574
5575 start_rep = NULL;
5576 len = strlen((const char *)buffer);
5577 while (len > 0 && isspace(buffer[len-1])) len--;
5578 buffer[len] = 0;
5579 p = buffer;
5580 while (isspace(*p)) p++;
5581
5582 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
5583 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
5584
5585 if (utf)
5586 {
5587 uint8_t *q;
5588 uint32_t cc;
5589 int n = 1;
5590 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
5591 if (n <= 0)
5592 {
5593 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
5594 "in UTF mode\n");
5595 return PR_OK;
5596 }
5597 }
5598
5599 #ifdef SUPPORT_VALGRIND
5600 /* Mark the dbuffer as addressable but undefined again. */
5601 if (dbuffer != NULL)
5602 {
5603 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
5604 }
5605 #endif
5606
5607 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
5608 the number of code units that will be needed (though the buffer may have to be
5609 extended if replication is involved). */
5610
5611 needlen = (size_t)((len+1) * code_unit_size);
5612 if (dbuffer == NULL || needlen >= dbuffer_size)
5613 {
5614 while (needlen >= dbuffer_size) dbuffer_size *= 2;
5615 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5616 if (dbuffer == NULL)
5617 {
5618 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5619 exit(1);
5620 }
5621 }
5622 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
5623
5624 /* Scan the data line, interpreting data escapes, and put the result into a
5625 buffer of the appropriate width. In UTF mode, input can be UTF-8. */
5626
5627 while ((c = *p++) != 0)
5628 {
5629 int32_t i = 0;
5630 size_t replen;
5631
5632 /* ] may mark the end of a replicated sequence */
5633
5634 if (c == ']' && start_rep != NULL)
5635 {
5636 long li;
5637 char *endptr;
5638 size_t qoffset = CAST8VAR(q) - dbuffer;
5639 size_t rep_offset = start_rep - dbuffer;
5640
5641 if (*p++ != '{')
5642 {
5643 fprintf(outfile, "** Expected '{' after \\[....]\n");
5644 return PR_OK;
5645 }
5646
5647 li = strtol((const char *)p, &endptr, 10);
5648 if (S32OVERFLOW(li))
5649 {
5650 fprintf(outfile, "** Repeat count too large\n");
5651 return PR_OK;
5652 }
5653
5654 p = (uint8_t *)endptr;
5655 if (*p++ != '}')
5656 {
5657 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
5658 return PR_OK;
5659 }
5660
5661 i = (int32_t)li;
5662 if (i-- == 0)
5663 {
5664 fprintf(outfile, "** Zero repeat not allowed\n");
5665 return PR_OK;
5666 }
5667
5668 replen = CAST8VAR(q) - start_rep;
5669 needlen += replen * i;
5670
5671 if (needlen >= dbuffer_size)
5672 {
5673 while (needlen >= dbuffer_size) dbuffer_size *= 2;
5674 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
5675 if (dbuffer == NULL)
5676 {
5677 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
5678 exit(1);
5679 }
5680 SETCASTPTR(q, dbuffer + qoffset);
5681 start_rep = dbuffer + rep_offset;
5682 }
5683
5684 while (i-- > 0)
5685 {
5686 memcpy(CAST8VAR(q), start_rep, replen);
5687 SETPLUS(q, replen/code_unit_size);
5688 }
5689
5690 start_rep = NULL;
5691 continue;
5692 }
5693
5694 /* Handle a non-escaped character */
5695
5696 if (c != '\\')
5697 {
5698 if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
5699 }
5700
5701 /* Handle backslash escapes */
5702
5703 else switch ((c = *p++))
5704 {
5705 case '\\': break;
5706 case 'a': c = CHAR_BEL; break;
5707 case 'b': c = '\b'; break;
5708 case 'e': c = CHAR_ESC; break;
5709 case 'f': c = '\f'; break;
5710 case 'n': c = '\n'; break;
5711 case 'r': c = '\r'; break;
5712 case 't': c = '\t'; break;
5713 case 'v': c = '\v'; break;
5714
5715 case '0': case '1': case '2': case '3':
5716 case '4': case '5': case '6': case '7':
5717 c -= '0';
5718 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
5719 c = c * 8 + *p++ - '0';
5720 break;
5721
5722 case 'o':
5723 if (*p == '{')
5724 {
5725 uint8_t *pt = p;
5726 c = 0;
5727 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
5728 {
5729 if (++i == 12)
5730 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
5731 "using only the first twelve.\n");
5732 else c = c * 8 + *pt - '0';
5733 }
5734 if (*pt == '}') p = pt + 1;
5735 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
5736 }
5737 break;
5738
5739 case 'x':
5740 if (*p == '{')
5741 {
5742 uint8_t *pt = p;
5743 c = 0;
5744
5745 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
5746 when isxdigit() is a macro that refers to its argument more than
5747 once. This is banned by the C Standard, but apparently happens in at
5748 least one MacOS environment. */
5749
5750 for (pt++; isxdigit(*pt); pt++)
5751 {
5752 if (++i == 9)
5753 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
5754 "using only the first eight.\n");
5755 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
5756 }
5757 if (*pt == '}')
5758 {
5759 p = pt + 1;
5760 break;
5761 }
5762 /* Not correct form for \x{...}; fall through */
5763 }
5764
5765 /* \x without {} always defines just one byte in 8-bit mode. This
5766 allows UTF-8 characters to be constructed byte by byte, and also allows
5767 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
5768 Otherwise, pass it down as data. */
5769
5770 c = 0;
5771 while (i++ < 2 && isxdigit(*p))
5772 {
5773 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
5774 p++;
5775 }
5776 #if defined SUPPORT_PCRE2_8
5777 if (utf && (test_mode == PCRE8_MODE))
5778 {
5779 *q8++ = c;
5780 continue;
5781 }
5782 #endif
5783 break;
5784
5785 case 0: /* \ followed by EOF allows for an empty line */
5786 p--;
5787 continue;
5788
5789 case '=': /* \= terminates the data, starts modifiers */
5790 goto ENDSTRING;
5791
5792 case '[': /* \[ introduces a replicated character sequence */
5793 if (start_rep != NULL)
5794 {
5795 fprintf(outfile, "** Nested replication is not supported\n");
5796 return PR_OK;
5797 }
5798 start_rep = CAST8VAR(q);
5799 continue;
5800
5801 default:
5802 if (isalnum(c))
5803 {
5804 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
5805 return PR_OK;
5806 }
5807 }
5808
5809 /* We now have a character value in c that may be greater than 255.
5810 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
5811 than 127 in UTF mode must have come from \x{...} or octal constructs
5812 because values from \x.. get this far only in non-UTF mode. */
5813
5814 #ifdef SUPPORT_PCRE2_8
5815 if (test_mode == PCRE8_MODE)
5816 {
5817 if (utf)
5818 {
5819 if (c > 0x7fffffff)
5820 {
5821 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
5822 "and so cannot be converted to UTF-8\n", c);
5823 return PR_OK;
5824 }
5825 q8 += ord2utf8(c, q8);
5826 }
5827 else
5828 {
5829 if (c > 0xffu)
5830 {
5831 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
5832 "and UTF-8 mode is not enabled.\n", c);
5833 fprintf(outfile, "** Truncation will probably give the wrong "
5834 "result.\n");
5835 }
5836 *q8++ = c;
5837 }
5838 }
5839 #endif
5840 #ifdef SUPPORT_PCRE2_16
5841 if (test_mode == PCRE16_MODE)
5842 {
5843 if (utf)
5844 {
5845 if (c > 0x10ffffu)
5846 {
5847 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
5848 "0x10ffff and so cannot be converted to UTF-16\n", c);
5849 return PR_OK;
5850 }
5851 else if (c >= 0x10000u)
5852 {
5853 c-= 0x10000u;
5854 *q16++ = 0xD800 | (c >> 10);
5855 *q16++ = 0xDC00 | (c & 0x3ff);
5856 }
5857 else
5858 *q16++ = c;
5859 }
5860 else
5861 {
5862 if (c > 0xffffu)
5863 {
5864 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
5865 "and UTF-16 mode is not enabled.\n", c);
5866 fprintf(outfile, "** Truncation will probably give the wrong "
5867 "result.\n");
5868 }
5869
5870 *q16++ = c;
5871 }
5872 }
5873 #endif
5874 #ifdef SUPPORT_PCRE2_32
5875 if (test_mode == PCRE32_MODE)
5876 {
5877 *q32++ = c;
5878 }
5879 #endif
5880 }
5881
5882 ENDSTRING:
5883 SET(*q, 0);
5884 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
5885 ulen = len/code_unit_size; /* Length in code units */
5886
5887 /* If the string was terminated by \= we must now interpret modifiers. */
5888
5889 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
5890 return PR_OK;
5891
5892 /* Check for mutually exclusive modifiers. At present, these are all in the
5893 first control word. */
5894
5895 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
5896 {
5897 c = dat_datctl.control & exclusive_dat_controls[k];
5898 if (c != 0 && c != (c & (~c+1)))
5899 {
5900 show_controls(c, 0, "** Not allowed together:");
5901 fprintf(outfile, "\n");
5902 return PR_OK;
5903 }
5904 }
5905
5906 if (pat_patctl.replacement[0] != 0 &&
5907 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
5908 {
5909 fprintf(outfile, "** Replacement text is not supported with null_context.\n");
5910 return PR_OK;
5911 }
5912
5913 /* We now have the subject in dbuffer, with len containing the byte length, and
5914 ulen containing the code unit length. Move the data to the end of the buffer so
5915 that a read over the end can be caught by valgrind or other means. If we have
5916 explicit valgrind support, mark the unused start of the buffer unaddressable.
5917 If we are using the POSIX interface, or testing zero-termination, we must
5918 include the terminating zero in the usable data. */
5919
5920 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
5921 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
5922 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
5923 #ifdef SUPPORT_VALGRIND
5924 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
5925 #endif
5926
5927 /* Now pp points to the subject string. POSIX matching is only possible in
5928 8-bit mode, and it does not support timing or other fancy features. Some were
5929 checked at compile time, but we need to check the match-time settings here. */
5930
5931 #ifdef SUPPORT_PCRE2_8
5932 if ((pat_patctl.control & CTL_POSIX) != 0)
5933 {
5934 int rc;
5935 int eflags = 0;
5936 regmatch_t *pmatch = NULL;
5937 const char *msg = "** Ignored with POSIX interface:";
5938
5939 if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET)
5940 prmsg(&msg, "callout_fail");
5941 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
5942 prmsg(&msg, "copy");
5943 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
5944 prmsg(&msg, "get");
5945 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
5946
5947 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
5948 {
5949 fprintf(outfile, "%s", msg);
5950 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
5951 msg = "";
5952 }
5953 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
5954 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
5955 {
5956 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
5957 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
5958 msg = "";
5959 }
5960
5961 if (msg[0] == 0) fprintf(outfile, "\n");
5962
5963 if (dat_datctl.oveccount > 0)
5964 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
5965 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
5966 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
5967 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5968
5969 rc = regexec(&preg, (const char *)pp + dat_datctl.offset,
5970 dat_datctl.oveccount, pmatch, eflags);
5971 if (rc != 0)
5972 {
5973 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
5974 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
5975 }
5976 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
5977 fprintf(outfile, "Matched with REG_NOSUB\n");
5978 else if (dat_datctl.oveccount == 0)
5979 fprintf(outfile, "Matched without capture\n");
5980 else
5981 {
5982 size_t i;
5983 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
5984 {
5985 if (pmatch[i].rm_so >= 0)
5986 {
5987 fprintf(outfile, "%2d: ", (int)i);
5988 PCHARSV(pp, pmatch[i].rm_so,
5989 pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
5990 fprintf(outfile, "\n");
5991 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
5992 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
5993 {
5994 fprintf(outfile, "%2d+ ", (int)i);
5995 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5996 utf, outfile);
5997 fprintf(outfile, "\n");
5998 }
5999 }
6000 }
6001 }
6002 free(pmatch);
6003 return PR_OK;
6004 }
6005 #endif /* SUPPORT_PCRE2_8 */
6006
6007 /* Handle matching via the native interface. Check for consistency of
6008 modifiers. */
6009
6010 if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
6011 {
6012 fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
6013 dat_datctl.control &= ~CTL_FINDLIMITS;
6014 }
6015
6016 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
6017 matching, even if the JIT compiler was used. */
6018
6019 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
6020 FLD(compiled_code, executable_jit) != NULL)
6021 {
6022 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
6023 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
6024 }
6025
6026 /* Handle passing the subject as zero-terminated. */
6027
6028 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6029 ulen = PCRE2_ZERO_TERMINATED;
6030
6031 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
6032 NULL context. */
6033
6034 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
6035 NULL : PTR(dat_context);
6036
6037 /* Enable display of malloc/free if wanted. */
6038
6039 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
6040
6041 /* Create and assign a JIT stack if requested. */
6042
6043 if (dat_datctl.jitstack != 0)
6044 {
6045 if (dat_datctl.jitstack != jit_stack_size)
6046 {
6047 PCRE2_JIT_STACK_FREE(jit_stack);
6048 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
6049 jit_stack_size = dat_datctl.jitstack;
6050 }
6051 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
6052 }
6053
6054 /* Or de-assign */
6055
6056 else if (jit_stack != NULL)
6057 {
6058 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
6059 PCRE2_JIT_STACK_FREE(jit_stack);
6060 jit_stack = NULL;
6061 jit_stack_size = 0;
6062 }
6063
6064 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
6065 if we want to verify that JIT was actually used. */
6066
6067 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
6068 {
6069 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
6070 }
6071
6072 /* Adjust match_data according to size of offsets required. A size of zero
6073 causes a new match data block to be obtained that exactly fits the pattern. */
6074
6075 if (dat_datctl.oveccount == 0)
6076 {
6077 PCRE2_MATCH_DATA_FREE(match_data);
6078 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
6079 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
6080 }
6081 else if (dat_datctl.oveccount <= max_oveccount)
6082 {
6083 SETFLD(match_data, oveccount, dat_datctl.oveccount);
6084 }
6085 else
6086 {
6087 max_oveccount = dat_datctl.oveccount;
6088 PCRE2_MATCH_DATA_FREE(match_data);
6089 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
6090 }
6091
6092 /* Replacement processing is ignored for DFA matching. */
6093
6094 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
6095 {
6096 fprintf(outfile, "** Ignored for DFA matching: replace\n");
6097 dat_datctl.replacement[0] = 0;
6098 }
6099
6100 /* If a replacement string is provided, call pcre2_substitute() instead of one
6101 of the matching functions. First we have to convert the replacement string to
6102 the appropriate width. */
6103
6104 if (dat_datctl.replacement[0] != 0)
6105 {
6106 int rc;
6107 uint8_t *pr;
6108 uint8_t rbuffer[REPLACE_BUFFSIZE];
6109 uint8_t nbuffer[REPLACE_BUFFSIZE];
6110 uint32_t xoptions;
6111 PCRE2_SIZE rlen, nsize, erroroffset;
6112 BOOL badutf = FALSE;
6113
6114 #ifdef SUPPORT_PCRE2_8
6115 uint8_t *r8 = NULL;
6116 #endif
6117 #ifdef SUPPORT_PCRE2_16
6118 uint16_t *r16 = NULL;
6119 #endif
6120 #ifdef SUPPORT_PCRE2_32
6121 uint32_t *r32 = NULL;
6122 #endif
6123
6124 if (timeitm)
6125 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
6126
6127 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
6128 PCRE2_SUBSTITUTE_GLOBAL) |
6129 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
6130 PCRE2_SUBSTITUTE_EXTENDED) |
6131 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
6132 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
6133 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
6134 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
6135 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
6136 PCRE2_SUBSTITUTE_UNSET_EMPTY);
6137
6138 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
6139 pr = dat_datctl.replacement;
6140
6141 /* If the replacement starts with '[<number>]' we interpret that as length
6142 value for the replacement buffer. */
6143
6144 nsize = REPLACE_BUFFSIZE/code_unit_size;
6145 if (*pr == '[')
6146 {
6147 PCRE2_SIZE n = 0;
6148 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
6149 if (*pr++ != ']')
6150 {
6151 fprintf(outfile, "Bad buffer size in replacement string\n");
6152 return PR_OK;
6153 }
6154 if (n > nsize)
6155 {
6156 fprintf(outfile, "Replacement buffer setting (%lu) is too large "
6157 "(max %lu)\n", (unsigned long int)n, (unsigned long int)nsize);
6158 return PR_OK;
6159 }
6160 nsize = n;
6161 }
6162
6163 /* Now copy the replacement string to a buffer of the appropriate width. No
6164 escape processing is done for replacements. In UTF mode, check for an invalid
6165 UTF-8 input string, and if it is invalid, just copy its code units without
6166 UTF interpretation. This provides a means of checking that an invalid string
6167 is detected. Otherwise, UTF-8 can be used to include wide characters in a
6168 replacement. */
6169
6170 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
6171
6172 /* Not UTF or invalid UTF-8: just copy the code units. */
6173
6174 if (!utf || badutf)
6175 {
6176 while ((c = *pr++) != 0)
6177 {
6178 #ifdef SUPPORT_PCRE2_8
6179 if (test_mode == PCRE8_MODE) *r8++ = c;
6180 #endif
6181 #ifdef SUPPORT_PCRE2_16
6182 if (test_mode == PCRE16_MODE) *r16++ = c;
6183 #endif
6184 #ifdef SUPPORT_PCRE2_32
6185 if (test_mode == PCRE32_MODE) *r32++ = c;
6186 #endif
6187 }
6188 }
6189
6190 /* Valid UTF-8 replacement string */
6191
6192 else while ((c = *pr++) != 0)
6193 {
6194 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
6195
6196 #ifdef SUPPORT_PCRE2_8
6197 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
6198 #endif
6199
6200 #ifdef SUPPORT_PCRE2_16
6201 if (test_mode == PCRE16_MODE)
6202 {
6203 if (c >= 0x10000u)
6204 {
6205 c-= 0x10000u;
6206 *r16++ = 0xD800 | (c >> 10);
6207 *r16++ = 0xDC00 | (c & 0x3ff);
6208 }
6209 else *r16++ = c;
6210 }
6211 #endif
6212
6213 #ifdef SUPPORT_PCRE2_32
6214 if (test_mode == PCRE32_MODE) *r32++ = c;
6215 #endif
6216 }
6217
6218 SET(*r, 0);
6219 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6220 rlen = PCRE2_ZERO_TERMINATED;
6221 else
6222 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
6223 PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
6224 dat_datctl.options|xoptions, match_data, dat_context,
6225 rbuffer, rlen, nbuffer, &nsize);
6226
6227 if (rc < 0)
6228 {
6229 PCRE2_SIZE msize;
6230 fprintf(outfile, "Failed: error %d", rc);
6231 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
6232 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
6233 fprintf(outfile, ": ");
6234 PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer);
6235 PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile);
6236 if (rc == PCRE2_ERROR_NOMEMORY &&
6237 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
6238 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
6239 }
6240 else
6241 {
6242 fprintf(outfile, "%2d: ", rc);
6243 PCHARSV(nbuffer, 0, nsize, utf, outfile);
6244 }
6245
6246 fprintf(outfile, "\n");
6247 } /* End of substitution handling */
6248
6249 /* When a replacement string is not provided, run a loop for global matching
6250 with one of the basic matching functions. */
6251
6252 else for (gmatched = 0;; gmatched++)
6253 {
6254 PCRE2_SIZE j;
6255 int capcount;
6256 PCRE2_SIZE *ovector;
6257 PCRE2_SIZE ovecsave[2];
6258
6259 ovector = FLD(match_data, ovector);
6260
6261 /* After the first time round a global loop, for a normal global (/g)
6262 iteration, save the current ovector[0,1] so that we can check that they do
6263 change each time. Otherwise a matching bug that returns the same string
6264 causes an infinite loop. It has happened! */
6265
6266 if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
6267 {
6268 ovecsave[0] = ovector[0];
6269 ovecsave[1] = ovector[1];
6270 }
6271
6272 /* For altglobal (or first time round the loop), set an "unset" value. */
6273
6274 else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
6275
6276 /* Fill the ovector with junk to detect elements that do not get set
6277 when they should be. */
6278
6279 for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
6280
6281 /* When matching is via pcre2_match(), we will detect the use of JIT via the
6282 stack callback function. */
6283
6284 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
6285
6286 /* Do timing if required. */
6287
6288 if (timeitm > 0)
6289 {
6290 register int i;
6291 clock_t start_time, time_taken;
6292
6293 if ((dat_datctl.control & CTL_DFA) != 0)
6294 {
6295 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
6296 {
6297 fprintf(outfile, "Timing DFA restarts is not supported\n");
6298 return PR_OK;
6299 }
6300 if (dfa_workspace == NULL)
6301 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6302 start_time = clock();
6303 for (i = 0; i < timeitm; i++)
6304 {
6305 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
6306 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6307 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
6308 }
6309 }
6310
6311 else if ((pat_patctl.control & CTL_JITFAST) != 0)
6312 {
6313 start_time = clock();
6314 for (i = 0; i < timeitm; i++)
6315 {
6316 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen,
6317 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6318 use_dat_context);
6319 }
6320 }
6321
6322 else
6323 {
6324 start_time = clock();
6325 for (i = 0; i < timeitm; i++)
6326 {
6327 PCRE2_MATCH(capcount, compiled_code, pp, ulen,
6328 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6329 use_dat_context);
6330 }
6331 }
6332 total_match_time += (time_taken = clock() - start_time);
6333 fprintf(outfile, "Match time %.4f milliseconds\n",
6334 (((double)time_taken * 1000.0) / (double)timeitm) /
6335 (double)CLOCKS_PER_SEC);
6336 }
6337
6338 /* Find the match and recursion limits if requested. The recursion limit
6339 is not relevant for JIT. */
6340
6341 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
6342 {
6343 capcount = check_match_limit(pp, ulen, PCRE2_ERROR_MATCHLIMIT, "match");
6344 if (FLD(compiled_code, executable_jit) == NULL)
6345 (void)check_match_limit(pp, ulen, PCRE2_ERROR_RECURSIONLIMIT,
6346 "recursion");
6347 }
6348
6349 /* Otherwise just run a single match, setting up a callout if required (the
6350 default). */
6351
6352 else
6353 {
6354 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
6355 {
6356 PCRE2_SET_CALLOUT(dat_context, callout_function,
6357 (void *)(&dat_datctl.callout_data));
6358 first_callout = TRUE;
6359 last_callout_mark = NULL;
6360 callout_count = 0;
6361 }
6362 else
6363 {
6364 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
6365 }
6366
6367 /* Run a single DFA or NFA match. */
6368
6369 if ((dat_datctl.control & CTL_DFA) != 0)
6370 {
6371 if (dfa_workspace == NULL)
6372 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6373 if (dfa_matched++ == 0)
6374 dfa_workspace[0] = -1; /* To catch bad restart */
6375 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
6376 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
6377 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
6378 if (capcount == 0)
6379 {
6380 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
6381 capcount = dat_datctl.oveccount;
6382 }
6383 }
6384 else
6385 {
6386 if ((pat_patctl.control & CTL_JITFAST) != 0)
6387 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6388 dat_datctl.options | g_notempty, match_data, use_dat_context);
6389 else
6390 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6391 dat_datctl.options | g_notempty, match_data, use_dat_context);
6392 if (capcount == 0)
6393 {
6394 fprintf(outfile, "Matched, but too many substrings\n");
6395 capcount = dat_datctl.oveccount;
6396 }
6397 }
6398 }
6399
6400 /* The result of the match is now in capcount. First handle a successful
6401 match. */
6402
6403 if (capcount >= 0)
6404 {
6405 int i;
6406 uint32_t oveccount;
6407
6408 /* This is a check against a lunatic return value. */
6409
6410 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
6411 if (capcount > (int)oveccount)
6412 {
6413 fprintf(outfile,
6414 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
6415 capcount, oveccount);
6416 capcount = oveccount;
6417 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
6418 {
6419 fprintf(outfile, "** Global loop abandoned\n");
6420 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
6421 }
6422 }
6423
6424 /* If this is not the first time round a global loop, check that the
6425 returned string has changed. If not, there is a bug somewhere and we must
6426 break the loop because it will go on for ever. We know that there are
6427 always at least two elements in the ovector. */
6428
6429 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
6430 {
6431 fprintf(outfile,
6432 "** PCRE2 error: global repeat returned the same string as previous\n");
6433 fprintf(outfile, "** Global loop abandoned\n");
6434 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
6435 }
6436
6437 /* "allcaptures" requests showing of all captures in the pattern, to check
6438 unset ones at the end. It may be set on the pattern or the data. Implement
6439 by setting capcount to the maximum. This is not relevant for DFA matching,
6440 so ignore it. */
6441
6442 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
6443 {
6444 uint32_t maxcapcount;
6445 if ((dat_datctl.control & CTL_DFA) != 0)
6446 {
6447 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
6448 }
6449 else
6450 {
6451 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
6452 return PR_SKIP;
6453 capcount = maxcapcount + 1; /* Allow for full match */
6454 if (capcount > (int)oveccount) capcount = oveccount;
6455 }
6456 }
6457
6458 /* Output the captured substrings. Note that, for the matched string,
6459 the use of \K in an assertion can make the start later than the end. */
6460
6461 for (i = 0; i < 2*capcount; i += 2)
6462 {
6463 PCRE2_SIZE lleft, lmiddle, lright;
6464 PCRE2_SIZE start = ovector[i];
6465 PCRE2_SIZE end = ovector[i+1];
6466
6467 if (start > end)
6468 {
6469 start = ovector[i+1];
6470 end = ovector[i];
6471 fprintf(outfile, "Start of matched string is beyond its end - "
6472 "displaying from end to start.\n");
6473 }
6474
6475 fprintf(outfile, "%2d: ", i/2);
6476
6477 /* Check for an unset group */
6478
6479 if (start == PCRE2_UNSET)
6480 {
6481 fprintf(outfile, "<unset>\n");
6482 continue;
6483 }
6484
6485 /* Check for silly offsets, in particular, values that have not been
6486 set when they should have been. */
6487
6488 if (start > ulen || end > ulen)
6489 {
6490 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
6491 (unsigned long int)start, (unsigned long int)end);
6492 continue;
6493 }
6494
6495 /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
6496 JIT, it is disabled above, with a comment.) When the match is done by the
6497 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
6498 set, and if the leftmost consulted character is before the start of the
6499 match or the rightmost consulted character is past the end of the match,
6500 we want to show all consulted characters for the main matched string, and
6501 indicate which were lookarounds. */
6502
6503 if (i == 0)
6504 {
6505 BOOL showallused;
6506 PCRE2_SIZE leftchar, rightchar;
6507
6508 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
6509 {
6510 leftchar = FLD(match_data, leftchar);
6511 rightchar = FLD(match_data, rightchar);
6512 showallused = i == 0 && (leftchar < start || rightchar > end);
6513 }
6514 else showallused = FALSE;
6515
6516 if (showallused)
6517 {
6518 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
6519 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
6520 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
6521 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6522 fprintf(outfile, " (JIT)");
6523 fprintf(outfile, "\n ");
6524 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
6525 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
6526 for (j = 0; j < lright; j++) fprintf(outfile, ">");
6527 }
6528
6529 /* When a pattern contains \K, the start of match position may be
6530 different to the start of the matched string. When this is the case,
6531 show it when requested. */
6532
6533 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
6534 {
6535 PCRE2_SIZE startchar;
6536 PCRE2_GET_STARTCHAR(startchar, match_data);
6537 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
6538 PCHARSV(pp, start, end - start, utf, outfile);
6539 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6540 fprintf(outfile, " (JIT)");
6541 if (startchar != start)
6542 {
6543 fprintf(outfile, "\n ");
6544 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
6545 }
6546 }
6547
6548 /* Otherwise, just show the matched string. */
6549
6550 else
6551 {
6552 PCHARSV(pp, start, end - start, utf, outfile);
6553 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6554 fprintf(outfile, " (JIT)");
6555 }
6556 }
6557
6558 /* Not the main matched string. Just show it unadorned. */
6559
6560 else
6561 {
6562 PCHARSV(pp, start, end - start, utf, outfile);
6563 }
6564
6565 fprintf(outfile, "\n");
6566
6567 /* Note: don't use the start/end variables here because we want to
6568 show the text from what is reported as the end. */
6569
6570 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
6571 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
6572 {
6573 fprintf(outfile, "%2d+ ", i/2);
6574 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
6575 fprintf(outfile, "\n");
6576 }
6577 }
6578
6579 /* Output (*MARK) data if requested */
6580
6581 if ((dat_datctl.control & CTL_MARK) != 0 &&
6582 TESTFLD(match_data, mark, !=, NULL))
6583 {
6584 fprintf(outfile, "MK: ");
6585 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
6586 fprintf(outfile, "\n");
6587 }
6588
6589 /* Process copy/get strings */
6590
6591 copy_and_get(utf, capcount);
6592
6593 } /* End of handling a successful match */
6594
6595 /* There was a partial match. The value of ovector[0] is the bumpalong point,
6596 that is, startchar, not any \K point that might have been passed. */
6597
6598 else if (capcount == PCRE2_ERROR_PARTIAL)
6599 {
6600 PCRE2_SIZE poffset;
6601 int backlength;
6602 int rubriclength = 0;
6603
6604 fprintf(outfile, "Partial match");
6605 if ((dat_datctl.control & CTL_MARK) != 0 &&
6606 TESTFLD(match_data, mark, !=, NULL))
6607 {
6608 fprintf(outfile, ", mark=");
6609 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
6610 outfile);
6611 rubriclength += 7;
6612 }
6613 fprintf(outfile, ": ");
6614 rubriclength += 15;
6615
6616 poffset = backchars(pp, ovector[0], maxlookbehind, utf);
6617 PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
6618 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
6619
6620 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6621 fprintf(outfile, " (JIT)");
6622 fprintf(outfile, "\n");
6623
6624 if (backlength != 0)
6625 {
6626 int i;
6627 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
6628 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
6629 fprintf(outfile, "\n");
6630 }
6631
6632 /* Process copy/get strings */
6633
6634 copy_and_get(utf, 1);
6635
6636 break; /* Out of the /g loop */
6637 } /* End of handling partial match */
6638
6639 /* Failed to match. If this is a /g or /G loop, we might previously have
6640 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
6641 If that is the case, this is not necessarily the end. We want to advance the
6642 start offset, and continue. We won't be at the end of the string - that was
6643 checked before setting g_notempty. We achieve the effect by pretending that a
6644 single character was matched.
6645
6646 Complication arises in the case when the newline convention is "any", "crlf",
6647 or "anycrlf". If the previous match was at the end of a line terminated by
6648 CRLF, an advance of one character just passes the CR, whereas we should
6649 prefer the longer newline sequence, as does the code in pcre2_match().
6650
6651 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
6652 character, not one byte. */
6653
6654 else if (g_notempty != 0) /* There was a previous null match */
6655 {
6656 uint16_t nl = FLD(compiled_code, newline_convention);
6657 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
6658 PCRE2_SIZE end_offset = start_offset + 1;
6659
6660 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
6661 nl == PCRE2_NEWLINE_ANYCRLF) &&
6662 start_offset < ulen - 1 &&
6663 CODE_UNIT(pp, start_offset) == '\r' &&
6664 CODE_UNIT(pp, end_offset) == '\n')
6665 end_offset++;
6666
6667 else if (utf && test_mode != PCRE32_MODE)
6668 {
6669 if (test_mode == PCRE8_MODE)
6670 {
6671 for (; end_offset < ulen; end_offset++)
6672 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
6673 }
6674 else /* 16-bit mode */
6675 {
6676 for (; end_offset < ulen; end_offset++)
6677 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
6678 }
6679 }
6680
6681 SETFLDVEC(match_data, ovector, 0, start_offset);
6682 SETFLDVEC(match_data, ovector, 1, end_offset);
6683 } /* End of handling null match in a global loop */
6684
6685 /* A "normal" match failure. There will be a negative error number in
6686 capcount. */
6687
6688 else
6689 {
6690 int mlen;
6691
6692 switch(capcount)
6693 {
6694 case PCRE2_ERROR_NOMATCH:
6695 if (gmatched == 0)
6696 {
6697 fprintf(outfile, "No match");
6698 if ((dat_datctl.control & CTL_MARK) != 0 &&
6699 TESTFLD(match_data, mark, !=, NULL))
6700 {
6701 fprintf(outfile, ", mark = ");
6702 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
6703 }
6704 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
6705 fprintf(outfile, " (JIT)");
6706 fprintf(outfile, "\n");
6707 }
6708 break;
6709
6710 case PCRE2_ERROR_BADUTFOFFSET:
6711 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
6712 break;
6713
6714 default:
6715 fprintf(outfile, "Failed: error %d: ", capcount);
6716 PCRE2_GET_ERROR_MESSAGE(mlen, capcount, pbuffer);
6717 PCHARSV(CASTVAR(void *, pbuffer), 0, mlen, FALSE, outfile);
6718 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
6719 capcount >= PCRE2_ERROR_UTF32_ERR2)
6720 {
6721 PCRE2_SIZE startchar;
6722 PCRE2_GET_STARTCHAR(startchar, match_data);
6723 fprintf(outfile, " at offset %lu", (unsigned long int)startchar);
6724 }
6725 fprintf(outfile, "\n");
6726 break;
6727 }
6728
6729 break; /* Out of the /g loop */
6730 } /* End of failed match handling */
6731
6732 /* Control reaches here in two circumstances: (a) after a match, and (b)
6733 after a non-match that immediately followed a match on an empty string when
6734 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
6735 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
6736 of one character. So effectively we get here only after a match. If we
6737 are not doing a global search, we are done. */
6738
6739 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
6740 {
6741 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
6742
6743 /* We must now set up for the next iteration of a global search. If we have
6744 matched an empty string, first check to see if we are at the end of the
6745 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
6746 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
6747 at the same point. If this fails it will be picked up above, where a fake
6748 match is set up so that at this point we advance to the next character. */
6749
6750 if (FLD(match_data, ovector)[0] == end_offset)
6751 {
6752 if (end_offset == ulen) break; /* End of subject */
6753 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
6754 }
6755
6756 /* However, even after matching a non-empty string, there is still one
6757 tricky case. If a pattern contains \K within a lookbehind assertion at the
6758 start, the end of the matched string can be at the offset where the match
6759 started. In the case of a normal /g iteration without special action, this
6760 leads to a loop that keeps on returning the same substring. The loop would
6761 be caught above, but we really want to move on to the next match. */
6762
6763 else
6764 {
6765 g_notempty = 0; /* Set for a "normal" repeat */
6766 if ((dat_datctl.control & CTL_GLOBAL) != 0)
6767 {
6768 PCRE2_SIZE startchar;
6769 PCRE2_GET_STARTCHAR(startchar, match_data);
6770 if (end_offset <= startchar)
6771 {
6772 if (startchar >= ulen) break; /* End of subject */
6773 end_offset = startchar + 1;
6774 if (utf && test_mode != PCRE32_MODE)
6775 {
6776 if (test_mode == PCRE8_MODE)
6777 {
6778 for (; end_offset < ulen; end_offset++)
6779 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
6780 }
6781 else /* 16-bit mode */
6782 {
6783 for (; end_offset < ulen; end_offset++)
6784 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
6785 }
6786 }
6787 }
6788 }
6789 }
6790
6791 /* For /g (global), update the start offset, leaving the rest alone. */
6792
6793 if ((dat_datctl.control & CTL_GLOBAL) != 0)
6794 dat_datctl.offset = end_offset;
6795
6796 /* For altglobal, just update the pointer and length. */
6797
6798 else
6799 {
6800 pp += end_offset * code_unit_size;
6801 len -= end_offset * code_unit_size;
6802 ulen -= end_offset;
6803 }
6804 }
6805 } /* End of global loop */
6806
6807 show_memory = FALSE;
6808 return PR_OK;
6809 }
6810
6811
6812
6813
6814 /*************************************************
6815 * Print PCRE2 version *
6816 *************************************************/
6817
6818 static void
print_version(FILE * f)6819 print_version(FILE *f)
6820 {
6821 VERSION_TYPE *vp;
6822 fprintf(f, "PCRE2 version ");
6823 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
6824 fprintf(f, "\n");
6825 }
6826
6827
6828
6829 /*************************************************
6830 * Print Unicode version *
6831 *************************************************/
6832
6833 static void
print_unicode_version(FILE * f)6834 print_unicode_version(FILE *f)
6835 {
6836 VERSION_TYPE *vp;
6837 fprintf(f, "Unicode version ");
6838 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
6839 }
6840
6841
6842
6843 /*************************************************
6844 * Print JIT target *
6845 *************************************************/
6846
6847 static void
print_jit_target(FILE * f)6848 print_jit_target(FILE *f)
6849 {
6850 VERSION_TYPE *vp;
6851 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
6852 }
6853
6854
6855
6856 /*************************************************
6857 * Print newline configuration *
6858 *************************************************/
6859
6860 /* Output is always to stdout.
6861
6862 Arguments:
6863 rc the return code from PCRE2_CONFIG_NEWLINE
6864 isc TRUE if called from "-C newline"
6865 Returns: nothing
6866 */
6867
6868 static void
print_newline_config(uint32_t optval,BOOL isc)6869 print_newline_config(uint32_t optval, BOOL isc)
6870 {
6871 if (!isc) printf(" Newline sequence is ");
6872 if (optval < sizeof(newlines)/sizeof(char *))
6873 printf("%s\n", newlines[optval]);
6874 else
6875 printf("a non-standard value: %d\n", optval);
6876 }
6877
6878
6879
6880 /*************************************************
6881 * Usage function *
6882 *************************************************/
6883
/* Write the command line usage summary to stdout. The sentence about
readline() and the -8, -16 and -32 option lines depend on how the program was
built. None of the text contains a percent sign, so it is written with fputs()
in groups of adjacent string literals.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
fputs(
  "Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

/* Only the code unit widths that were compiled in are listed. */

#ifdef SUPPORT_PCRE2_8
fputs(" -8 use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

/* The -b and -C options, with the list of -C arguments. */

fputs(
  " -b set default pattern control 'fullbincode'\n"
  " -C show PCRE2 compile-time options and exit\n"
  " -C arg show a specific compile-time option and exit with its\n"
  " value if numeric (else 0). The arg can be:\n"
  " backslash-C use of \\C is enabled [0, 1]\n"
  " bsr \\R type [ANYCRLF, ANY]\n"
  " ebcdic compiled for EBCDIC character code [0,1]\n"
  " ebcdic-nl NL code if compiled for EBCDIC\n"
  " jit just-in-time compiler supported [0, 1]\n"
  " linksize internal link size [2, 3, 4]\n"
  " newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n"
  " pcre2-8 8 bit library support enabled [0, 1]\n"
  " pcre2-16 16 bit library support enabled [0, 1]\n"
  " pcre2-32 32 bit library support enabled [0, 1]\n"
  " unicode Unicode and UTF support enabled [0, 1]\n", stdout);

/* The remaining options. */

fputs(
  " -d set default pattern control 'debug'\n"
  " -dfa set default subject control 'dfa'\n"
  " -error <n,m,..> show messages for error numbers, then exit\n"
  " -help show usage information\n"
  " -i set default pattern control 'info'\n"
  " -jit set default pattern control 'jit'\n"
  " -q quiet: do not output PCRE2 version number at start\n"
  " -pattern <s> set default pattern control fields\n"
  " -subject <s> set default subject control fields\n"
  " -S <n> set stack size to <n> megabytes\n"
  " -t [<n>] time compilation and execution, repeating <n> times\n"
  " -tm [<n>] time execution (matching) only, repeating <n> times\n"
  " -T same as -t, but show total times at the end\n"
  " -TM same as -tm, but show total time at the end\n"
  " -version show PCRE2 version and exit\n", stdout);
}
6935
6936
6937
6938 /*************************************************
6939 * Handle -C option *
6940 *************************************************/
6941
6942 /* This option outputs configuration options and sets an appropriate return
6943 code when asked for a single option. The code is abstracted into a separate
6944 function because of its size. Use whichever pcre2_config() function is
6945 available.
6946
6947 Argument: an option name or NULL
6948 Returns: the return code
6949 */
6950
6951 static int
c_option(const char * arg)6952 c_option(const char *arg)
6953 {
6954 uint32_t optval;
6955 int yield = 0;
6956
6957 if (arg != NULL)
6958 {
6959 unsigned int i;
6960
6961 for (i = 0; i < COPTLISTCOUNT; i++)
6962 if (strcmp(arg, coptlist[i].name) == 0) break;
6963
6964 if (i >= COPTLISTCOUNT)
6965 {
6966 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
6967 return -1;
6968 }
6969
6970 switch (coptlist[i].type)
6971 {
6972 case CONF_BSR:
6973 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
6974 printf("%s\n", optval? "ANYCRLF" : "ANY");
6975 break;
6976
6977 case CONF_FIX:
6978 yield = coptlist[i].value;
6979 printf("%d\n", yield);
6980 break;
6981
6982 case CONF_FIZ:
6983 optval = coptlist[i].value;
6984 printf("%d\n", optval);
6985 break;
6986
6987 case CONF_INT:
6988 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
6989 printf("%d\n", yield);
6990 break;
6991
6992 case CONF_NL:
6993 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
6994 print_newline_config(optval, TRUE);
6995 break;
6996 }
6997
6998 /* For VMS, return the value by setting a symbol, for certain values only. */
6999
7000 #ifdef __VMS
7001 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
7002 {
7003 char ucname[16];
7004 strcpy(ucname, coptlist[i].name);
7005 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i];
7006 vms_setsymbol(ucname, 0, optval);
7007 }
7008 #endif
7009
7010 return yield;
7011 }
7012
7013 /* No argument for -C: output all configuration information. */
7014
7015 print_version(stdout);
7016 printf("Compiled with\n");
7017
7018 #ifdef EBCDIC
7019 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
7020 #if defined NATIVE_ZOS
7021 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
7022 #endif
7023 #endif
7024
7025 #ifdef SUPPORT_PCRE2_8
7026 printf(" 8-bit support\n");
7027 #endif
7028 #ifdef SUPPORT_PCRE2_16
7029 printf(" 16-bit support\n");
7030 #endif
7031 #ifdef SUPPORT_PCRE2_32
7032 printf(" 32-bit support\n");
7033 #endif
7034
7035 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
7036 if (optval != 0)
7037 {
7038 printf(" UTF and UCP support (");
7039 print_unicode_version(stdout);
7040 printf(")\n");
7041 }
7042 else printf(" No Unicode support\n");
7043
7044 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
7045 if (optval != 0)
7046 {
7047 printf(" Just-in-time compiler support: ");
7048 print_jit_target(stdout);
7049 printf("\n");
7050 }
7051 else
7052 {
7053 printf(" No just-in-time compiler support\n");
7054 }
7055
7056 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
7057 print_newline_config(optval, FALSE);
7058 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
7059 printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
7060 "all Unicode newlines");
7061 #ifdef NEVER_BACKSLASH_C
7062 printf(" \\C is not supported\n");
7063 #else
7064 printf(" \\C is supported\n");
7065 #endif
7066 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
7067 printf(" Internal link size = %d\n", optval);
7068 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
7069 printf(" Parentheses nest limit = %d\n", optval);
7070 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
7071 printf(" Default match limit = %d\n", optval);
7072 (void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &optval);
7073 printf(" Default recursion depth limit = %d\n", optval);
7074 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &optval);
7075 printf(" Match recursion uses %s", optval? "stack" : "heap");
7076
7077 printf("\n");
7078 return 0;
7079 }
7080
7081
7082
7083 /*************************************************
7084 * Main Program *
7085 *************************************************/
7086
7087 int
7088 main(int argc, char **argv)
7089 {
7090 uint32_t yield = 0;
7091 uint32_t op = 1;
7092 uint32_t stack_size;
7093 BOOL notdone = TRUE;
7094 BOOL quiet = FALSE;
7095 BOOL showtotaltimes = FALSE;
7096 BOOL skipping = FALSE;
7097 char *arg_subject = NULL;
7098 char *arg_pattern = NULL;
7099 char *arg_error = NULL;
7100
7101 /* The offsets to the options and control bits fields of the pattern and data
7102 control blocks must be the same so that common options and controls such as
7103 "anchored" or "memory" can work for either of them from a single table entry.
7104 We cannot test this till runtime because "offsetof" does not work in the
7105 preprocessor. */
7106
7107 if (PO(options) != DO(options) || PO(control) != DO(control) ||
7108 PO(control2) != DO(control2))
7109 {
7110 fprintf(stderr, "** Coding error: "
7111 "options and control offsets for pattern and data must be the same.\n");
7112 return 1;
7113 }
7114
7115 /* Get the PCRE2 and Unicode version number and JIT target information, at the
7116 same time checking that a request for the length gives the same answer. Also
7117 check lengths for non-string items. */
7118
7119 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
7120 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
7121
7122 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
7123 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
7124
7125 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
7126 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
7127
7128 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
7129 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
7130 {
7131 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
7132 return 1;
7133 }
7134
7135 /* Get buffers from malloc() so that valgrind will check their misuse when
7136 debugging. They grow automatically when very long lines are read. The 16-
7137 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
7138
7139 buffer = (uint8_t *)malloc(pbuffer8_size);
7140 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
7141
7142 /* The following _setmode() stuff is some Windows magic that tells its runtime
7143 library to translate CRLF into a single LF character. At least, that's what
7144 I've been told: never having used Windows I take this all on trust. Originally
7145 it set 0x8000, but then I was advised that _O_BINARY was better. */
7146
7147 #if defined(_WIN32) || defined(WIN32)
7148 _setmode( _fileno( stdout ), _O_BINARY );
7149 #endif
7150
7151 /* Initialization that does not depend on the running mode. */
7152
7153 locale_name[0] = 0;
7154 memset(&def_patctl, 0, sizeof(patctl));
7155 memset(&def_datctl, 0, sizeof(datctl));
7156 def_datctl.oveccount = DEFAULT_OVECCOUNT;
7157 def_datctl.copy_numbers[0] = -1;
7158 def_datctl.get_numbers[0] = -1;
7159 def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET;
7160
7161 /* Scan command line options. */
7162
7163 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
7164 {
7165 char *endptr;
7166 char *arg = argv[op];
7167 unsigned long uli;
7168
7169 /* Display and/or set return code for configuration options. */
7170
7171 if (strcmp(arg, "-C") == 0)
7172 {
7173 yield = c_option(argv[op + 1]);
7174 goto EXIT;
7175 }
7176
7177 /* Select operating mode */
7178
7179 if (strcmp(arg, "-8") == 0)
7180 {
7181 #ifdef SUPPORT_PCRE2_8
7182 test_mode = PCRE8_MODE;
7183 #else
7184 fprintf(stderr,
7185 "** This version of PCRE2 was built without 8-bit support\n");
7186 exit(1);
7187 #endif
7188 }
7189 else if (strcmp(arg, "-16") == 0)
7190 {
7191 #ifdef SUPPORT_PCRE2_16
7192 test_mode = PCRE16_MODE;
7193 #else
7194 fprintf(stderr,
7195 "** This version of PCRE2 was built without 16-bit support\n");
7196 exit(1);
7197 #endif
7198 }
7199 else if (strcmp(arg, "-32") == 0)
7200 {
7201 #ifdef SUPPORT_PCRE2_32
7202 test_mode = PCRE32_MODE;
7203 #else
7204 fprintf(stderr,
7205 "** This version of PCRE2 was built without 32-bit support\n");
7206 exit(1);
7207 #endif
7208 }
7209
7210 /* Set quiet (no version verification) */
7211
7212 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
7213
7214 /* Set system stack size */
7215
7216 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
7217 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
7218 {
7219 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
7220 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
7221 exit(1);
7222 #else
7223 int rc;
7224 struct rlimit rlim;
7225 if (U32OVERFLOW(uli))
7226 {
7227 fprintf(stderr, "** Argument for -S is too big\n");
7228 exit(1);
7229 }
7230 stack_size = (uint32_t)uli;
7231 getrlimit(RLIMIT_STACK, &rlim);
7232 rlim.rlim_cur = stack_size * 1024 * 1024;
7233 if (rlim.rlim_cur > rlim.rlim_max)
7234 {
7235 fprintf(stderr,
7236 "pcre2test: requested stack size %luM is greater than hard limit %lu\n",
7237 (unsigned long int)stack_size,
7238 (unsigned long int)(rlim.rlim_max));
7239 exit(1);
7240 }
7241 rc = setrlimit(RLIMIT_STACK, &rlim);
7242 if (rc != 0)
7243 {
7244 fprintf(stderr, "pcre2test: setting stack size %luM failed: %s\n",
7245 (unsigned long int)stack_size, strerror(errno));
7246 exit(1);
7247 }
7248 op++;
7249 argc--;
7250 #endif
7251 }
7252
7253 /* Set some common pattern and subject controls */
7254
7255 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
7256 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
7257 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
7258 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
7259 else if (strcmp(arg, "-jit") == 0)
7260 {
7261 def_patctl.jit = 7; /* full & partial */
7262 #ifndef SUPPORT_JIT
7263 fprintf(stderr, "** Warning: JIT support is not available: "
7264 "-jit calls functions that do nothing.\n");
7265 #endif
7266 }
7267
7268 /* Set timing parameters */
7269
7270 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
7271 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
7272 {
7273 int both = arg[2] == 0;
7274 showtotaltimes = arg[1] == 'T';
7275 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
7276 {
7277 if (U32OVERFLOW(uli))
7278 {
7279 fprintf(stderr, "** Argument for %s is too big\n", arg);
7280 exit(1);
7281 }
7282 timeitm = (int)uli;
7283 op++;
7284 argc--;
7285 }
7286 else timeitm = LOOPREPEAT;
7287 if (both) timeit = timeitm;
7288 }
7289
7290 /* Give help */
7291
7292 else if (strcmp(arg, "-help") == 0 ||
7293 strcmp(arg, "--help") == 0)
7294 {
7295 usage();
7296 goto EXIT;
7297 }
7298
7299 /* Show version */
7300
7301 else if (strcmp(arg, "-version") == 0 ||
7302 strcmp(arg, "--version") == 0)
7303 {
7304 print_version(stdout);
7305 goto EXIT;
7306 }
7307
7308 /* The following options save their data for processing once we know what
7309 the running mode is. */
7310
7311 else if (strcmp(arg, "-error") == 0)
7312 {
7313 arg_error = argv[op+1];
7314 goto CHECK_VALUE_EXISTS;
7315 }
7316
7317 else if (strcmp(arg, "-subject") == 0)
7318 {
7319 arg_subject = argv[op+1];
7320 goto CHECK_VALUE_EXISTS;
7321 }
7322
7323 else if (strcmp(arg, "-pattern") == 0)
7324 {
7325 arg_pattern = argv[op+1];
7326 CHECK_VALUE_EXISTS:
7327 if (argc <= 2)
7328 {
7329 fprintf(stderr, "** Missing value for %s\n", arg);
7330 yield = 1;
7331 goto EXIT;
7332 }
7333 op++;
7334 argc--;
7335 }
7336
7337 /* Unrecognized option */
7338
7339 else
7340 {
7341 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
7342 usage();
7343 yield = 1;
7344 goto EXIT;
7345 }
7346 op++;
7347 argc--;
7348 }
7349
7350 /* If -error was present, get the error numbers, show the messages, and exit.
7351 We wait to do this until we know which mode we are in. */
7352
7353 if (arg_error != NULL)
7354 {
7355 int len;
7356 int errcode;
7357 char *endptr;
7358
7359 /* Ensure the relevant non-8-bit buffer is available. */
7360
7361 #ifdef SUPPORT_PCRE2_16
7362 if (test_mode == PCRE16_MODE)
7363 {
7364 pbuffer16_size = 256;
7365 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
7366 if (pbuffer16 == NULL)
7367 {
7368 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
7369 (unsigned long int)pbuffer16_size);
7370 yield = 1;
7371 goto EXIT;
7372 }
7373 }
7374 #endif
7375
7376 #ifdef SUPPORT_PCRE2_32
7377 if (test_mode == PCRE32_MODE)
7378 {
7379 pbuffer32_size = 256;
7380 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
7381 if (pbuffer32 == NULL)
7382 {
7383 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
7384 (unsigned long int)pbuffer32_size);
7385 yield = 1;
7386 goto EXIT;
7387 }
7388 }
7389 #endif
7390
7391 /* Loop along a list of error numbers. */
7392
7393 for (;;)
7394 {
7395 errcode = strtol(arg_error, &endptr, 10);
7396 if (*endptr != 0 && *endptr != CHAR_COMMA)
7397 {
7398 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
7399 yield = 1;
7400 goto EXIT;
7401 }
7402 printf("Error %d: ", errcode);
7403 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
7404 if (len < 0)
7405 {
7406 switch (len)
7407 {
7408 case PCRE2_ERROR_BADDATA:
7409 printf("PCRE2_ERROR_BADDATA (unknown error number)");
7410 break;
7411
7412 case PCRE2_ERROR_NOMEMORY:
7413 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
7414 break;
7415
7416 default:
7417 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
7418 break;
7419 }
7420 }
7421 else
7422 {
7423 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
7424 }
7425 printf("\n");
7426 if (*endptr == 0) goto EXIT;
7427 arg_error = endptr + 1;
7428 }
7429 /* Control never reaches here */
7430 } /* End of -error handling */
7431
7432 /* Initialize things that cannot be done until we know which test mode we are
7433 running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_
7434 memory_management() is a no-op, but we call it in order to exercise it. Also
7435 exercise the general context copying function, which is not otherwise used. */
7436
7437 code_unit_size = test_mode/8;
7438 max_oveccount = DEFAULT_OVECCOUNT;
7439
7440 /* Use macros to save a lot of duplication. */
7441
7442 #define CREATECONTEXTS \
7443 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
7444 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
7445 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
7446 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
7447 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
7448 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
7449 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
7450
7451 #ifdef HEAP_MATCH_RECURSE
7452 #define SETRECURSEMEMMAN \
7453 (void)G(pcre2_set_recursion_memory_management_,BITS) \
7454 (G(default_dat_context,BITS), \
7455 &my_stack_malloc, &my_stack_free, NULL)
7456 #else
7457 #define SETRECURSEMEMMAN \
7458 (void)G(pcre2_set_recursion_memory_management_,BITS)(NULL, NULL, NULL, NULL)
7459 #endif
7460
7461 /* Call the appropriate functions for the current mode. */
7462
7463 #ifdef SUPPORT_PCRE2_8
7464 #undef BITS
7465 #define BITS 8
7466 if (test_mode == PCRE8_MODE)
7467 {
7468 CREATECONTEXTS;
7469 SETRECURSEMEMMAN;
7470 }
7471 #endif
7472
7473 #ifdef SUPPORT_PCRE2_16
7474 #undef BITS
7475 #define BITS 16
7476 if (test_mode == PCRE16_MODE)
7477 {
7478 CREATECONTEXTS;
7479 SETRECURSEMEMMAN;
7480 }
7481 #endif
7482
7483 #ifdef SUPPORT_PCRE2_32
7484 #undef BITS
7485 #define BITS 32
7486 if (test_mode == PCRE32_MODE)
7487 {
7488 CREATECONTEXTS;
7489 SETRECURSEMEMMAN;
7490 }
7491 #endif
7492
7493 /* Set a default parentheses nest limit that is large enough to run the
7494 standard tests (this also exercises the function). */
7495
7496 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220);
7497
7498 /* Handle command line modifier settings, sending any error messages to
7499 stderr. We need to know the mode before modifying the context, and it is tidier
7500 to do them all in the same way. */
7501
7502 outfile = stderr;
7503 if ((arg_pattern != NULL &&
7504 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
7505 (arg_subject != NULL &&
7506 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
7507 {
7508 yield = 1;
7509 goto EXIT;
7510 }
7511
7512 /* Sort out the input and output files, defaulting to stdin/stdout. */
7513
7514 infile = stdin;
7515 outfile = stdout;
7516
7517 if (argc > 1 && strcmp(argv[op], "-") != 0)
7518 {
7519 infile = fopen(argv[op], INPUT_MODE);
7520 if (infile == NULL)
7521 {
7522 printf("** Failed to open '%s'\n", argv[op]);
7523 yield = 1;
7524 goto EXIT;
7525 }
7526 }
7527
7528 if (argc > 2)
7529 {
7530 outfile = fopen(argv[op+1], OUTPUT_MODE);
7531 if (outfile == NULL)
7532 {
7533 printf("** Failed to open '%s'\n", argv[op+1]);
7534 yield = 1;
7535 goto EXIT;
7536 }
7537 }
7538
7539 /* Output a heading line unless quiet, then process input lines. */
7540
7541 if (!quiet) print_version(outfile);
7542
7543 SET(compiled_code, NULL);
7544
7545 #ifdef SUPPORT_PCRE2_8
7546 preg.re_pcre2_code = NULL;
7547 preg.re_match_data = NULL;
7548 #endif
7549
7550 while (notdone)
7551 {
7552 uint8_t *p;
7553 int rc = PR_OK;
7554 BOOL expectdata = TEST(compiled_code, !=, NULL);
7555 #ifdef SUPPORT_PCRE2_8
7556 expectdata |= preg.re_pcre2_code != NULL;
7557 #endif
7558
7559 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
7560 break;
7561 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
7562 fflush(outfile);
7563 p = buffer;
7564
7565 /* If we have a pattern set up for testing, or we are skipping after a compile
7566 failure, a blank line terminates this test. Otherwise, unless we are skipping or
7567 the line starts with "\=" plus white space, process it as a data line. */
7568
7569 if (expectdata || skipping)
7570 {
7571 while (isspace(*p)) p++;
7572 if (*p == 0)
7573 {
7574 #ifdef SUPPORT_PCRE2_8
7575 if (preg.re_pcre2_code != NULL)
7576 {
7577 regfree(&preg);
7578 preg.re_pcre2_code = NULL;
7579 preg.re_match_data = NULL;
7580 }
7581 #endif /* SUPPORT_PCRE2_8 */
7582 if (TEST(compiled_code, !=, NULL))
7583 {
7584 SUB1(pcre2_code_free, compiled_code);
7585 SET(compiled_code, NULL);
7586 }
7587 skipping = FALSE;
7588 setlocale(LC_CTYPE, "C");
7589 }
7590 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
7591 rc = process_data();
7592 }
7593
7594 /* We do not have a pattern set up for testing. Lines starting with # are
7595 either comments or special commands. Blank lines are ignored. Otherwise, the
7596 line must start with a valid delimiter. It is then processed as a pattern
7597 line. */
7598
7599 else if (*p == '#')
7600 {
7601 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
7602 rc = process_command();
7603 }
7604
7605 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
7606 {
7607 rc = process_pattern();
7608 dfa_matched = 0;
7609 }
7610
7611 else
7612 {
7613 while (isspace(*p)) p++;
7614 if (*p != 0)
7615 {
7616 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
7617 *buffer);
7618 rc = PR_SKIP;
7619 }
7620 }
7621
7622 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
7623 else if (rc == PR_ABEND)
7624 {
7625 fprintf(outfile, "** pcre2test run abandoned\n");
7626 yield = 1;
7627 goto EXIT;
7628 }
7629 }
7630
7631 /* Finish off a normal run. */
7632
7633 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
7634
7635 if (showtotaltimes)
7636 {
7637 const char *pad = "";
7638 fprintf(outfile, "--------------------------------------\n");
7639 if (timeit > 0)
7640 {
7641 fprintf(outfile, "Total compile time %.4f milliseconds\n",
7642 (((double)total_compile_time * 1000.0) / (double)timeit) /
7643 (double)CLOCKS_PER_SEC);
7644 if (total_jit_compile_time > 0)
7645 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
7646 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
7647 (double)CLOCKS_PER_SEC);
7648 pad = " ";
7649 }
7650 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
7651 (((double)total_match_time * 1000.0) / (double)timeitm) /
7652 (double)CLOCKS_PER_SEC);
7653 }
7654
7655
7656 EXIT:
7657
7658 if (infile != NULL && infile != stdin) fclose(infile);
7659 if (outfile != NULL && outfile != stdout) fclose(outfile);
7660
7661 free(buffer);
7662 free(dbuffer);
7663 free(pbuffer8);
7664 free(dfa_workspace);
7665 free((void *)locale_tables);
7666 PCRE2_MATCH_DATA_FREE(match_data);
7667 SUB1(pcre2_code_free, compiled_code);
7668
7669 while(patstacknext-- > 0)
7670 {
7671 SET(compiled_code, patstack[patstacknext]);
7672 SUB1(pcre2_code_free, compiled_code);
7673 }
7674
7675 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
7676 if (jit_stack != NULL)
7677 {
7678 PCRE2_JIT_STACK_FREE(jit_stack);
7679 }
7680
7681 #define FREECONTEXTS \
7682 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
7683 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
7684 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
7685 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
7686 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
7687 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS))
7688
7689 #ifdef SUPPORT_PCRE2_8
7690 #undef BITS
7691 #define BITS 8
7692 if (preg.re_pcre2_code != NULL) regfree(&preg);
7693 FREECONTEXTS;
7694 #endif
7695
7696 #ifdef SUPPORT_PCRE2_16
7697 #undef BITS
7698 #define BITS 16
7699 free(pbuffer16);
7700 FREECONTEXTS;
7701 #endif
7702
7703 #ifdef SUPPORT_PCRE2_32
7704 #undef BITS
7705 #define BITS 32
7706 free(pbuffer32);
7707 FREECONTEXTS;
7708 #endif
7709
7710 #if defined(__VMS)
7711 yield = SS$_NORMAL; /* Return values via DCL symbols */
7712 #endif
7713
7714 return yield;
7715 }
7716
7717 /* End of pcre2test.c */
7718