1 /*************************************************
2 * PCRE2 testing program *
3 *************************************************/
4
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11
12 Written by Philip Hazel
13 Original code Copyright (c) 1997-2012 University of Cambridge
14 Rewritten code Copyright (c) 2016-2018 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80
81 /* Debugging code enabler */
82
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102
/* The check for interactive input lives in a macro so that there is a single
place to change it if a different environment needs a different test. */

#define INTERACTIVE(f) isatty(fileno(f))
107
108
109 /* ---------------------- System-specific definitions ---------------------- */
110
/* Windows builds differ in several respects. Originally pcretest opened its
input and output without "b". Some environments were then reported to need "b",
so release 5.0 added it to both streams (it makes no difference on Unix-like
systems). Later it was reported to be wrong for the input on Windows. The modes
are therefore collected into macros here, where they are easy to adjust, and
the Windows input mode has no "b". The BINARY versions are used when
saving/restoring compiled patterns. */

#if defined(_WIN32) || defined(WIN32)    /* Windows */

#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */

#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these functions with a leading underscore, though in some
environments the plain names seem to be defined already, hence the #ifndefs. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else                                    /* Not Windows */

#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */

#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#endif  /* Windows or not */

/* The binary modes are the same in every environment. */

#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
159
160 #ifdef __VMS
161 #include <ssdef.h>
162 void vms_setsymbol( char *, char *, int );
163 #endif
164
/* The %td and %zu formats are used only when the compiler claims C99 or later
and is not Visual C; VC and older compilers don't support them, so in those
environments "lu" is used instead, with a cast to unsigned long for sizes. */

#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define PTR_FORM "td"
#define SIZ_FORM "zu"
#define SIZ_CAST
#else
#define PTR_FORM "lu"
#define SIZ_FORM "lu"
#define SIZ_CAST (unsigned long int)
#endif
176
177 /* ------------------End of system-specific definitions -------------------- */
178
/* Token-pasting helpers that are used in several places below. G() goes
through glue() so that its arguments are macro-expanded before being pasted. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
183
/* Miscellaneous parameters and manifests */

/* Supply CLOCKS_PER_SEC if the system headers did not: use CLK_TCK when that
exists, and otherwise fall back to 100. */

#if !defined(CLOCKS_PER_SEC)
#if defined(CLK_TCK)
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

#define CFORE_UNSET         UINT32_MAX   /* Unset value for startend/cfail/cerror fields */
#define CONVERT_UNSET       UINT32_MAX   /* Unset value for convert_type field */
#define DFA_WS_DIMENSION    1000         /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT   15           /* Default ovector count */
#define JUNK_OFFSET         0xdeadbeef   /* For initializing ovector */
#define LOCALESIZE          32           /* Size of locale name */
#define LOOPREPEAT          500000       /* Default loop count for timing */
#define MALLOCLISTSIZE      20           /* For remembering mallocs */
#define PARENS_NEST_DEFAULT 220          /* Default parentheses nest limit */
#define PATSTACKSIZE        20           /* Pattern stack for save/restore testing */
#define REPLACE_MODSIZE     100          /* Field for reading 8-bit replacement */
#define VERSION_SIZE        64           /* Size of buffer for the version strings */

/* The buffer into which replacement strings are copied must be big enough to
hold them as 32-bit code units. REPLACE_MODSIZE (100) characters at four bytes
each come to 400 bytes, which fits within this size. */

#define REPLACE_BUFFSIZE    1024         /* This is a byte value */

/* Execution modes; each value is the code unit width in bits. */

#define PCRE8_MODE   8
#define PCRE16_MODE  16
#define PCRE32_MODE  32

/* Processing returns */

enum {
  PR_OK = 0,
  PR_SKIP,
  PR_ABEND
};
221
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK uses isprint() only when use_tables is not NULL and the value is
less than 256; in all other cases it falls back to PRINTABLE. The argument is
parenthesized in the range test so that an expression argument containing an
operator of lower precedence than '<' (for example '|' or '?:') is compared as
a whole. Note that the argument is evaluated more than once, so it must not
have side effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
234
235 /* We have to include some of the library source files because we need
236 to use some of the macros, internal structure definitions, and other internal
237 values - pcre2test has "inside information" compared to an application program
238 that strictly follows the PCRE2 API.
239
240 Before including pcre2_internal.h we define PRIV so that it does not get
241 defined therein. This ensures that PRIV names in the included files do not
242 clash with those in the libraries. Also, although pcre2_internal.h does itself
243 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
244 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
245 for building the library. */
246
247 #define PRIV(name) name
248 #define PCRE2_CODE_UNIT_WIDTH 0
249 #include "pcre2.h"
250 #include "pcre2posix.h"
251 #include "pcre2_internal.h"
252
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
256
257 #define PCRE2_PCRE2TEST
258 #include "pcre2_tables.c"
259 #include "pcre2_ucd.c"
260
261 /* 32-bit integer values in the input are read by strtoul() or strtol(). The
262 check needed for overflow depends on whether long ints are in fact longer than
263 ints. They are defined not to be shorter. */
264
265 #if ULONG_MAX > UINT32_MAX
266 #define U32OVERFLOW(x) (x > UINT32_MAX)
267 #else
268 #define U32OVERFLOW(x) (x == UINT32_MAX)
269 #endif
270
271 #if LONG_MAX > INT32_MAX
272 #define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN)
273 #else
274 #define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN)
275 #endif
276
277 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
278 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
279 defined. We can now include it for each supported code unit width. Because
280 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
281 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
282 while including these files, and then restore it to a no-op. Because LINK_SIZE
283 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
284 these inclusions should not be changed. */
285
286 #undef PCRE2_SUFFIX
287 #undef PCRE2_CODE_UNIT_WIDTH
288
289 #ifdef SUPPORT_PCRE2_8
290 #define PCRE2_CODE_UNIT_WIDTH 8
291 #define PCRE2_SUFFIX(a) G(a,8)
292 #include "pcre2_intmodedep.h"
293 #include "pcre2_printint.c"
294 #undef PCRE2_CODE_UNIT_WIDTH
295 #undef PCRE2_SUFFIX
296 #endif /* SUPPORT_PCRE2_8 */
297
298 #ifdef SUPPORT_PCRE2_16
299 #define PCRE2_CODE_UNIT_WIDTH 16
300 #define PCRE2_SUFFIX(a) G(a,16)
301 #include "pcre2_intmodedep.h"
302 #include "pcre2_printint.c"
303 #undef PCRE2_CODE_UNIT_WIDTH
304 #undef PCRE2_SUFFIX
305 #endif /* SUPPORT_PCRE2_16 */
306
307 #ifdef SUPPORT_PCRE2_32
308 #define PCRE2_CODE_UNIT_WIDTH 32
309 #define PCRE2_SUFFIX(a) G(a,32)
310 #include "pcre2_intmodedep.h"
311 #include "pcre2_printint.c"
312 #undef PCRE2_CODE_UNIT_WIDTH
313 #undef PCRE2_SUFFIX
314 #endif /* SUPPORT_PCRE2_32 */
315
316 #define PCRE2_SUFFIX(a) a
317
318 /* We need to be able to check input text for UTF-8 validity, whatever code
319 widths are actually available, because the input to pcre2test is always in
320 8-bit code units. So we include the UTF validity checking function for 8-bit
321 code units. */
322
323 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
324
325 #define PCRE2_CODE_UNIT_WIDTH 8
326 #undef PCRE2_SPTR
327 #define PCRE2_SPTR PCRE2_SPTR8
328 #include "pcre2_valid_utf.c"
329 #undef PCRE2_CODE_UNIT_WIDTH
330 #undef PCRE2_SPTR
331
/* The default test mode is 8-bit whenever the 8-bit library is available; if
16- or 32-bit support is also present, it can be selected by a command-line
option. Without 8-bit support there must be 16-bit or 32-bit support, so one of
those becomes the default. The config function, JIT stack, contexts, and
version string are the same in all modes, so the form belonging to the first
available width is used for them. */

#if defined(SUPPORT_PCRE2_8)

#define DEFAULT_TEST_MODE           PCRE8_MODE
#define VERSION_TYPE                PCRE2_UCHAR8
#define PCRE2_CONFIG                pcre2_config_8
#define PCRE2_JIT_STACK             pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_8
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_8

#elif defined(SUPPORT_PCRE2_16)

#define DEFAULT_TEST_MODE           PCRE16_MODE
#define VERSION_TYPE                PCRE2_UCHAR16
#define PCRE2_CONFIG                pcre2_config_16
#define PCRE2_JIT_STACK             pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_16
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_16

#elif defined(SUPPORT_PCRE2_32)

#define DEFAULT_TEST_MODE           PCRE32_MODE
#define VERSION_TYPE                PCRE2_UCHAR32
#define PCRE2_CONFIG                pcre2_config_32
#define PCRE2_JIT_STACK             pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT  pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT  pcre2_real_compile_context_32
#define PCRE2_REAL_CONVERT_CONTEXT  pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT    pcre2_real_match_context_32

#endif
368
/* ------------- Structure and table for handling #-commands ------------- */

/* One table entry: the command's name and its CMD_xxx identifier. */

typedef struct cmdstruct {
  const char *name;
  int value;
} cmdstruct;

/* Command identifiers. CMD_UNKNOWN has no entry in the table. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

static cmdstruct cmdlist[] = {
  { "forbid_utf",      CMD_FORBID_UTF },
  { "load",            CMD_LOAD },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern",         CMD_PATTERN },
  { "perltest",        CMD_PERLTEST },
  { "pop",             CMD_POP },
  { "popcopy",         CMD_POPCOPY },
  { "save",            CMD_SAVE },
  { "subject",         CMD_SUBJECT }
};

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
391
392 /* ------------- Structures and tables for handling modifiers -------------- */
393
/* Names of the newline types. The order of this table must be kept in step
with the definitions of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT",
  "CR",
  "LF",
  "CRLF",
  "ANY",
  "ANYCRLF",
  "NUL"
};
399
400 /* Structure and table for handling pattern conversion types. */
401
402 typedef struct convertstruct {
403 const char *name;
404 uint32_t option;
405 } convertstruct;
406
407 static convertstruct convertlist[] = {
408 { "glob", PCRE2_CONVERT_GLOB },
409 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
410 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
411 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
412 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
413 { "unset", CONVERT_UNSET }};
414
415 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
416
/* Modifier types and applicability. The first group of values says where a
modifier may be used; the second group says what kind of value it takes. */

enum {
  /* Applicability */
  MOD_CTC = 0,  /* Applies to a compile context */
  MOD_CTM,      /* Applies to a match context */
  MOD_PAT,      /* Applies to a pattern */
  MOD_PATP,     /* Ditto, OK for Perl test */
  MOD_DAT,      /* Applies to a data line */
  MOD_PD,       /* Applies to a pattern or a data line */
  MOD_PDP,      /* As MOD_PD, OK for Perl test */
  MOD_PND,      /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,     /* As MOD_PND, OK for Perl test */

  /* Value type */
  MOD_CHR,      /* Is a single character */
  MOD_CON,      /* Is a "convert" type/options list */
  MOD_CTL,      /* Is a control bit */
  MOD_BSR,      /* Is a BSR value */
  MOD_IN2,      /* Is one or two unsigned integers */
  MOD_INS,      /* Is a signed integer */
  MOD_INT,      /* Is an unsigned integer */
  MOD_IND,      /* Is an unsigned integer, but no value => default */
  MOD_NL,       /* Is a newline value */
  MOD_NN,       /* Is a number or a name; more than one may occur */
  MOD_OPT,      /* Is an option bit */
  MOD_SIZ,      /* Is a PCRE2_SIZE value */
  MOD_STR       /* Is a string */
};
441
/* Control bits. Some of them apply to compiling and some to matching, but
some can be set either on a pattern or a data line, so every bit must be
distinct. There are now too many for one 32-bit word, so they are split into
two fields. */

/* First control word */

#define CTL_AFTERTEXT       0x00000001u
#define CTL_ALLAFTERTEXT    0x00000002u
#define CTL_ALLCAPTURES     0x00000004u
#define CTL_ALLUSEDTEXT     0x00000008u
#define CTL_ALTGLOBAL       0x00000010u
#define CTL_BINCODE         0x00000020u
#define CTL_CALLOUT_CAPTURE 0x00000040u
#define CTL_CALLOUT_INFO    0x00000080u
#define CTL_CALLOUT_NONE    0x00000100u
#define CTL_DFA             0x00000200u
#define CTL_EXPAND          0x00000400u
#define CTL_FINDLIMITS      0x00000800u
#define CTL_FRAMESIZE       0x00001000u
#define CTL_FULLBINCODE     0x00002000u
#define CTL_GETALL          0x00004000u
#define CTL_GLOBAL          0x00008000u
#define CTL_HEXPAT          0x00010000u  /* Same word as USE_LENGTH */
#define CTL_INFO            0x00020000u
#define CTL_JITFAST         0x00040000u
#define CTL_JITVERIFY       0x00080000u
#define CTL_MARK            0x00100000u
#define CTL_MEMORY          0x00200000u
#define CTL_NULLCONTEXT     0x00400000u
#define CTL_POSIX           0x00800000u
#define CTL_POSIX_NOSUB     0x01000000u
#define CTL_PUSH            0x02000000u  /* These three must be */
#define CTL_PUSHCOPY        0x04000000u  /*   all in the same   */
#define CTL_PUSHTABLESCOPY  0x08000000u  /*     word.           */
#define CTL_STARTCHAR       0x10000000u
#define CTL_USE_LENGTH      0x20000000u  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT      0x40000000u
#define CTL_ZERO_TERMINATE  0x80000000u

/* Combinations of bits in the first control word */

#define CTL_DEBUG    (CTL_FULLBINCODE|CTL_INFO)               /* For setting */
#define CTL_ANYINFO  (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
#define CTL_ANYGLOB  (CTL_ALTGLOBAL|CTL_GLOBAL)

/* Second control word */

#define CTL2_SUBSTITUTE_EXTENDED         0x00000001u
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  0x00000002u
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    0x00000004u
#define CTL2_SUBSTITUTE_UNSET_EMPTY      0x00000008u
#define CTL2_SUBJECT_LITERAL             0x00000010u
#define CTL2_CALLOUT_NO_WHERE            0x00000020u
#define CTL2_CALLOUT_EXTRA               0x00000040u

#define CTL2_NL_SET                      0x40000000u  /* Informational */
#define CTL2_BSR_SET                     0x80000000u  /* Informational */

/* The matching controls that may be set either on a pattern or on a data
line. They are copied from the pattern controls as the initial settings for
the data line controls. CTL_MEMORY is deliberately absent, because it does
different things in the two cases. */

#define CTL_ALLPD ( \
  CTL_AFTERTEXT    | CTL_ALLAFTERTEXT | CTL_ALLCAPTURES | \
  CTL_ALLUSEDTEXT  | CTL_ALTGLOBAL    | CTL_GLOBAL      | \
  CTL_MARK         | CTL_STARTCHAR    | CTL_UTF8_INPUT)

#define CTL2_ALLPD ( \
  CTL2_SUBSTITUTE_EXTENDED        | \
  CTL2_SUBSTITUTE_OVERFLOW_LENGTH | \
  CTL2_SUBSTITUTE_UNKNOWN_UNSET   | \
  CTL2_SUBSTITUTE_UNSET_EMPTY)
517
518 /* Structures for holding modifier information for patterns and subject strings
519 (data). Fields containing modifiers that can be set either for a pattern or a
520 subject must be at the start and in the same order in both cases so that the
521 same offset in the big table below works for both. */
522
523 typedef struct patctl { /* Structure for pattern modifiers. */
524 uint32_t options; /* Must be in same position as datctl */
525 uint32_t control; /* Must be in same position as datctl */
526 uint32_t control2; /* Must be in same position as datctl */
527 uint32_t jitstack; /* Must be in same position as datctl */
528 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
529 uint32_t jit;
530 uint32_t stackguard_test;
531 uint32_t tables_id;
532 uint32_t convert_type;
533 uint32_t convert_length;
534 uint32_t convert_glob_escape;
535 uint32_t convert_glob_separator;
536 uint32_t regerror_buffsize;
537 uint8_t locale[LOCALESIZE];
538 } patctl;
539
540 #define MAXCPYGET 10
541 #define LENCPYGET 64
542
543 typedef struct datctl { /* Structure for data line modifiers. */
544 uint32_t options; /* Must be in same position as patctl */
545 uint32_t control; /* Must be in same position as patctl */
546 uint32_t control2; /* Must be in same position as patctl */
547 uint32_t jitstack; /* Must be in same position as patctl */
548 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
549 uint32_t startend[2];
550 uint32_t cerror[2];
551 uint32_t cfail[2];
552 int32_t callout_data;
553 int32_t copy_numbers[MAXCPYGET];
554 int32_t get_numbers[MAXCPYGET];
555 uint32_t oveccount;
556 uint32_t offset;
557 uint8_t copy_names[LENCPYGET];
558 uint8_t get_names[LENCPYGET];
559 } datctl;
560
/* Identifiers that say which context is to be modified. */

enum {
  CTX_PAT = 0,   /* Active pattern context */
  CTX_POPPAT,    /* Ditto, for a popped pattern */
  CTX_DEFPAT,    /* Default pattern context */
  CTX_DAT,       /* Active data (match) context */
  CTX_DEFDAT     /* Default data (match) context */
};
568
/* Shorthand for field offsets, to keep the big table below readable: CO and MO
give offsets within the real compile and match context structures, PO within
patctl, and DO within datctl. PD is simply another spelling of PO. */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)
#define PO(field) offsetof(patctl, field)
#define PD(field) PO(field)
#define DO(field) offsetof(datctl, field)
576
577 /* Table of all long-form modifiers. Must be in collating sequence of modifier
578 name because it is searched by binary chop. */
579
580 typedef struct modstruct {
581 const char *name;
582 uint16_t which;
583 uint16_t type;
584 uint32_t value;
585 PCRE2_SIZE offset;
586 } modstruct;
587
588 static modstruct modlist[] = {
589 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
590 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
591 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
592 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
593 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
594 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
595 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
596 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
597 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
598 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
599 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
600 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
601 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
602 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
603 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
604 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
605 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
606 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
607 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
608 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
609 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
610 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
611 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
612 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
613 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
614 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
615 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
616 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
617 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
618 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
619 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
620 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
621 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
622 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
623 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
624 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
625 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
626 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
627 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
628 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
629 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
630 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
631 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
632 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
633 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
634 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
635 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
636 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
637 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
638 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
639 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
640 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
641 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
642 { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
643 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
644 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
645 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
646 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
647 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
648 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
649 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
650 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
651 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
652 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
653 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
654 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
655 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
656 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
657 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
658 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
659 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
660 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
661 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
662 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
663 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
664 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
665 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
666 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
667 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
668 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
669 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
670 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
671 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
672 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
673 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
674 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
675 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
676 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
677 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
678 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
679 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
680 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
681 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
682 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
683 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
684 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
685 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
686 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
687 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
688 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
689 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
690 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
691 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
692 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
693 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
694 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
695 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
696 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
697 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
698 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
699 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
700 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
701 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
702 };
703
/* Number of entries in modlist. The expansion is parenthesized so that it acts
as a single operand wherever it is used (for example after '%' or '*'). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
705
706 /* Controls and options that are supported for use with the POSIX interface. */
707
708 #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
709 PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_LITERAL|PCRE2_MULTILINE|PCRE2_UCP| \
710 PCRE2_UTF|PCRE2_UNGREEDY)
711
712 #define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)
713
714 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
715 CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_HEXPAT|CTL_POSIX| \
716 CTL_POSIX_NOSUB|CTL_USE_LENGTH)
717
718 #define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)
719
720 #define POSIX_SUPPORTED_MATCH_OPTIONS ( \
721 PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
722
723 #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
724 #define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
725
726 /* Control bits that are not ignored with 'push'. */
727
/* First control word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE        | \
  CTL_CALLOUT_INFO   | \
  CTL_FULLBINCODE    | \
  CTL_HEXPAT         | \
  CTL_INFO           | \
  CTL_JITVERIFY      | \
  CTL_MEMORY         | \
  CTL_FRAMESIZE      | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)

/* Second control word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET|CTL2_NL_SET)
734
735 /* Controls that apply only at compile time with 'push'. */
736
/* One value per control word; the second word has no such bits. */

#define PUSH_COMPILE_ONLY_CONTROLS   CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)
739
740 /* Controls that are forbidden with #pop or #popcopy. */
741
#define NOTPOP_CONTROLS ( \
  CTL_HEXPAT         | \
  CTL_POSIX          | \
  CTL_POSIX_NOSUB    | \
  CTL_PUSH           | \
  CTL_PUSHCOPY       | \
  CTL_PUSHTABLESCOPY | \
  CTL_USE_LENGTH)
744
745 /* Pattern controls that are mutually exclusive. At present these are all in
746 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
747 CTL_POSIX, so it doesn't need its own entries. */
748
749 static uint32_t exclusive_pat_controls[] = {
750 CTL_POSIX | CTL_PUSH,
751 CTL_POSIX | CTL_PUSHCOPY,
752 CTL_POSIX | CTL_PUSHTABLESCOPY,
753 CTL_PUSH | CTL_PUSHCOPY,
754 CTL_PUSH | CTL_PUSHTABLESCOPY,
755 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
756 CTL_EXPAND | CTL_HEXPAT };
757
758 /* Data controls that are mutually exclusive. At present these are all in the
759 first control word. */
760
761 static uint32_t exclusive_dat_controls[] = {
762 CTL_ALLUSEDTEXT | CTL_STARTCHAR,
763 CTL_FINDLIMITS | CTL_NULLCONTEXT };
764
765 /* Table of single-character abbreviated modifiers. The index field is
766 initialized to -1, but the first time the modifier is encountered, it is filled
767 in with the index of the full entry in modlist, to save repeated searching when
768 processing multiple test items. This short list is searched serially, so its
769 order does not matter. */
770
/* One entry per single-character modifier: the full modifier name, the
abbreviating character, and a cached index that starts at -1. */

typedef struct c1modstruct {
  const char *fullname;
  uint32_t    onechar;
  int         index;
} c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B', -1 },
  { "info",            'I', -1 },
  { "global",          'g', -1 },
  { "caseless",        'i', -1 },
  { "multiline",       'm', -1 },
  { "no_auto_capture", 'n', -1 },
  { "dotall",          's', -1 },
  { "extended",        'x', -1 }
};

/* Number of entries in c1modlist. Fully parenthesized so that the macro is a
single operand wherever it is used. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
789
790 /* Table of arguments for the -C command line option. Use macros to make the
791 table itself easier to read. */
792
/* SUPPORT_n becomes 1 when the matching SUPPORT_PCRE2_n is defined; otherwise
it is given the value 0 unless it already has a definition. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined(SUPPORT_8)
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined(SUPPORT_16)
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined(SUPPORT_32)
#define SUPPORT_32 0
#endif
812
/* EBCDIC support flag and the newline value reported with it. */

#ifndef EBCDIC
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#else
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#endif
820
/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
826
/* One -C argument: its name, a CONF_xxx type code, and an associated value. */

struct coptstruct {
  const char *name;
  uint32_t    type;
  uint32_t    value;
};

typedef struct coptstruct coptstruct;
832
/* Type codes stored in the "type" field of coptlist entries. */

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
839
840 static coptstruct coptlist[] = {
841 { "backslash-C", CONF_FIX, BACKSLASH_C },
842 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR },
843 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC },
844 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL },
845 { "jit", CONF_INT, PCRE2_CONFIG_JIT },
846 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE },
847 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE },
848 { "pcre2-16", CONF_FIX, SUPPORT_16 },
849 { "pcre2-32", CONF_FIX, SUPPORT_32 },
850 { "pcre2-8", CONF_FIX, SUPPORT_8 },
851 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE }
852 };
853
/* Number of entries in coptlist. Fully parenthesized so that the macro is a
single operand wherever it is used. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))

/* The helper macros were needed only to build the table above. */

#undef SUPPORT_8
#undef SUPPORT_16
#undef SUPPORT_32
#undef SUPPORT_EBCDIC
860
861
862 /* ----------------------- Static variables ------------------------ */
863
864 static FILE *infile;
865 static FILE *outfile;
866
867 static const void *last_callout_mark;
868 static PCRE2_JIT_STACK *jit_stack = NULL;
869 static size_t jit_stack_size = 0;
870
871 static BOOL first_callout;
872 static BOOL jit_was_used;
873 static BOOL restrict_for_perl_test = FALSE;
874 static BOOL show_memory = FALSE;
875
876 static int code_unit_size; /* Bytes */
877 static int jitrc; /* Return from JIT compile */
878 static int test_mode = DEFAULT_TEST_MODE;
879 static int timeit = 0;
880 static int timeitm = 0;
881
/* Accumulated clock totals, all starting at zero.
NOTE(review): unlike the neighbouring variables these have external linkage;
confirm whether that is intentional. */

clock_t total_compile_time     = 0;
clock_t total_jit_compile_time = 0;
clock_t total_match_time       = 0;
885
886 static uint32_t dfa_matched;
887 static uint32_t forbid_utf = 0;
888 static uint32_t maxlookbehind;
889 static uint32_t max_oveccount;
890 static uint32_t callout_count;
891
892 static uint16_t local_newline_default = 0;
893
894 static VERSION_TYPE jittarget[VERSION_SIZE];
895 static VERSION_TYPE version[VERSION_SIZE];
896 static VERSION_TYPE uversion[VERSION_SIZE];
897
898 static patctl def_patctl;
899 static patctl pat_patctl;
900 static datctl def_datctl;
901 static datctl dat_datctl;
902
903 static void *patstack[PATSTACKSIZE];
904 static int patstacknext = 0;
905
906 static void *malloclist[MALLOCLISTSIZE];
907 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
908 static uint32_t malloclistptr = 0;
909
/* The regex_t object exists only when the 8-bit library is supported. */
#ifdef SUPPORT_PCRE2_8
static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
#endif

/* DFA workspace pointer; unset initially. */
static int *dfa_workspace = NULL;

/* Character table pointers and the locale name buffer. */
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t        locale_name[32];
918
919 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need
920 rebuilding, but set up the same naming scheme for use in macros. The "buffer"
921 buffer is where all input lines are read. Its size is the same as pbuffer8.
922 Pattern lines are always copied to pbuffer8 for use in callouts, even if they
923 are actually compiled from pbuffer16 or pbuffer32. */
924
static size_t   pbuffer8_size = 50000;   /* Initial size, bytes */
static uint8_t *pbuffer8      = NULL;    /* Unset initially */
static uint8_t *buffer        = NULL;    /* Unset initially */
928
929 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it
930 is cast as needed. For long data lines it grows as necessary. */
931
static size_t   dbuffer_size = 1u << 14;   /* Initial size, bytes */
static uint8_t *dbuffer      = NULL;       /* Unset initially */
934
935
936 /* ---------------- Mode-dependent variables -------------------*/
937
/* 8-bit library objects; the names end in "8" so that the mode-dependent
macros can build them by token pasting. */

#ifdef SUPPORT_PCRE2_8
static pcre2_code_8            *compiled_code8;
static pcre2_general_context_8 *general_context8;
static pcre2_general_context_8 *general_context_copy8;
static pcre2_compile_context_8 *pat_context8;
static pcre2_compile_context_8 *default_pat_context8;
static pcre2_convert_context_8 *con_context8;
static pcre2_convert_context_8 *default_con_context8;
static pcre2_match_context_8   *dat_context8;
static pcre2_match_context_8   *default_dat_context8;
static pcre2_match_data_8      *match_data8;
#endif
946
/* 16-bit library objects, plus the 16-bit pattern buffer and its size. */

#ifdef SUPPORT_PCRE2_16
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
static uint16_t  *pbuffer16 = NULL;
#endif
957
/* 32-bit library objects, plus the 32-bit pattern buffer and its size. */

#ifdef SUPPORT_PCRE2_32
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
static uint32_t  *pbuffer32 = NULL;
#endif
968
969
970 /* ---------------- Macros that work in all modes ----------------- */
971
/* CAST8VAR yields the mode-selected variable cast to uint8_t *. SET and
SETPLUS assign to, or add to, the mode-selected variable via SETOP. strlen8
applies strlen() to a byte string; its argument is parenthesized so that the
cast applies to the whole argument expression. */

#define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8(x) strlen((char *)(x))
976
977
978 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
979
980 /* Define macros for variables and functions that must be selected dynamically
981 depending on the mode setting (8, 16, 32). These are dependent on which modes
982 are supported. */
983
984 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
985 defined (SUPPORT_PCRE2_32)) >= 2
986
987 /* ----- All three modes supported ----- */
988
989 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
990
/* Expression macros that select the 8-, 16- or 32-bit item according to
test_mode. CASTFLD yields field b of object a cast to type t; CASTVAR yields
variable x cast to type t; CODE_UNIT yields code unit b of string a as a
uint32_t. */

#define CASTFLD(t,a,b) ( \
  (test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \
  (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
  (t)(G(a,32)->b))

#define CASTVAR(t,x) ( \
  (test_mode == PCRE8_MODE)? (t)G(x,8) : \
  (test_mode == PCRE16_MODE)? (t)G(x,16) : \
  (t)G(x,32))

#define CODE_UNIT(a,b) ( \
  (test_mode == PCRE8_MODE)? \
    (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
  (test_mode == PCRE16_MODE)? \
    (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
    (uint32_t)(((PCRE2_SPTR32)(a))[b]))
1002
/* Copy one convert context over another, using the structure size of the
current mode. */

#define CONCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
1009
/* Copy c code units from b into the mode-selected buffer a; the byte count is
c scaled by the code unit size. Arguments b and c are parenthesized so that
expression arguments are cast and scaled as a whole. The chain ends with a
plain "else" (32-bit), as in the other mode-dispatch macros, so that an "else"
following an invocation cannot attach to the macro's own "if". */

#define CONVERT_COPY(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),(char *)(b),(c)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),(char *)(b),(c)*2); \
  else \
    memcpy(G(a,32),(char *)(b),(c)*4)
1017
/* DATCTXCPY and PATCTXCPY copy one match or compile context over another,
using the structure size of the current mode. FLD is an expression yielding
field b of the mode-selected object a. */

#define DATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))

#define FLD(a,b) ( \
  (test_mode == PCRE8_MODE)? G(a,8)->b : \
  (test_mode == PCRE16_MODE)? G(a,16)->b : \
  G(a,32)->b)

#define PATCTXCPY(a,b) \
  if (test_mode == PCRE8_MODE) \
    memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
  else if (test_mode == PCRE16_MODE) \
    memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
  else \
    memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
1034
/* Call the pchars function for the current mode on p+offset. PCHARS stores
the function's result in lv; PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == PCRE32_MODE) \
    (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
  else if (test_mode == PCRE16_MODE) \
    (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
  else \
    (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1050
/* Enumerate the callouts of the current mode's compiled code. Callback b is
cast to that mode's callback type; the return value is stored in a. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_callout_enumerate_8(compiled_code8, \
      (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_callout_enumerate_16(compiled_code16, \
      (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
  else \
    a = pcre2_callout_enumerate_32(compiled_code32, \
      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
1061
/* Compiled-code copying. FROM_VOID copies b into the mode-selected variable
a; the TO_VOID forms copy the mode-selected code b and store the result in a
as a void pointer, without or with the tables. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_code_copy_8(b); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_code_copy_16(b); \
  else \
    G(a,32) = pcre2_code_copy_32(b)

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_32(G(b,32))

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
  else \
    a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
1085
/* Mode dispatch for pcre2_compile(), pcre2_converted_pattern_free() and
pcre2_dfa_match(). */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
  else \
    G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
  else \
    a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
1106
/* Mode dispatch for pcre2_get_error_message(), pcre2_get_ovector_count() and
pcre2_get_startchar(). For the error message, the length argument is the
"<buffer>_size" variable divided by the code unit size in bytes. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
  else \
    r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_ovector_count_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_ovector_count_16(G(b,16)); \
  else \
    a = pcre2_get_ovector_count_32(G(b,32))

#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_get_startchar_8(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_get_startchar_16(G(b,16)); \
  else \
    a = pcre2_get_startchar_32(G(b,32))
1130
/* Mode dispatch for pcre2_jit_compile(), pcre2_jit_free_unused_memory() and
pcre2_jit_match(). */

#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_jit_compile_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_jit_compile_16(G(a,16),b); \
  else \
    r = pcre2_jit_compile_32(G(a,32),b)

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_free_unused_memory_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_free_unused_memory_16(G(a,16)); \
  else \
    pcre2_jit_free_unused_memory_32(G(a,32))

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
1148
/* JIT stack creation, assignment and freeing, dispatched on test_mode. None
of these expansions ends with a semicolon: the caller supplies it, as for the
other statement-like macros in this set, so that an invocation can be followed
directly by "else". */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d)

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
  else \
    pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c)

#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
  else \
    pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a)
1172
/* Mode dispatch for pcre2_maketables(), pcre2_match() and the match data
create/free functions. */

#define PCRE2_MAKETABLES(a) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_maketables_8(NULL); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_maketables_16(NULL); \
  else \
    a = pcre2_maketables_32(NULL)

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
  else \
    a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_8(b,c); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_16(b,c); \
  else \
    G(a,32) = pcre2_match_data_create_32(b,c)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
  else \
    G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)

#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_match_data_free_8(G(a,8)); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_match_data_free_16(G(a,16)); \
  else \
    pcre2_match_data_free_32(G(a,32))
1209
/* Mode dispatch for pcre2_pattern_convert(), pcre2_pattern_info() and
pcre2_printint(). PCRE2_PRINTINT always uses the current mode's compiled code
and writes to outfile. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
  else \
    a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_pattern_info_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_pattern_info_16(G(b,16),c,d); \
  else \
    a = pcre2_pattern_info_32(G(b,32),c,d)

#define PCRE2_PRINTINT(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_printint_8(compiled_code8,outfile,a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_printint_16(compiled_code16,outfile,a); \
  else \
    pcre2_printint_32(compiled_code32,outfile,a)
1233
/* Mode dispatch for the serialization decode, encode and free functions. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
  else \
    r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
  else \
    r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))

#define PCRE2_SERIALIZE_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_serialize_free_8(a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_serialize_free_16(a); \
  else \
    pcre2_serialize_free_32(a)
1257
/* Store in r the number of codes in serialized data a, using the function for
the current mode. The expansion ends without a semicolon or line continuation:
the caller supplies the semicolon, and the definition no longer extends onto
the line that follows it. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_serialize_get_number_of_codes_8(a); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_serialize_get_number_of_codes_16(a); \
  else \
    r = pcre2_serialize_get_number_of_codes_32(a)
1265
/* Set callout function b, cast to the current mode's callback type, with user
data c in the mode-selected context a. The expansion ends without a semicolon;
the caller supplies it, as for the other statement-like macros in this set. */

#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
1273
/* Context setters dispatched on test_mode; a names the mode-selected context.
The two glob setters also store the function's return value in r. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_character_tables_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_character_tables_16(G(a,16),b); \
  else \
    pcre2_set_character_tables_32(G(a,32),b)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32),b,c)

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_depth_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_depth_limit_16(G(a,16),b); \
  else \
    pcre2_set_depth_limit_32(G(a,32),b)

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_separator_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_separator_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_separator_32(G(a,32),b)

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == PCRE8_MODE) \
    r = pcre2_set_glob_escape_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    r = pcre2_set_glob_escape_16(G(a,16),b); \
  else \
    r = pcre2_set_glob_escape_32(G(a,32),b)
1313
/* Limit setters dispatched on test_mode; a names the mode-selected context
and b is the value passed through. */

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_heap_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_heap_limit_16(G(a,16),b); \
  else \
    pcre2_set_heap_limit_32(G(a,32),b)

#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_match_limit_16(G(a,16),b); \
  else \
    pcre2_set_match_limit_32(G(a,32),b)

#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_max_pattern_length_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_max_pattern_length_16(G(a,16),b); \
  else \
    pcre2_set_max_pattern_length_32(G(a,32),b)

#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_offset_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_offset_limit_16(G(a,16),b); \
  else \
    pcre2_set_offset_limit_32(G(a,32),b)

#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_parens_nest_limit_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_parens_nest_limit_16(G(a,16),b); \
  else \
    pcre2_set_parens_nest_limit_32(G(a,32),b)
1353
/* Mode dispatch for pcre2_substitute(); arguments c, i and k are cast to the
current mode's string types and the return value is stored in a. */

#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1364
/* Mode dispatch for the substring copy, free and get functions. Buffer
arguments are cast to the current mode's character type; where there is a
return value it is stored in a. */

#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)

#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else \
    pcre2_substring_free_32((PCRE2_UCHAR32 *)a)

#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)

#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
1402
/* Mode dispatch for the substring length, list and number-from-name
functions; where there is a return value it is stored in a. */

#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
  else \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)

#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else \
    a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)

#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
  else \
    a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1442
/* General helpers that act on the 8-, 16- or 32-bit variant of a name,
selected by test_mode.

  PTR         the variable as a void pointer
  SETFLD      assign z to field y
  SETFLDVEC   assign z to element v of vector field y
  SETOP       apply assignment operator z with operand y
  SETCASTPTR  assign y cast to the mode's code unit pointer type
  STRLEN      string length as an int, using strlen/strlen16/strlen32
  SUB1, SUB2  call function a with one or two mode-selected arguments
  TEST        compare the variable with y using operator r
  TESTFLD     compare field f with y using operator r */

#define PTR(x) ( \
  (test_mode == PCRE8_MODE)? (void *)G(x,8) : \
  (test_mode == PCRE16_MODE)? (void *)G(x,16) : \
  (void *)G(x,32))

#define SETFLD(x,y,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8)->y = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16)->y = z; \
  else \
    G(x,32)->y = z

#define SETFLDVEC(x,y,v,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8)->y[v] = z; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16)->y[v] = z; \
  else \
    G(x,32)->y[v] = z

#define SETOP(x,y,z) \
  if (test_mode == PCRE8_MODE) \
    G(x,8) z y; \
  else if (test_mode == PCRE16_MODE) \
    G(x,16) z y; \
  else \
    G(x,32) z y

#define SETCASTPTR(x,y) \
  if (test_mode == PCRE8_MODE) \
    G(x,8) = (uint8_t *)(y); \
  else if (test_mode == PCRE16_MODE) \
    G(x,16) = (uint16_t *)(y); \
  else \
    G(x,32) = (uint32_t *)(y)

#define STRLEN(p) ( \
  (test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \
  (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \
  ((int)strlen32((PCRE2_SPTR32)p)))

#define SUB1(a,b) \
  if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16)); \
  else \
    G(a,32)(G(b,32))

#define SUB2(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    G(a,8)(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(a,16)(G(b,16),G(c,16)); \
  else \
    G(a,32)(G(b,32),G(c,32))

#define TEST(x,r,y) ( \
  (test_mode == PCRE8_MODE && G(x,8) r (y)) || \
  (test_mode == PCRE16_MODE && G(x,16) r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32) r (y)))

#define TESTFLD(x,f,r,y) ( \
  (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \
  (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \
  (test_mode == PCRE32_MODE && G(x,32)->f r (y)))
1494
1495
1496 /* ----- Two out of three modes are supported ----- */
1497
1498 #else
1499
1500 /* We can use some macro trickery to make a single set of definitions work in
1501 the three different cases. */
1502
1503 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1504
/* BITONE and BITTWO are the code unit widths of the two supported modes, the
wider one first. */

#if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
  /* 32-bit and 16-bit, no 8-bit */
  #define BITONE 32
  #define BITTWO 16
#elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8)
  /* 32-bit and 8-bit, no 16-bit */
  #define BITONE 32
  #define BITTWO 8
#else
  /* 16-bit and 8-bit, no 32-bit */
  #define BITONE 16
  #define BITTWO 8
#endif
1521
1522
1523 /* ----- Common macros for two-mode cases ----- */
1524
/* Code unit sizes in bytes for the two supported modes. */

#define BYTEONE  (BITONE/8)
#define BYTETWO  (BITTWO/8)
1527
/* Two-mode expression macros: the BITONE variant is chosen when test_mode is
PCRE<BITONE>_MODE, otherwise the BITTWO variant. CASTFLD yields field b of
object a cast to type t; CASTVAR yields variable x cast to type t; CODE_UNIT
yields code unit b of string a as a uint32_t. */

#define CASTFLD(t,a,b) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (t)(G(a,BITONE)->b) : \
    (t)(G(a,BITTWO)->b))

#define CASTVAR(t,x) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (t)G(x,BITONE) : \
    (t)G(x,BITTWO))

#define CODE_UNIT(a,b) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
1540
/* Copy one convert context over another, using the structure size of the
current mode. */

#define CONCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE), \
      sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO), \
      sizeof(G(pcre2_convert_context_,BITTWO)))
1546
/* Copy c code units from b into the mode-selected buffer a; the byte count is
c scaled by the code unit size. The whole conditional expression is
parenthesized so that it stays a single operand wherever it is used, and b is
parenthesized so that the cast applies to the whole argument expression. */

#define CONVERT_COPY(a,b,c) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    memcpy(G(a,BITONE),(char *)(b),(c)*BYTEONE) : \
    memcpy(G(a,BITTWO),(char *)(b),(c)*BYTETWO))
1551
/* DATCTXCPY and PATCTXCPY copy one match or compile context over another,
using the structure size of the current mode. FLD is an expression yielding
field b of the mode-selected object a. */

#define DATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE), \
      sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO), \
      sizeof(G(pcre2_match_context_,BITTWO)))

#define FLD(a,b) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(a,BITONE)->b : \
    G(a,BITTWO)->b)

#define PATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE), \
      sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO), \
      sizeof(G(pcre2_compile_context_,BITTWO)))
1566
/* Call the pchars function for the current mode on p+offset. PCHARS stores
the function's result in lv; PCHARSV discards it. */

#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)( \
      (G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else \
    lv = G(pchars,BITTWO)( \
      (G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)

#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)( \
      (G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else \
    (void)G(pchars,BITTWO)( \
      (G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1578
/* Enumerate the callouts of the current mode's compiled code. Callback b is
cast to that mode's callback type; the return value is stored in a. The
function name prefix carries its trailing underscore, like every other pasted
name in this set, so that the result is pcre2_callout_enumerate_<width>
regardless of whether "pcre2_callout_enumerate" is itself defined as a
macro. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
    a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
      (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
1586
/* Wrappers for the width-specific code-copying functions. Each one tests
test_mode and uses either the BITONE or the BITTWO function and variable. */

/* Copy from a plain pointer into the width-specific code variable. */

#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(dst,BITONE) = G(pcre2_code_copy_,BITONE)(src); \
  else \
    G(dst,BITTWO) = G(pcre2_code_copy_,BITTWO)(src)

/* Copy the width-specific code variable; the result is cast to void *. */

#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_,BITTWO)(G(src,BITTWO))

/* As the previous macro, but calling the copy-with-tables function. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(src,BITONE)); \
  else \
    dst = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(src,BITTWO))
1604
/* Call the width-specific compile function. The result is assigned to the
width-specific code variable and the first function argument is the
width-specific pattern variable; the other arguments pass through unchanged. */

#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(code,BITONE) = G(pcre2_compile_,BITONE)(G(pat,BITONE), \
      len,opts,errcode,erroff,ctx); \
  else \
    G(code,BITTWO) = G(pcre2_compile_,BITTWO)(G(pat,BITTWO), \
      len,opts,errcode,erroff,ctx)

/* Free a converted pattern, casting the pointer to the width-specific
PCRE2_UCHAR pointer type first. */

#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)( \
      (G(PCRE2_UCHAR,BITONE) *)pat); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)( \
      (G(PCRE2_UCHAR,BITTWO) *)pat)

/* Call the width-specific DFA matching function, storing its return value in
rc. The code and match data arguments are width-specific variables, and the
subject is cast to the matching PCRE2_SPTR type. */

#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,wspace,wscount) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_dfa_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),mctx,wspace,wscount); \
  else \
    rc = G(pcre2_dfa_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),mctx,wspace,wscount)

/* Fetch an error message into the width-specific buffer. The length passed is
the buffer's companion "_size" variable divided by BYTEONE or BYTETWO. */

#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_get_error_message_,BITONE)(errnum,G(buf,BITONE), \
      G(G(buf,BITONE),_size/BYTEONE)); \
  else \
    rc = G(pcre2_get_error_message_,BITTWO)(errnum,G(buf,BITTWO), \
      G(G(buf,BITTWO),_size/BYTETWO))
1630
/* Each of these tests test_mode and calls the BITONE or the BITTWO form of the
named function, using the width-specific variables for the code, match data or
context arguments. */

/* Store the ovector count for the width-specific match data in result. */

#define PCRE2_GET_OVECTOR_COUNT(result,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = G(pcre2_get_ovector_count_,BITONE)(G(mdata,BITONE)); \
  else \
    result = G(pcre2_get_ovector_count_,BITTWO)(G(mdata,BITTWO))

/* Store the start character for the width-specific match data in result. */

#define PCRE2_GET_STARTCHAR(result,mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = G(pcre2_get_startchar_,BITONE)(G(mdata,BITONE)); \
  else \
    result = G(pcre2_get_startchar_,BITTWO)(G(mdata,BITTWO))

/* JIT-compile the width-specific code, storing the return value in rc. */

#define PCRE2_JIT_COMPILE(rc,code,opts) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_compile_,BITONE)(G(code,BITONE),opts); \
  else \
    rc = G(pcre2_jit_compile_,BITTWO)(G(code,BITTWO),opts)

/* Call the function that frees unused JIT memory, passing the width-specific
context variable. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(ctx,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(ctx,BITTWO))

/* Call the width-specific JIT matching function, storing its return value in
rc. The subject is cast to the matching PCRE2_SPTR type. */

#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_jit_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_jit_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),mctx)
1662
/* Create a JIT stack with the width-specific function and store the result,
cast to PCRE2_JIT_STACK *, in the first argument. The remaining arguments are
passed straight through.

The definition deliberately ends without a semicolon and without a line
continuation. The invoking code supplies the semicolon, as it does for the
other statement-like macros here, which keeps the macro usable as the body of
an "if" that has an "else"; and the definition must not run on into whatever
line follows it. */

#define PCRE2_JIT_STACK_CREATE(result,startsize,maxsize,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)( \
      startsize,maxsize,ctx); \
  else \
    result = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)( \
      startsize,maxsize,ctx)
1668
/* Call the width-specific JIT stack assignment function. The first argument
names the width-specific context variable, the callback is cast to the
matching pcre2_jit_callback type, and data is passed through unchanged.

The definition deliberately ends without a semicolon: the invoking code
supplies it, as it does for the other statement-like macros here, which keeps
the macro usable as the body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_ASSIGN(ctx,callback,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(ctx,BITONE), \
      (G(pcre2_jit_callback_,BITONE))callback,data); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(ctx,BITTWO), \
      (G(pcre2_jit_callback_,BITTWO))callback,data)
1674
/* Free a JIT stack with the width-specific function, casting the argument to
a pointer to the matching pcre2_jit_stack type.

The definition deliberately ends without a semicolon: the invoking code
supplies it, as it does for the other statement-like macros here, which keeps
the macro usable as the body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_FREE(stack) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)stack); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)stack)
1680
/* Each of these tests test_mode and calls the BITONE or the BITTWO form of the
named function, using the width-specific variables where a name is glued. */

/* Call the maketables function with a NULL argument; result gets the value. */

#define PCRE2_MAKETABLES(result) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    result = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    result = G(pcre2_maketables_,BITTWO)(NULL)

/* Call the width-specific matching function, storing its return value in rc.
The subject is cast to the matching PCRE2_SPTR type. */

#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_match_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),mctx); \
  else \
    rc = G(pcre2_match_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),mctx)

/* Create match data and assign it to the width-specific variable. */

#define PCRE2_MATCH_DATA_CREATE(mdata,pairs,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_,BITONE)(pairs,ctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_,BITTWO)(pairs,ctx)

/* Create match data from the width-specific code variable. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(mdata,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)( \
      G(code,BITONE),ctx); \
  else \
    G(mdata,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)( \
      G(code,BITTWO),ctx)

/* Free the width-specific match data. */

#define PCRE2_MATCH_DATA_FREE(mdata) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(mdata,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(mdata,BITTWO))
1712
/* Each of these tests test_mode and calls the BITONE or the BITTWO form of the
named function, using the width-specific variables where a name is glued. */

/* Convert the width-specific pattern. The output pointer is cast to the
matching PCRE2_UCHAR ** type and the last argument names the width-specific
context variable. */

#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,outp,outlenp,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_convert_,BITONE)(G(pat,BITONE),len,opts, \
      (G(PCRE2_UCHAR,BITONE) **)outp,outlenp,G(ctx,BITONE)); \
  else \
    rc = G(pcre2_pattern_convert_,BITTWO)(G(pat,BITTWO),len,opts, \
      (G(PCRE2_UCHAR,BITTWO) **)outp,outlenp,G(ctx,BITTWO))

/* Query the width-specific code variable, storing the return value in rc. */

#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_pattern_info_,BITONE)(G(code,BITONE),what,where); \
  else \
    rc = G(pcre2_pattern_info_,BITTWO)(G(code,BITTWO),what,where)

/* Call the width-specific printint function on the width-specific
compiled_code variable, with outfile as its second argument. */

#define PCRE2_PRINTINT(arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,arg); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,arg)

/* Serialization wrappers. The list of codes is cast to the width-specific
pointer type, and the final argument names the width-specific context. */

#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_decode_,BITONE)( \
      (G(pcre2_code_,BITONE) **)codes,count,bytes,G(ctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_decode_,BITTWO)( \
      (G(pcre2_code_,BITTWO) **)codes,count,bytes,G(ctx,BITTWO))

#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesp,sizep,ctx) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_encode_,BITONE)( \
      (G(const pcre2_code_,BITONE) **)codes,count,bytesp,sizep, \
      G(ctx,BITONE)); \
  else \
    rc = G(pcre2_serialize_encode_,BITTWO)( \
      (G(const pcre2_code_,BITTWO) **)codes,count,bytesp,sizep, \
      G(ctx,BITTWO))

/* The argument of the next two macros is passed through unchanged. */

#define PCRE2_SERIALIZE_FREE(bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(bytes); \
  else \
    G(pcre2_serialize_free_,BITTWO)(bytes)

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_serialize_get_number_of_codes_,BITONE)(bytes); \
  else \
    rc = G(pcre2_serialize_get_number_of_codes_,BITTWO)(bytes)
1754
/* Call the width-specific set-callout function. The first argument names the
width-specific context variable, the callback is cast to the function-pointer
type that takes the matching callout block, and data is passed through.

The definition deliberately ends without a semicolon, matching the 8-bit-only
definition of this macro: the invoking code supplies it, which keeps the macro
usable as the body of an "if" that has an "else". */

#define PCRE2_SET_CALLOUT(ctx,callback,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(ctx,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))callback,data); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(ctx,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))callback,data)
1762
/* Context-setting wrappers. In every case the first context argument names a
width-specific variable, test_mode chooses between the BITONE and BITTWO
functions, and the other arguments pass through unchanged. The two glob macros
also store the function's return value in rc. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(ctx,BITONE),tables); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(ctx,BITTWO),tables)

#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(ctx,BITONE),guard,data); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(ctx,BITTWO),guard,data)

#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_escape_,BITONE)(G(ctx,BITONE),ch); \
  else \
    rc = G(pcre2_set_glob_escape_,BITTWO)(G(ctx,BITTWO),ch)

#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_set_glob_separator_,BITONE)(G(ctx,BITONE),ch); \
  else \
    rc = G(pcre2_set_glob_separator_,BITTWO)(G(ctx,BITTWO),ch)

#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(ctx,BITONE),len); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(ctx,BITTWO),len)

#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(ctx,BITTWO),limit)

#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(ctx,BITONE),limit); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(ctx,BITTWO),limit)
1822
/* Call the width-specific substitute function, storing its return value in
rc. The code, match data and match context arguments name width-specific
variables; the subject and replacement are cast to the matching PCRE2_SPTR
type and the output buffer to the matching PCRE2_UCHAR pointer type. */

#define PCRE2_SUBSTITUTE(rc,code,subject,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substitute_,BITONE)(G(code,BITONE), \
      (G(PCRE2_SPTR,BITONE))subject,len,start,opts, \
      G(mdata,BITONE),G(mctx,BITONE), \
      (G(PCRE2_SPTR,BITONE))repl,rlen, \
      (G(PCRE2_UCHAR,BITONE) *)outbuf,outlenp); \
  else \
    rc = G(pcre2_substitute_,BITTWO)(G(code,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))subject,len,start,opts, \
      G(mdata,BITTWO),G(mctx,BITTWO), \
      (G(PCRE2_SPTR,BITTWO))repl,rlen, \
      (G(PCRE2_UCHAR,BITTWO) *)outbuf,outlenp)

/* Copy a substring selected by name; both the match data and the name are
width-specific variables. */

#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) *)buf,lenp); \
  else \
    rc = G(pcre2_substring_copy_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) *)buf,lenp)

/* Copy a substring selected by number; num is passed through unchanged. */

#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_copy_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) *)buf,lenp); \
  else \
    rc = G(pcre2_substring_copy_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) *)buf,lenp)
1848
/* Substring wrappers. test_mode chooses between the BITONE and BITTWO
functions; match data, code and name arguments are width-specific variables,
and buffer pointers are cast to the matching PCRE2_UCHAR or PCRE2_SPTR type. */

#define PCRE2_SUBSTRING_FREE(str) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)str); \
  else \
    G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)str)

#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufp,lenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),(G(PCRE2_UCHAR,BITONE) **)bufp,lenp); \
  else \
    rc = G(pcre2_substring_get_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),(G(PCRE2_UCHAR,BITTWO) **)bufp,lenp)

#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufp,lenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_get_bynumber_,BITONE)(G(mdata,BITONE), \
      num,(G(PCRE2_UCHAR,BITONE) **)bufp,lenp); \
  else \
    rc = G(pcre2_substring_get_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,(G(PCRE2_UCHAR,BITTWO) **)bufp,lenp)

#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_byname_,BITONE)(G(mdata,BITONE), \
      G(name,BITONE),lenp); \
  else \
    rc = G(pcre2_substring_length_byname_,BITTWO)(G(mdata,BITTWO), \
      G(name,BITTWO),lenp)

#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_length_bynumber_,BITONE)(G(mdata,BITONE), \
      num,lenp); \
  else \
    rc = G(pcre2_substring_length_bynumber_,BITTWO)(G(mdata,BITTWO), \
      num,lenp)

#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listp,lensp) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_list_get_,BITONE)(G(mdata,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)listp,lensp); \
  else \
    rc = G(pcre2_substring_list_get_,BITTWO)(G(mdata,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)listp,lensp)

#define PCRE2_SUBSTRING_LIST_FREE(list) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)list); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)list)

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    rc = G(pcre2_substring_number_from_name_,BITONE)(G(code,BITONE), \
      G(name,BITONE)); \
  else \
    rc = G(pcre2_substring_number_from_name_,BITTWO)(G(code,BITTWO), \
      G(name,BITTWO))
1901
/* Yield the width-specific variable selected by test_mode, cast to void *. */

#define PTR(var) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(var,BITONE) : (void *)G(var,BITTWO))

/* Assign value to a member of the selected width-specific structure. */

#define SETFLD(base,member,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(base,BITONE)->member = value; \
  else \
    G(base,BITTWO)->member = value

/* Assign value to element idx of a vector member of the selected structure. */

#define SETFLDVEC(base,member,idx,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(base,BITONE)->member[idx] = value; \
  else \
    G(base,BITTWO)->member[idx] = value

/* Apply the operator given as the third argument to the selected
width-specific variable, with value as its right-hand operand. */

#define SETOP(var,value,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op value; \
  else \
    G(var,BITTWO) op value

/* Assign value to the selected width-specific variable after casting it to a
pointer to the unsigned integer type of that width. */

#define SETCASTPTR(var,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) = (G(G(uint,BITONE),_t) *)(value); \
  else \
    G(var,BITTWO) = (G(G(uint,BITTWO),_t) *)(value)

/* Call the width-specific strlen function on the argument, which is first
cast to the matching PCRE2_SPTR type. */

#define STRLEN(str) ((test_mode == G(G(PCRE,BITONE),_MODE))? \
  G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) : \
  G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))

/* Call the width-specific form of a one-argument function on the
width-specific form of a variable. */

#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(fn,BITONE)(G(arg,BITONE)); \
  else \
    G(fn,BITTWO)(G(arg,BITTWO))
1933
/* Call the width-specific form of a two-argument function on the
width-specific forms of two variables: SUB2(f,x,y) becomes f8(x8,y8) when
BITONE is 8 and test_mode selects that width, and the BITTWO equivalent
otherwise.

The glued function name must be followed directly by the argument list; a
stray closing parenthesis after the name makes every use a syntax error. */

#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1939
/* Compare the width-specific variable selected by test_mode with value, using
the relational operator given as the middle argument. The result is false if
test_mode matches neither the BITONE nor the BITTWO mode. */

#define TEST(var,rel,value) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(var,BITONE) rel (value)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(var,BITTWO) rel (value)))

/* The same comparison, applied to a member of the selected structure. */

#define TESTFLD(base,member,rel,value) ( \
  (test_mode == G(G(PCRE,BITONE),_MODE) && \
    G(base,BITONE)->member rel (value)) || \
  (test_mode == G(G(PCRE,BITTWO),_MODE) && \
    G(base,BITTWO)->member rel (value)))
1947
1948
1949 #endif /* Two out of three modes */
1950
1951 /* ----- End of cases where more than one mode is supported ----- */
1952
1953
1954 /* ----- Only 8-bit mode is supported ----- */
1955
1956 #elif defined SUPPORT_PCRE2_8
/* 8-bit only: every macro maps directly onto the function or variable with
the 8 suffix, so no run-time test of the mode is made. */

#define CASTFLD(type,base,member) \
  (type)(G(base,8)->member)
#define CASTVAR(type,var) \
  (type)G(var,8)
#define CODE_UNIT(ptr,idx) \
  (uint32_t)(((PCRE2_SPTR8)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8))
#define CONVERT_COPY(dst,src,units) \
  memcpy(G(dst,8),(char *)src, units)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
#define FLD(base,member) \
  G(base,8)->member
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))
#define PCHARS(result, ptr, start, count, utf, f) \
  result = pchars8((PCRE2_SPTR8)(ptr)+start, count, utf, f)
#define PCHARSV(ptr, start, count, utf, f) \
  (void)pchars8((PCRE2_SPTR8)(ptr)+start, count, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  rc = pcre2_callout_enumerate_8(compiled_code8, \
    (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))callback,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,8) = pcre2_code_copy_8(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_8(G(src,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_8(G(src,8))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  G(code,8) = pcre2_compile_8(G(pat,8),len,opts,errcode,erroff,ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
    G(mdata,8),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  rc = pcre2_get_error_message_8(errnum,G(buf,8),G(G(buf,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(result,mdata) \
  result = pcre2_get_ovector_count_8(G(mdata,8))
#define PCRE2_GET_STARTCHAR(result,mdata) \
  result = pcre2_get_startchar_8(G(mdata,8))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_8(G(code,8),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_8(G(ctx,8))
#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
    G(mdata,8),mctx)
/* 8-bit JIT stack handling: create a stack (the result is cast to
PCRE2_JIT_STACK *), assign it via the 8-bit context variable with the callback
cast to pcre2_jit_callback_8, and free it after casting to pcre2_jit_stack_8 *.

None of the definitions ends with a semicolon: the invoking code supplies it,
as it does for the other statement-like macros here, which keeps each macro
usable as the body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_CREATE(result,startsize,maxsize,ctx) \
  result = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(startsize,maxsize,ctx)
#define PCRE2_JIT_STACK_ASSIGN(ctx,callback,data) \
  pcre2_jit_stack_assign_8(G(ctx,8),(pcre2_jit_callback_8)callback,data)
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)stack)
/* 8-bit only: direct calls of the functions with the 8 suffix. Arguments that
are wrapped in G(...,8) name the 8-bit variables; pointer arguments are cast
to the 8-bit PCRE2_SPTR8 or PCRE2_UCHAR8 types where shown. */

#define PCRE2_MAKETABLES(result) \
  result = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  rc = pcre2_match_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
    G(mdata,8),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,pairs,ctx) \
  G(mdata,8) = pcre2_match_data_create_8(pairs,ctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,ctx) \
  G(mdata,8) = pcre2_match_data_create_from_pattern_8(G(code,8),ctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_8(G(mdata,8))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,outp,outlenp,ctx) \
  rc = pcre2_pattern_convert_8(G(pat,8),len,opts,(PCRE2_UCHAR8 **)outp, \
    outlenp,G(ctx,8))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_8(G(code,8),what,where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_8(compiled_code8,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,ctx) \
  rc = pcre2_serialize_decode_8((pcre2_code_8 **)codes,count,bytes,G(ctx,8))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesp,sizep,ctx) \
  rc = pcre2_serialize_encode_8((const pcre2_code_8 **)codes,count,bytesp, \
    sizep,G(ctx,8))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_8(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_8(bytes)
#define PCRE2_SET_CALLOUT(ctx,callback,data) \
  pcre2_set_callout_8(G(ctx,8), \
    (int (*)(pcre2_callout_block_8 *, void *))callback,data)
#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_8(G(ctx,8),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_8(G(ctx,8),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_8(G(ctx,8),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  rc = pcre2_set_glob_escape_8(G(ctx,8),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  rc = pcre2_set_glob_separator_8(G(ctx,8),ch)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_8(G(ctx,8),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  pcre2_set_max_pattern_length_8(G(ctx,8),len)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_8(G(ctx,8),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_8(G(ctx,8),limit)
#define PCRE2_SUBSTITUTE(rc,code,subject,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenp) \
  rc = pcre2_substitute_8(G(code,8),(PCRE2_SPTR8)subject,len,start,opts, \
    G(mdata,8),G(mctx,8),(PCRE2_SPTR8)repl,rlen, \
    (PCRE2_UCHAR8 *)outbuf,outlenp)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenp) \
  rc = pcre2_substring_copy_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 *)buf,lenp)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenp) \
  rc = pcre2_substring_copy_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 *)buf,lenp)
#define PCRE2_SUBSTRING_FREE(str) \
  pcre2_substring_free_8((PCRE2_UCHAR8 *)str)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufp,lenp) \
  rc = pcre2_substring_get_byname_8(G(mdata,8),G(name,8), \
    (PCRE2_UCHAR8 **)bufp,lenp)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufp,lenp) \
  rc = pcre2_substring_get_bynumber_8(G(mdata,8),num, \
    (PCRE2_UCHAR8 **)bufp,lenp)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenp) \
  rc = pcre2_substring_length_byname_8(G(mdata,8),G(name,8),lenp)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenp) \
  rc = pcre2_substring_length_bynumber_8(G(mdata,8),num,lenp)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listp,lensp) \
  rc = pcre2_substring_list_get_8(G(mdata,8),(PCRE2_UCHAR8 ***)listp,lensp)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)list)
/* Look up a group number from a name using the 8-bit code and name variables,
storing the function's return value in rc. As in the two-width definition of
this macro, there is no semicolon at the end: the invoking code supplies it,
which keeps the macro usable as the body of an "if" that has an "else". */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  rc = pcre2_substring_number_from_name_8(G(code,8),G(name,8))
/* 8-bit only: variable, field and comparison helpers that act directly on the
variables with the 8 suffix. STRLEN is the C library strlen with its result
cast to int. */

#define PTR(var) \
  (void *)G(var,8)
#define SETFLD(base,member,value) \
  G(base,8)->member = value
#define SETFLDVEC(base,member,idx,value) \
  G(base,8)->member[idx] = value
#define SETOP(var,value,op) \
  G(var,8) op value
#define SETCASTPTR(var,value) \
  G(var,8) = (uint8_t *)(value)
#define STRLEN(str) \
  (int)strlen((char *)str)
#define SUB1(fn,arg) \
  G(fn,8)(G(arg,8))
#define SUB2(fn,arg1,arg2) \
  G(fn,8)(G(arg1,8),G(arg2,8))
#define TEST(var,rel,value) \
  (G(var,8) rel (value))
#define TESTFLD(base,member,rel,value) \
  (G(base,8)->member rel (value))
2056
2057
2058 /* ----- Only 16-bit mode is supported ----- */
2059
2060 #elif defined SUPPORT_PCRE2_16
/* 16-bit only: every macro maps directly onto the function or variable with
the 16 suffix, so no run-time test of the mode is made. */

#define CASTFLD(type,base,member) \
  (type)(G(base,16)->member)
#define CASTVAR(type,var) \
  (type)G(var,16)
#define CODE_UNIT(ptr,idx) \
  (uint32_t)(((PCRE2_SPTR16)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_convert_context_16))
#define CONVERT_COPY(dst,src,units) \
  memcpy(G(dst,16),(char *)src, (units)*2)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_match_context_16))
#define FLD(base,member) \
  G(base,16)->member
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,16),G(src,16),sizeof(pcre2_compile_context_16))
#define PCHARS(result, ptr, start, count, utf, f) \
  result = pchars16((PCRE2_SPTR16)(ptr)+start, count, utf, f)
#define PCHARSV(ptr, start, count, utf, f) \
  (void)pchars16((PCRE2_SPTR16)(ptr)+start, count, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  rc = pcre2_callout_enumerate_16(compiled_code16, \
    (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))callback,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,16) = pcre2_code_copy_16(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_16(G(src,16))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_16(G(src,16))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  G(code,16) = pcre2_compile_16(G(pat,16),len,opts,errcode,erroff,ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
    G(mdata,16),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  rc = pcre2_get_error_message_16(errnum,G(buf,16),G(G(buf,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(result,mdata) \
  result = pcre2_get_ovector_count_16(G(mdata,16))
#define PCRE2_GET_STARTCHAR(result,mdata) \
  result = pcre2_get_startchar_16(G(mdata,16))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_16(G(code,16),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_16(G(ctx,16))
#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
    G(mdata,16),mctx)
/* 16-bit JIT stack handling: create a stack (the result is cast to
PCRE2_JIT_STACK *), assign it via the 16-bit context variable with the
callback cast to pcre2_jit_callback_16, and free it after casting to
pcre2_jit_stack_16 *.

None of the definitions ends with a semicolon: the invoking code supplies it,
as it does for the other statement-like macros here, which keeps each macro
usable as the body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_CREATE(result,startsize,maxsize,ctx) \
  result = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(startsize,maxsize,ctx)
#define PCRE2_JIT_STACK_ASSIGN(ctx,callback,data) \
  pcre2_jit_stack_assign_16(G(ctx,16),(pcre2_jit_callback_16)callback,data)
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)stack)
/* 16-bit only: direct calls of the functions with the 16 suffix. Arguments
that are wrapped in G(...,16) name the 16-bit variables. */

#define PCRE2_MAKETABLES(result) \
  result = pcre2_maketables_16(NULL)
#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  rc = pcre2_match_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
    G(mdata,16),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,pairs,ctx) \
  G(mdata,16) = pcre2_match_data_create_16(pairs,ctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,ctx) \
  G(mdata,16) = pcre2_match_data_create_from_pattern_16(G(code,16),ctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_16(G(mdata,16))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,outp,outlenp,ctx) \
  rc = pcre2_pattern_convert_16(G(pat,16),len,opts,(PCRE2_UCHAR16 **)outp, \
    outlenp,G(ctx,16))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_16(G(code,16),what,where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_16(compiled_code16,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,ctx) \
  rc = pcre2_serialize_decode_16((pcre2_code_16 **)codes,count,bytes, \
    G(ctx,16))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesp,sizep,ctx) \
  rc = pcre2_serialize_encode_16((const pcre2_code_16 **)codes,count,bytesp, \
    sizep,G(ctx,16))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_16(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_16(bytes)
/* Call pcre2_set_callout_16 on the 16-bit context variable, casting the
callback to the function-pointer type that takes a pcre2_callout_block_16.
As in the 8-bit definition of this macro, there is no semicolon at the end:
the invoking code supplies it, which keeps the macro usable as the body of an
"if" that has an "else". */

#define PCRE2_SET_CALLOUT(ctx,callback,data) \
  pcre2_set_callout_16(G(ctx,16), \
    (int (*)(pcre2_callout_block_16 *, void *))callback,data)
/* 16-bit only: context setters, substitution and substring access. Arguments
that are wrapped in G(...,16) name the 16-bit variables; pointer arguments are
cast to the 16-bit PCRE2_SPTR16 or PCRE2_UCHAR16 types where shown. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_16(G(ctx,16),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_16(G(ctx,16),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_16(G(ctx,16),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  rc = pcre2_set_glob_escape_16(G(ctx,16),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  rc = pcre2_set_glob_separator_16(G(ctx,16),ch)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_16(G(ctx,16),limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_16(G(ctx,16),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  pcre2_set_max_pattern_length_16(G(ctx,16),len)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_16(G(ctx,16),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_16(G(ctx,16),limit)
#define PCRE2_SUBSTITUTE(rc,code,subject,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenp) \
  rc = pcre2_substitute_16(G(code,16),(PCRE2_SPTR16)subject,len,start,opts, \
    G(mdata,16),G(mctx,16),(PCRE2_SPTR16)repl,rlen, \
    (PCRE2_UCHAR16 *)outbuf,outlenp)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenp) \
  rc = pcre2_substring_copy_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 *)buf,lenp)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(rc,mdata,num,buf,lenp) \
  rc = pcre2_substring_copy_bynumber_16(G(mdata,16),num, \
    (PCRE2_UCHAR16 *)buf,lenp)
#define PCRE2_SUBSTRING_FREE(str) \
  pcre2_substring_free_16((PCRE2_UCHAR16 *)str)
#define PCRE2_SUBSTRING_GET_BYNAME(rc,mdata,name,bufp,lenp) \
  rc = pcre2_substring_get_byname_16(G(mdata,16),G(name,16), \
    (PCRE2_UCHAR16 **)bufp,lenp)
#define PCRE2_SUBSTRING_GET_BYNUMBER(rc,mdata,num,bufp,lenp) \
  rc = pcre2_substring_get_bynumber_16(G(mdata,16),num, \
    (PCRE2_UCHAR16 **)bufp,lenp)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(rc,mdata,name,lenp) \
  rc = pcre2_substring_length_byname_16(G(mdata,16),G(name,16),lenp)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc,mdata,num,lenp) \
  rc = pcre2_substring_length_bynumber_16(G(mdata,16),num,lenp)
#define PCRE2_SUBSTRING_LIST_GET(rc,mdata,listp,lensp) \
  rc = pcre2_substring_list_get_16(G(mdata,16),(PCRE2_UCHAR16 ***)listp,lensp)
#define PCRE2_SUBSTRING_LIST_FREE(list) \
  pcre2_substring_list_free_16((PCRE2_SPTR16 *)list)
/* Look up a group number from a name using the 16-bit code and name
variables, storing the function's return value in rc. As in the two-width
definition of this macro, there is no semicolon at the end: the invoking code
supplies it, which keeps the macro usable as the body of an "if" that has an
"else". */

#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(rc,code,name) \
  rc = pcre2_substring_number_from_name_16(G(code,16),G(name,16))
/* 16-bit only: variable, field and comparison helpers that act directly on
the variables with the 16 suffix. STRLEN calls strlen16 and casts its result
to int. */

#define PTR(var) \
  (void *)G(var,16)
#define SETFLD(base,member,value) \
  G(base,16)->member = value
#define SETFLDVEC(base,member,idx,value) \
  G(base,16)->member[idx] = value
#define SETOP(var,value,op) \
  G(var,16) op value
#define SETCASTPTR(var,value) \
  G(var,16) = (uint16_t *)(value)
#define STRLEN(str) \
  (int)strlen16((PCRE2_SPTR16)str)
#define SUB1(fn,arg) \
  G(fn,16)(G(arg,16))
#define SUB2(fn,arg1,arg2) \
  G(fn,16)(G(arg1,16),G(arg2,16))
#define TEST(var,rel,value) \
  (G(var,16) rel (value))
#define TESTFLD(base,member,rel,value) \
  (G(base,16)->member rel (value))
2160
2161
2162 /* ----- Only 32-bit mode is supported ----- */
2163
2164 #elif defined SUPPORT_PCRE2_32
/* 32-bit only: every macro maps directly onto the function or variable with
the 32 suffix, so no run-time test of the mode is made. */

#define CASTFLD(type,base,member) \
  (type)(G(base,32)->member)
#define CASTVAR(type,var) \
  (type)G(var,32)
#define CODE_UNIT(ptr,idx) \
  (uint32_t)(((PCRE2_SPTR32)(ptr))[idx])
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_convert_context_32))
#define CONVERT_COPY(dst,src,units) \
  memcpy(G(dst,32),(char *)src, (units)*4)
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_match_context_32))
#define FLD(base,member) \
  G(base,32)->member
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,32),G(src,32),sizeof(pcre2_compile_context_32))
#define PCHARS(result, ptr, start, count, utf, f) \
  result = pchars32((PCRE2_SPTR32)(ptr)+start, count, utf, f)
#define PCHARSV(ptr, start, count, utf, f) \
  (void)pchars32((PCRE2_SPTR32)(ptr)+start, count, utf, f)
#define PCRE2_CALLOUT_ENUMERATE(rc,callback,data) \
  rc = pcre2_callout_enumerate_32(compiled_code32, \
    (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))callback,data)
#define PCRE2_CODE_COPY_FROM_VOID(dst,src) \
  G(dst,32) = pcre2_code_copy_32(src)
#define PCRE2_CODE_COPY_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_32(G(src,32))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(dst,src) \
  dst = (void *)pcre2_code_copy_with_tables_32(G(src,32))
#define PCRE2_COMPILE(code,pat,len,opts,errcode,erroff,ctx) \
  G(code,32) = pcre2_compile_32(G(pat,32),len,opts,errcode,erroff,ctx)
#define PCRE2_CONVERTED_PATTERN_FREE(pat) \
  pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)pat)
#define PCRE2_DFA_MATCH(rc,code,subject,len,start,opts,mdata,mctx,wspace,wscount) \
  rc = pcre2_dfa_match_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
    G(mdata,32),mctx,wspace,wscount)
#define PCRE2_GET_ERROR_MESSAGE(rc,errnum,buf) \
  rc = pcre2_get_error_message_32(errnum,G(buf,32),G(G(buf,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(result,mdata) \
  result = pcre2_get_ovector_count_32(G(mdata,32))
#define PCRE2_GET_STARTCHAR(result,mdata) \
  result = pcre2_get_startchar_32(G(mdata,32))
#define PCRE2_JIT_COMPILE(rc,code,opts) \
  rc = pcre2_jit_compile_32(G(code,32),opts)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(ctx) \
  pcre2_jit_free_unused_memory_32(G(ctx,32))
#define PCRE2_JIT_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  rc = pcre2_jit_match_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
    G(mdata,32),mctx)
/* 32-bit JIT stack handling: create a stack (the result is cast to
PCRE2_JIT_STACK *), assign it via the 32-bit context variable with the
callback cast to pcre2_jit_callback_32, and free it after casting to
pcre2_jit_stack_32 *.

None of the definitions ends with a semicolon: the invoking code supplies it,
as it does for the other statement-like macros here, which keeps each macro
usable as the body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_CREATE(result,startsize,maxsize,ctx) \
  result = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(startsize,maxsize,ctx)
#define PCRE2_JIT_STACK_ASSIGN(ctx,callback,data) \
  pcre2_jit_stack_assign_32(G(ctx,32),(pcre2_jit_callback_32)callback,data)
#define PCRE2_JIT_STACK_FREE(stack) \
  pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)stack)
/* 32-bit only: direct calls of the functions with the 32 suffix. Arguments
that are wrapped in G(...,32) name the 32-bit variables. */

#define PCRE2_MAKETABLES(result) \
  result = pcre2_maketables_32(NULL)
#define PCRE2_MATCH(rc,code,subject,len,start,opts,mdata,mctx) \
  rc = pcre2_match_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
    G(mdata,32),mctx)
#define PCRE2_MATCH_DATA_CREATE(mdata,pairs,ctx) \
  G(mdata,32) = pcre2_match_data_create_32(pairs,ctx)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(mdata,code,ctx) \
  G(mdata,32) = pcre2_match_data_create_from_pattern_32(G(code,32),ctx)
#define PCRE2_MATCH_DATA_FREE(mdata) \
  pcre2_match_data_free_32(G(mdata,32))
#define PCRE2_PATTERN_CONVERT(rc,pat,len,opts,outp,outlenp,ctx) \
  rc = pcre2_pattern_convert_32(G(pat,32),len,opts,(PCRE2_UCHAR32 **)outp, \
    outlenp,G(ctx,32))
#define PCRE2_PATTERN_INFO(rc,code,what,where) \
  rc = pcre2_pattern_info_32(G(code,32),what,where)
#define PCRE2_PRINTINT(arg) \
  pcre2_printint_32(compiled_code32,outfile,arg)
#define PCRE2_SERIALIZE_DECODE(rc,codes,count,bytes,ctx) \
  rc = pcre2_serialize_decode_32((pcre2_code_32 **)codes,count,bytes, \
    G(ctx,32))
#define PCRE2_SERIALIZE_ENCODE(rc,codes,count,bytesp,sizep,ctx) \
  rc = pcre2_serialize_encode_32((const pcre2_code_32 **)codes,count,bytesp, \
    sizep,G(ctx,32))
#define PCRE2_SERIALIZE_FREE(bytes) \
  pcre2_serialize_free_32(bytes)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc,bytes) \
  rc = pcre2_serialize_get_number_of_codes_32(bytes)
/* Call pcre2_set_callout_32 on the 32-bit context variable, casting the
callback to the function-pointer type that takes a pcre2_callout_block_32.
As in the 8-bit definition of this macro, there is no semicolon at the end:
the invoking code supplies it, which keeps the macro usable as the body of an
"if" that has an "else". */

#define PCRE2_SET_CALLOUT(ctx,callback,data) \
  pcre2_set_callout_32(G(ctx,32), \
    (int (*)(pcre2_callout_block_32 *, void *))callback,data)
/* 32-bit only: context setters, substitution and copy-by-name. Arguments that
are wrapped in G(...,32) name the 32-bit variables; pointer arguments are cast
to the 32-bit PCRE2_SPTR32 or PCRE2_UCHAR32 types where shown. */

#define PCRE2_SET_CHARACTER_TABLES(ctx,tables) \
  pcre2_set_character_tables_32(G(ctx,32),tables)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(ctx,guard,data) \
  pcre2_set_compile_recursion_guard_32(G(ctx,32),guard,data)
#define PCRE2_SET_DEPTH_LIMIT(ctx,limit) \
  pcre2_set_depth_limit_32(G(ctx,32),limit)
#define PCRE2_SET_GLOB_ESCAPE(rc,ctx,ch) \
  rc = pcre2_set_glob_escape_32(G(ctx,32),ch)
#define PCRE2_SET_GLOB_SEPARATOR(rc,ctx,ch) \
  rc = pcre2_set_glob_separator_32(G(ctx,32),ch)
#define PCRE2_SET_HEAP_LIMIT(ctx,limit) \
  pcre2_set_heap_limit_32(G(ctx,32),limit)
#define PCRE2_SET_MATCH_LIMIT(ctx,limit) \
  pcre2_set_match_limit_32(G(ctx,32),limit)
#define PCRE2_SET_MAX_PATTERN_LENGTH(ctx,len) \
  pcre2_set_max_pattern_length_32(G(ctx,32),len)
#define PCRE2_SET_OFFSET_LIMIT(ctx,limit) \
  pcre2_set_offset_limit_32(G(ctx,32),limit)
#define PCRE2_SET_PARENS_NEST_LIMIT(ctx,limit) \
  pcre2_set_parens_nest_limit_32(G(ctx,32),limit)
#define PCRE2_SUBSTITUTE(rc,code,subject,len,start,opts,mdata,mctx,repl,rlen,outbuf,outlenp) \
  rc = pcre2_substitute_32(G(code,32),(PCRE2_SPTR32)subject,len,start,opts, \
    G(mdata,32),G(mctx,32),(PCRE2_SPTR32)repl,rlen, \
    (PCRE2_UCHAR32 *)outbuf,outlenp)
#define PCRE2_SUBSTRING_COPY_BYNAME(rc,mdata,name,buf,lenp) \
  rc = pcre2_substring_copy_byname_32(G(mdata,32),G(name,32), \
    (PCRE2_UCHAR32 *)buf,lenp)
2237 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2238 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
2239 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2240 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2241 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2242 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2243 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2244 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2245 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2246 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2247 a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2248 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2249 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2250 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2251 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
2252 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2253 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
2254 #define PTR(x) (void *)G(x,32)
2255 #define SETFLD(x,y,z) G(x,32)->y = z
2256 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2257 #define SETOP(x,y,z) G(x,32) z y
2258 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
2259 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
2260 #define SUB1(a,b) G(a,32)(G(b,32))
2261 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2262 #define TEST(x,r,y) (G(x,32) r (y))
2263 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2264
2265 #endif
2266
2267 /* ----- End of mode-specific function call macros ----- */
2268
2269
2270
2271
2272 /*************************************************
2273 * Alternate character tables *
2274 *************************************************/
2275
2276 /* By default, the "tables" pointer in the compile context when calling
2277 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2278 library. However, the tables modifier can be used to select alternate sets of
2279 tables, for different kinds of testing. Note that the locale modifier also
2280 adjusts the tables. */
2281
2282 /* This is the set of tables distributed as default with PCRE2. It recognizes
2283 only ASCII characters. */
2284
static const uint8_t tables1[] = {

/* Layout of the vector: a 256-byte lower casing table, a 256-byte case
flipping table, ten 32-byte class bit maps (320 bytes), and a 256-byte
character type table. */

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space: characters 9-13 and 32 */

  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit: 0-9, A-F, a-f */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit: 0-9 */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper: A-Z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower: a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word: 0-9, A-Z, underscore, a-z */

  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph: characters 33-126 */

  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print: characters 32-126 */

  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */

  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl: characters 0-31 and 127 */

  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2453
2454 /* This is a set of tables that came originally from a Windows user. It seems
2455 to be at least an approximation of ISO 8859. In particular, there are
2456 characters greater than 128 that are marked as spaces, letters, etc. */
2457
static const uint8_t tables2[] = {

/* NOTE(review): the original has no section markers. The divisions below
assume the same layout as tables1 (256 + 256 + 320 + 256 bytes), which the
element count matches - confirm against the library's table definitions. */

/* Bytes 0-255: lower casing table. Besides A-Z, the values 192-222 (except
215) map to 224-254. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Bytes 256-511: case flipping table. */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Bytes 512-831: ten 32-byte class bit maps (four rows each). Presumably in
the same order as tables1: space, xdigit, digit, upper, lower, word, graph,
print, punct, cntrl. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Bytes 832-1087: character type table, one byte of flag bits per character
value (the flag bits are listed in the tables1 comments). */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
2596
2597
2598
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* Stand-in for memmove() on platforms that lack it. If bcopy() is available it
does the work. Otherwise the bytes are moved one at a time, working from the
top downwards when the destination is above the source, and from the bottom
upwards in all other cases. After the function, memmove is redefined as a macro
that calls it.

Arguments:
  d         where to copy to
  s         where to copy from
  n         number of bytes to move

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* Start one past the last byte of each region and walk downwards. */
  to += n;
  from += n;
  while (remaining-- > 0) *(--to) = *(--from);
  }
else
  {
  while (remaining-- > 0) *to++ = *from++;
  }

return d;
#endif   /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2635
2636
2637
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Substitute for strerror(), compiled only when HAVE_STRERROR is not defined.
The message is looked up in the sys_errlist[] vector, which has sys_nerr
entries; both are expected to be supplied by the system library.

Argument:   n   an error number
Returns:    the corresponding sys_errlist entry, or a fixed "unknown" text
              when n is outside the range 0 to sys_nerr - 1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
2657
2658
2659
2660 /*************************************************
2661 * Local memory functions *
2662 *************************************************/
2663
2664 /* Alternative memory functions, to test functionality. */
2665
/* Allocate a block with malloc(). When show_memory is set, a line describing
the request is written to outfile, and a successful block is recorded with its
size in malloclist/malloclistlength if there is room (MALLOCLISTSIZE entries).

Arguments:
  size      number of bytes wanted
  data      not used

Returns:    whatever malloc() returned (NULL on failure)
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
  return block;
  }

fprintf(outfile, "malloc %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2694
my_free(void * block,void * data)2695 static void my_free(void *block, void *data)
2696 {
2697 (void)data;
2698 if (show_memory)
2699 {
2700 uint32_t i, j;
2701 BOOL found = FALSE;
2702
2703 fprintf(outfile, "free");
2704 for (i = 0; i < malloclistptr; i++)
2705 {
2706 if (block == malloclist[i])
2707 {
2708 fprintf(outfile, " %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
2709 malloclistptr--;
2710 for (j = i; j < malloclistptr; j++)
2711 {
2712 malloclist[j] = malloclist[j+1];
2713 malloclistlength[j] = malloclistlength[j+1];
2714 }
2715 found = TRUE;
2716 break;
2717 }
2718 }
2719 if (!found) fprintf(outfile, " unremembered block");
2720 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2721 fprintf(outfile, " %p", block); /* Not portable */
2722 #endif
2723 fprintf(outfile, "\n");
2724 }
2725 free(block);
2726 }
2727
2728
2729
2730 /*************************************************
2731 * Callback function for stack guard *
2732 *************************************************/
2733
2734 /* This is set up to be called from pcre2_compile() when the stackguard=n
2735 modifier sets a value greater than zero. The test we do is whether the
2736 parenthesis nesting depth is greater than the value set by the modifier.
2737
2738 Argument: the current parenthesis nesting depth
2739 Returns: non-zero to kill the compilation
2740 */
2741
2742 static int
stack_guard(uint32_t depth,void * user_data)2743 stack_guard(uint32_t depth, void *user_data)
2744 {
2745 (void)user_data;
2746 return depth > pat_patctl.stackguard_test;
2747 }
2748
2749
2750 /*************************************************
2751 * JIT memory callback *
2752 *************************************************/
2753
2754 static PCRE2_JIT_STACK*
jit_callback(void * arg)2755 jit_callback(void *arg)
2756 {
2757 jit_was_used = TRUE;
2758 return (PCRE2_JIT_STACK *)arg;
2759 }
2760
2761
2762 /*************************************************
2763 * Convert UTF-8 character to code point *
2764 *************************************************/
2765
2766 /* This function reads one or more bytes that represent a UTF-8 character,
2767 and returns the codepoint of that character. Note that the function supports
2768 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2769 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2770 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2771 checking, and also for generating 32-bit non-UTF data values above the UTF
2772 limit.
2773
2774 Argument:
2775 utf8bytes a pointer to the byte vector
2776 vptr a pointer to an int to receive the value
2777
2778 Returns: > 0 => the number of bytes consumed
2779 -6 to 0 => malformed UTF-8 character at offset = (-return)
2780 */
2781
2782 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2783 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2784 {
2785 uint32_t c = *utf8bytes++;
2786 uint32_t d = c;
2787 int i, j, s;
2788
2789 for (i = -1; i < 6; i++) /* i is number of additional bytes */
2790 {
2791 if ((d & 0x80) == 0) break;
2792 d <<= 1;
2793 }
2794
2795 if (i == -1) { *vptr = c; return 1; } /* ascii character */
2796 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
2797
2798 /* i now has a value in the range 1-5 */
2799
2800 s = 6*i;
2801 d = (c & utf8_table3[i]) << s;
2802
2803 for (j = 0; j < i; j++)
2804 {
2805 c = *utf8bytes++;
2806 if ((c & 0xc0) != 0x80) return -(j+1);
2807 s -= 6;
2808 d |= (c & 0x3f) << s;
2809 }
2810
2811 /* Check that encoding was the correct unique one */
2812
2813 for (j = 0; j < utf8_table1_size; j++)
2814 if (d <= (uint32_t)utf8_table1[j]) break;
2815 if (j != i) return -(i+1);
2816
2817 /* Valid value */
2818
2819 *vptr = d;
2820 return i+1;
2821 }
2822
2823
2824
2825 /*************************************************
2826 * Print one character *
2827 *************************************************/
2828
2829 /* Print a single character either literally, or as a hex escape, and count how
2830 many printed characters are used.
2831
2832 Arguments:
2833 c the character
2834 utf TRUE in UTF mode
2835 f the FILE to print to, or NULL just to count characters
2836
2837 Returns: number of characters written
2838 */
2839
2840 static int
pchar(uint32_t c,BOOL utf,FILE * f)2841 pchar(uint32_t c, BOOL utf, FILE *f)
2842 {
2843 int n = 0;
2844 char tempbuffer[16];
2845
2846 if (PRINTOK(c))
2847 {
2848 if (f != NULL) fprintf(f, "%c", c);
2849 return 1;
2850 }
2851
2852 if (c < 0x100)
2853 {
2854 if (utf)
2855 {
2856 if (f != NULL) fprintf(f, "\\x{%02x}", c);
2857 return 6;
2858 }
2859 else
2860 {
2861 if (f != NULL) fprintf(f, "\\x%02x", c);
2862 return 4;
2863 }
2864 }
2865
2866 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2867 else n = sprintf(tempbuffer, "\\x{%02x}", c);
2868
2869 return n >= 0 ? n : 0;
2870 }
2871
2872
2873
2874 #ifdef SUPPORT_PCRE2_16
2875 /*************************************************
2876 * Find length of 0-terminated 16-bit string *
2877 *************************************************/
2878
/* Count the 16-bit code units that precede the terminating zero unit.

Argument:   p   points to a zero-terminated 16-bit string
Returns:    the number of code units, excluding the terminator
*/

static size_t strlen16(PCRE2_SPTR16 p)
{
PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;

/* Convert the pointer difference straight to the return type. Going through
int first would corrupt lengths greater than INT_MAX. */

return (size_t)(pp - p);
}
2885 #endif /* SUPPORT_PCRE2_16 */
2886
2887
2888
2889 #ifdef SUPPORT_PCRE2_32
2890 /*************************************************
2891 * Find length of 0-terminated 32-bit string *
2892 *************************************************/
2893
/* Count the 32-bit code units that precede the terminating zero unit.

Argument:   p   points to a zero-terminated 32-bit string
Returns:    the number of code units, excluding the terminator
*/

static size_t strlen32(PCRE2_SPTR32 p)
{
PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;

/* Convert the pointer difference straight to the return type. Going through
int first would corrupt lengths greater than INT_MAX. */

return (size_t)(pp - p);
}
2900 #endif /* SUPPORT_PCRE2_32 */
2901
2902
2903 #ifdef SUPPORT_PCRE2_8
2904 /*************************************************
2905 * Print 8-bit character string *
2906 *************************************************/
2907
2908 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2909 For printing *MARK strings, a negative length is given. If handed a NULL file,
2910 just counts chars without printing (because pchar() does that). */
2911
pchars8(PCRE2_SPTR8 p,int length,BOOL utf,FILE * f)2912 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2913 {
2914 uint32_t c = 0;
2915 int yield = 0;
2916
2917 if (length < 0) length = p[-1];
2918 while (length-- > 0)
2919 {
2920 if (utf)
2921 {
2922 int rc = utf82ord(p, &c);
2923 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2924 {
2925 length -= rc - 1;
2926 p += rc;
2927 yield += pchar(c, utf, f);
2928 continue;
2929 }
2930 }
2931 c = *p++;
2932 yield += pchar(c, utf, f);
2933 }
2934
2935 return yield;
2936 }
2937 #endif
2938
2939
2940 #ifdef SUPPORT_PCRE2_16
2941 /*************************************************
2942 * Print 16-bit character string *
2943 *************************************************/
2944
2945 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2946 For printing *MARK strings, a negative length is given. If handed a NULL file,
2947 just counts chars without printing. */
2948
pchars16(PCRE2_SPTR16 p,int length,BOOL utf,FILE * f)2949 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
2950 {
2951 int yield = 0;
2952 if (length < 0) length = p[-1];
2953 while (length-- > 0)
2954 {
2955 uint32_t c = *p++ & 0xffff;
2956 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2957 {
2958 int d = *p & 0xffff;
2959 if (d >= 0xDC00 && d <= 0xDFFF)
2960 {
2961 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2962 length--;
2963 p++;
2964 }
2965 }
2966 yield += pchar(c, utf, f);
2967 }
2968 return yield;
2969 }
2970 #endif /* SUPPORT_PCRE2_16 */
2971
2972
2973
2974 #ifdef SUPPORT_PCRE2_32
2975 /*************************************************
2976 * Print 32-bit character string *
2977 *************************************************/
2978
2979 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2980 For printing *MARK strings, a negative length is given. If handed a NULL file,
2981 just counts chars without printing. */
2982
pchars32(PCRE2_SPTR32 p,int length,BOOL utf,FILE * f)2983 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
2984 {
2985 int yield = 0;
2986 (void)(utf); /* Avoid compiler warning */
2987
2988 if (length < 0) length = p[-1];
2989 while (length-- > 0)
2990 {
2991 uint32_t c = *p++;
2992 yield += pchar(c, utf, f);
2993 }
2994 return yield;
2995 }
2996 #endif /* SUPPORT_PCRE2_32 */
2997
2998
2999
3000
3001 #ifdef SUPPORT_PCRE2_8
3002 /*************************************************
3003 * Convert character value to UTF-8 *
3004 *************************************************/
3005
3006 /* This function takes an integer value in the range 0 - 0x7fffffff
3007 and encodes it as a UTF-8 character in 0 to 6 bytes.
3008
3009 Arguments:
3010 cvalue the character value
3011 utf8bytes pointer to buffer for result - at least 6 bytes long
3012
3013 Returns: number of characters placed in the buffer
3014 */
3015
3016 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3017 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3018 {
3019 int i, j;
3020 if (cvalue > 0x7fffffffu)
3021 return -1;
3022 for (i = 0; i < utf8_table1_size; i++)
3023 if (cvalue <= (uint32_t)utf8_table1[i]) break;
3024 utf8bytes += i;
3025 for (j = i; j > 0; j--)
3026 {
3027 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3028 cvalue >>= 6;
3029 }
3030 *utf8bytes = utf8_table2[i] | cvalue;
3031 return i + 1;
3032 }
3033 #endif /* SUPPORT_PCRE2_8 */
3034
3035
3036
3037 #ifdef SUPPORT_PCRE2_16
3038 /*************************************************
3039 * Convert string to 16-bit *
3040 *************************************************/
3041
3042 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3043 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3044 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3045 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3046 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3047 greater than 0xffff.
3048
3049 If all the input bytes are ASCII, the space needed for a 16-bit string is
3050 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3051 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3052 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3053 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3054 save repeated re-sizing.
3055
3056 Note that this function does not object to surrogate values. This is
3057 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3058 for the purpose of testing that they are correctly faulted.
3059
3060 Arguments:
3061 p points to a byte string
3062 utf true in UTF mode
3063 lenptr points to number of bytes in the string (excluding trailing zero)
3064
3065 Returns: 0 on success, with the length updated to the number of 16-bit
3066 data items used (excluding the trailing zero)
3067 OR -1 if a UTF-8 string is malformed
3068 OR -2 if a value > 0x10ffff is encountered in UTF mode
3069 OR -3 if a value > 0xffff is encountered when not in UTF mode
3070 */
3071
3072 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3073 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3074 {
3075 uint16_t *pp;
3076 PCRE2_SIZE len = *lenptr;
3077
3078 if (pbuffer16_size < 2*len + 2)
3079 {
3080 if (pbuffer16 != NULL) free(pbuffer16);
3081 pbuffer16_size = 2*len + 2;
3082 if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3083 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3084 if (pbuffer16 == NULL)
3085 {
3086 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3087 SIZ_CAST pbuffer16_size);
3088 exit(1);
3089 }
3090 }
3091
3092 pp = pbuffer16;
3093 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3094 {
3095 for (; len > 0; len--) *pp++ = *p++;
3096 }
3097 else while (len > 0)
3098 {
3099 uint32_t c;
3100 int chlen = utf82ord(p, &c);
3101 if (chlen <= 0) return -1;
3102 if (!utf && c > 0xffff) return -3;
3103 if (c > 0x10ffff) return -2;
3104 p += chlen;
3105 len -= chlen;
3106 if (c < 0x10000) *pp++ = c; else
3107 {
3108 c -= 0x10000;
3109 *pp++ = 0xD800 | (c >> 10);
3110 *pp++ = 0xDC00 | (c & 0x3ff);
3111 }
3112 }
3113
3114 *pp = 0;
3115 *lenptr = pp - pbuffer16;
3116 return 0;
3117 }
3118 #endif
3119
3120
3121
3122 #ifdef SUPPORT_PCRE2_32
3123 /*************************************************
3124 * Convert string to 32-bit *
3125 *************************************************/
3126
3127 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3128 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3129 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3130 limit of 0x10ffff cause an error.
3131
3132 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3133 is set, and no limit is imposed. There is special interpretation of the 0xff
3134 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3135 next character to be set. This provides a way of generating 32-bit characters
3136 greater than 0x7fffffff.
3137
3138 If all the input bytes are ASCII, the space needed for a 32-bit string is
3139 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3140 string is no more than four times, because the number of characters must be
3141 less than the number of bytes. The result is always left in pbuffer32. Impose a
3142 minimum size to save repeated re-sizing.
3143
3144 Note that this function does not object to surrogate values. This is
3145 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3146 for the purpose of testing that they are correctly faulted.
3147
3148 Arguments:
3149 p points to a byte string
3150 utf true in UTF mode
3151 lenptr points to number of bytes in the string (excluding trailing zero)
3152
3153 Returns: 0 on success, with the length updated to the number of 32-bit
3154 data items used (excluding the trailing zero)
3155 OR -1 if a UTF-8 string is malformed
3156 OR -2 if a value > 0x10ffff is encountered in UTF mode
3157 */
3158
3159 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3160 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3161 {
3162 uint32_t *pp;
3163 PCRE2_SIZE len = *lenptr;
3164
3165 if (pbuffer32_size < 4*len + 4)
3166 {
3167 if (pbuffer32 != NULL) free(pbuffer32);
3168 pbuffer32_size = 4*len + 4;
3169 if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3170 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3171 if (pbuffer32 == NULL)
3172 {
3173 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3174 SIZ_CAST pbuffer32_size);
3175 exit(1);
3176 }
3177 }
3178
3179 pp = pbuffer32;
3180
3181 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3182 {
3183 for (; len > 0; len--) *pp++ = *p++;
3184 }
3185
3186 else while (len > 0)
3187 {
3188 int chlen;
3189 uint32_t c;
3190 uint32_t topbit = 0;
3191 if (!utf && *p == 0xff && len > 1)
3192 {
3193 topbit = 0x80000000u;
3194 p++;
3195 len--;
3196 }
3197 chlen = utf82ord(p, &c);
3198 if (chlen <= 0) return -1;
3199 if (utf && c > 0x10ffff) return -2;
3200 p += chlen;
3201 len -= chlen;
3202 *pp++ = c | topbit;
3203 }
3204
3205 *pp = 0;
3206 *lenptr = pp - pbuffer32;
3207 return 0;
3208 }
3209 #endif /* SUPPORT_PCRE2_32 */
3210
3211
3212
3213 /*************************************************
3214 * Move back by so many characters *
3215 *************************************************/
3216
3217 /* Given a code unit offset in a subject string, move backwards by a number of
3218 characters, and return the resulting offset.
3219
3220 Arguments:
3221 subject pointer to the string
3222 offset start offset
3223 count count to move back by
3224 utf TRUE if in UTF mode
3225
3226 Returns: a possibly changed offset
3227 */
3228
3229 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3230 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3231 {
3232 if (!utf || test_mode == PCRE32_MODE)
3233 return (count >= offset)? 0 : (offset - count);
3234
3235 else if (test_mode == PCRE8_MODE)
3236 {
3237 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3238 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3239 {
3240 pp--;
3241 while ((*pp & 0xc0) == 0x80) pp--;
3242 }
3243 return pp - (PCRE2_SPTR8)subject;
3244 }
3245
3246 else /* 16-bit mode */
3247 {
3248 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3249 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3250 {
3251 pp--;
3252 if ((*pp & 0xfc00) == 0xdc00) pp--;
3253 }
3254 return pp - (PCRE2_SPTR16)subject;
3255 }
3256 }
3257
3258
3259
3260 /*************************************************
3261 * Expand input buffers *
3262 *************************************************/
3263
3264 /* This function doubles the size of the input buffer and the buffer for
3265 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3266 the new ones.
3267
3268 Arguments: none
3269 Returns: nothing (aborts if malloc() fails)
3270 */
3271
3272 static void
expand_input_buffers(void)3273 expand_input_buffers(void)
3274 {
3275 int new_pbuffer8_size = 2*pbuffer8_size;
3276 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3277 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3278
3279 if (new_buffer == NULL || new_pbuffer8 == NULL)
3280 {
3281 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3282 exit(1);
3283 }
3284
3285 memcpy(new_buffer, buffer, pbuffer8_size);
3286 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3287
3288 pbuffer8_size = new_pbuffer8_size;
3289
3290 free(buffer);
3291 free(pbuffer8);
3292
3293 buffer = new_buffer;
3294 pbuffer8 = new_pbuffer8;
3295 }
3296
3297
3298
3299 /*************************************************
3300 * Read or extend an input line *
3301 *************************************************/
3302
3303 /* Input lines are read into buffer, but both patterns and data lines can be
3304 continued over multiple input lines. In addition, if the buffer fills up, we
3305 want to automatically expand it so as to be able to handle extremely large
3306 lines that are needed for certain stress tests, although this is less likely
3307 now that there are repetition features for both patterns and data. When the
3308 input buffer is expanded, the other two buffers must also be expanded likewise,
3309 and the contents of pbuffer, which are a copy of the input for callouts, must
3310 be preserved (for when expansion happens for a data line). This is not the most
3311 optimal way of handling this, but hey, this is just a test program!
3312
3313 Arguments:
3314 f the file to read
3315 start where in buffer to start (this *must* be within buffer)
3316 prompt for stdin or readline()
3317
3318 Returns: pointer to the start of new data
3319 could be a copy of start, or could be moved
3320 NULL if no data read and EOF reached
3321 */
3322
3323 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3324 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3325 {
3326 uint8_t *here = start;
3327
3328 for (;;)
3329 {
3330 size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3331
3332 if (rlen > 1000)
3333 {
3334 size_t dlen;
3335
3336 /* If libreadline or libedit support is required, use readline() to read a
3337 line if the input is a terminal. Note that readline() removes the trailing
3338 newline, so we must put it back again, to be compatible with fgets(). */
3339
3340 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3341 if (INTERACTIVE(f))
3342 {
3343 size_t len;
3344 char *s = readline(prompt);
3345 if (s == NULL) return (here == start)? NULL : start;
3346 len = strlen(s);
3347 if (len > 0) add_history(s);
3348 if (len > rlen - 1) len = rlen - 1;
3349 memcpy(here, s, len);
3350 here[len] = '\n';
3351 here[len+1] = 0;
3352 free(s);
3353 }
3354 else
3355 #endif
3356
3357 /* Read the next line by normal means, prompting if the file is a tty. */
3358
3359 {
3360 if (INTERACTIVE(f)) printf("%s", prompt);
3361 if (fgets((char *)here, rlen, f) == NULL)
3362 return (here == start)? NULL : start;
3363 }
3364
3365 dlen = strlen((char *)here);
3366 here += dlen;
3367
3368 /* Check for end of line reached. Take care not to read data from before
3369 start (dlen will be zero for a file starting with a binary zero). */
3370
3371 if (here > start && here[-1] == '\n') return start;
3372
3373 /* If we have not read a newline when reading a file, we have either filled
3374 the buffer or reached the end of the file. We can detect the former by
3375 checking that the string fills the buffer, and the latter by feof(). If
3376 neither of these is true, it means we read a binary zero which has caused
3377 strlen() to give a short length. This is a hard error because pcre2test
3378 expects to work with C strings. */
3379
3380 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3381 {
3382 fprintf(outfile, "** Binary zero encountered in input\n");
3383 fprintf(outfile, "** pcre2test run abandoned\n");
3384 exit(1);
3385 }
3386 }
3387
3388 else
3389 {
3390 size_t start_offset = start - buffer;
3391 size_t here_offset = here - buffer;
3392 expand_input_buffers();
3393 start = buffer + start_offset;
3394 here = buffer + here_offset;
3395 }
3396 }
3397
3398 /* Control never gets here */
3399 }
3400
3401
3402
3403 /*************************************************
3404 * Case-independent strncmp() function *
3405 *************************************************/
3406
3407 /*
3408 Arguments:
3409 s first string
3410 t second string
3411 n number of characters to compare
3412
3413 Returns: < 0, = 0, or > 0, according to the comparison
3414 */
3415
/* Compares at most n characters, ignoring case. Like strncmp(), the comparison
stops after a terminating zero that is present in both strings, so strings
shorter than n are never read beyond their end. A value of n that is zero or
negative compares nothing and yields 0. */

static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
for (; n > 0; n--, s++, t++)
  {
  int diff = tolower(*s) - tolower(*t);
  if (diff != 0) return diff;
  if (*s == 0) break;   /* Both strings ended together */
  }
return 0;
}
3426
3427
3428
3429 /*************************************************
3430 * Scan the main modifier list *
3431 *************************************************/
3432
3433 /* This function searches the modifier list for a long modifier name.
3434
3435 Argument:
3436 p start of the name
  len       length of the name
3438
3439 Returns: an index in the modifier list, or -1 on failure
3440 */
3441
3442 static int
scan_modifiers(const uint8_t * p,unsigned int len)3443 scan_modifiers(const uint8_t *p, unsigned int len)
3444 {
3445 int bot = 0;
3446 int top = MODLISTCOUNT;
3447
3448 while (top > bot)
3449 {
3450 int mid = (bot + top)/2;
3451 unsigned int mlen = strlen(modlist[mid].name);
3452 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3453 if (c == 0)
3454 {
3455 if (len == mlen) return mid;
3456 c = (int)len - (int)mlen;
3457 }
3458 if (c > 0) bot = mid + 1; else top = mid;
3459 }
3460
3461 return -1;
3462
3463 }
3464
3465
3466
3467 /*************************************************
*      Check a modifier and find its field       *
3469 *************************************************/
3470
3471 /* This function is called when a modifier has been identified. We check that
3472 it is allowed here and find the field that is to be changed.
3473
3474 Arguments:
3475 m the modifier list entry
3476 ctx CTX_PAT => pattern context
3477 CTX_POPPAT => pattern context for popped pattern
3478 CTX_DEFPAT => default pattern context
3479 CTX_DAT => data context
3480 CTX_DEFDAT => default data context
3481 pctl point to pattern control block
3482 dctl point to data control block
3483 c a single character or 0
3484
3485 Returns: a field pointer or NULL
3486 */
3487
3488 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3489 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3490 {
3491 void *field = NULL;
3492 PCRE2_SIZE offset = m->offset;
3493
3494 if (restrict_for_perl_test) switch(m->which)
3495 {
3496 case MOD_PNDP:
3497 case MOD_PATP:
3498 case MOD_PDP:
3499 break;
3500
3501 default:
3502 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3503 m->name);
3504 return NULL;
3505 }
3506
3507 switch (m->which)
3508 {
3509 case MOD_CTC: /* Compile context modifier */
3510 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3511 else if (ctx == CTX_PAT) field = PTR(pat_context);
3512 break;
3513
3514 case MOD_CTM: /* Match context modifier */
3515 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3516 else if (ctx == CTX_DAT) field = PTR(dat_context);
3517 break;
3518
3519 case MOD_DAT: /* Data line modifier */
3520 if (dctl != NULL) field = dctl;
3521 break;
3522
3523 case MOD_PAT: /* Pattern modifier */
3524 case MOD_PATP: /* Allowed for Perl test */
3525 if (pctl != NULL) field = pctl;
3526 break;
3527
3528 case MOD_PD: /* Pattern or data line modifier */
3529 case MOD_PDP: /* Ditto, allowed for Perl test */
3530 case MOD_PND: /* Ditto, but not default pattern */
3531 case MOD_PNDP: /* Ditto, allowed for Perl test */
3532 if (dctl != NULL) field = dctl;
3533 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3534 ctx != CTX_DEFPAT))
3535 field = pctl;
3536 break;
3537 }
3538
3539 if (field == NULL)
3540 {
3541 if (c == 0)
3542 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3543 else
3544 fprintf(outfile, "** /%c is not valid here\n", c);
3545 return NULL;
3546 }
3547
3548 return (char *)field + offset;
3549 }
3550
3551
3552
3553 /*************************************************
3554 * Decode a modifier list *
3555 *************************************************/
3556
3557 /* A pointer to a control block is NULL when called in cases when that block is
3558 not relevant. They are never all relevant in one call. At least one of patctl
3559 and datctl is NULL. The second argument specifies which context to use for
3560 modifiers that apply to contexts.
3561
3562 Arguments:
3563 p point to modifier string
3564 ctx CTX_PAT => pattern context
3565 CTX_POPPAT => pattern context for popped pattern
3566 CTX_DEFPAT => default pattern context
3567 CTX_DAT => data context
3568 CTX_DEFDAT => default data context
3569 pctl point to pattern control block
3570 dctl point to data control block
3571
3572 Returns: TRUE if successful decode, FALSE otherwise
3573 */
3574
3575 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3576 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3577 {
3578 uint8_t *ep, *pp;
3579 long li;
3580 unsigned long uli;
3581 BOOL first = TRUE;
3582
3583 for (;;)
3584 {
3585 void *field;
3586 modstruct *m;
3587 BOOL off = FALSE;
3588 unsigned int i, len;
3589 int index;
3590 char *endptr;
3591
3592 /* Skip white space and commas. */
3593
3594 while (isspace(*p) || *p == ',') p++;
3595 if (*p == 0) break;
3596
3597 /* Find the end of the item; lose trailing whitespace at end of line. */
3598
3599 for (ep = p; *ep != 0 && *ep != ','; ep++);
3600 if (*ep == 0)
3601 {
3602 while (ep > p && isspace(ep[-1])) ep--;
3603 *ep = 0;
3604 }
3605
3606 /* Remember if the first character is '-'. */
3607
3608 if (*p == '-')
3609 {
3610 off = TRUE;
3611 p++;
3612 }
3613
3614 /* Find the length of a full-length modifier name, and scan for it. */
3615
3616 pp = p;
3617 while (pp < ep && *pp != '=') pp++;
3618 index = scan_modifiers(p, pp - p);
3619
3620 /* If the first modifier is unrecognized, try to interpret it as a sequence
3621 of single-character abbreviated modifiers. None of these modifiers have any
3622 associated data. They just set options or control bits. */
3623
3624 if (index < 0)
3625 {
3626 uint32_t cc;
3627 uint8_t *mp = p;
3628
3629 if (!first)
3630 {
3631 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3632 if (ep - p == 1)
3633 fprintf(outfile, "** Single-character modifiers must come first\n");
3634 return FALSE;
3635 }
3636
3637 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3638 {
3639 for (i = 0; i < C1MODLISTCOUNT; i++)
3640 if (cc == c1modlist[i].onechar) break;
3641
3642 if (i >= C1MODLISTCOUNT)
3643 {
3644 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3645 *p, (int)(ep-mp), mp);
3646 return FALSE;
3647 }
3648
3649 if (c1modlist[i].index >= 0)
3650 {
3651 index = c1modlist[i].index;
3652 }
3653
3654 else
3655 {
3656 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3657 strlen(c1modlist[i].fullname));
3658 if (index < 0)
3659 {
3660 fprintf(outfile, "** Internal error: single-character equivalent "
3661 "modifier '%s' not found\n", c1modlist[i].fullname);
3662 return FALSE;
3663 }
3664 c1modlist[i].index = index; /* Cache for next time */
3665 }
3666
3667 field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3668 if (field == NULL) return FALSE;
3669
3670 /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3671 PCRE2_EXTENDED_MORE. */
3672
3673 if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3674 {
3675 *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3676 *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3677 }
3678 else
3679 *((uint32_t *)field) |= modlist[index].value;
3680 }
3681
3682 continue; /* With tne next (fullname) modifier */
3683 }
3684
3685 /* We have a match on a full-name modifier. Check for the existence of data
3686 when needed. */
3687
3688 m = modlist + index; /* Save typing */
3689 if (m->type != MOD_CTL && m->type != MOD_OPT &&
3690 (m->type != MOD_IND || *pp == '='))
3691 {
3692 if (*pp++ != '=')
3693 {
3694 fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3695 return FALSE;
3696 }
3697 if (off)
3698 {
3699 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3700 return FALSE;
3701 }
3702 }
3703
3704 /* These on/off types have no data. */
3705
3706 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3707 {
3708 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3709 return FALSE;
3710 }
3711
3712 /* Set the data length for those types that have data. Then find the field
3713 that is to be set. If check_modifier() returns NULL, it has already output an
3714 error message. */
3715
3716 len = ep - pp;
3717 field = check_modifier(m, ctx, pctl, dctl, 0);
3718 if (field == NULL) return FALSE;
3719
3720 /* Process according to data type. */
3721
3722 switch (m->type)
3723 {
3724 case MOD_CTL:
3725 case MOD_OPT:
3726 if (off) *((uint32_t *)field) &= ~m->value;
3727 else *((uint32_t *)field) |= m->value;
3728 break;
3729
3730 case MOD_BSR:
3731 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3732 {
3733 #ifdef BSR_ANYCRLF
3734 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3735 #else
3736 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3737 #endif
3738 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3739 else dctl->control2 &= ~CTL2_BSR_SET;
3740 }
3741 else
3742 {
3743 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3744 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3745 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3746 *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3747 else goto INVALID_VALUE;
3748 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3749 else dctl->control2 |= CTL2_BSR_SET;
3750 }
3751 pp = ep;
3752 break;
3753
3754 case MOD_CHR: /* A single character */
3755 *((uint32_t *)field) = *pp++;
3756 break;
3757
3758 case MOD_CON: /* A convert type/options list */
3759 for (;; pp++)
3760 {
3761 uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3762 len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3763 for (i = 0; i < convertlistcount; i++)
3764 {
3765 if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3766 {
3767 if (*((uint32_t *)field) == CONVERT_UNSET)
3768 *((uint32_t *)field) = convertlist[i].option;
3769 else
3770 *((uint32_t *)field) |= convertlist[i].option;
3771 break;
3772 }
3773 }
3774 if (i >= convertlistcount) goto INVALID_VALUE;
3775 pp += len;
3776 if (*pp != ':') break;
3777 }
3778 break;
3779
3780 case MOD_IN2: /* One or two unsigned integers */
3781 if (!isdigit(*pp)) goto INVALID_VALUE;
3782 uli = strtoul((const char *)pp, &endptr, 10);
3783 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3784 ((uint32_t *)field)[0] = (uint32_t)uli;
3785 if (*endptr == ':')
3786 {
3787 uli = strtoul((const char *)endptr+1, &endptr, 10);
3788 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3789 ((uint32_t *)field)[1] = (uint32_t)uli;
3790 }
3791 else ((uint32_t *)field)[1] = 0;
3792 pp = (uint8_t *)endptr;
3793 break;
3794
3795 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3796 less than ULONG_MAX. So first test for overflowing the long int, and then
3797 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3798
3799 case MOD_SIZ: /* PCRE2_SIZE value */
3800 if (!isdigit(*pp)) goto INVALID_VALUE;
3801 uli = strtoul((const char *)pp, &endptr, 10);
3802 if (uli == ULONG_MAX) goto INVALID_VALUE;
3803 #if ULONG_MAX > PCRE2_SIZE_MAX
3804 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3805 #endif
3806 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3807 pp = (uint8_t *)endptr;
3808 break;
3809
3810 case MOD_IND: /* Unsigned integer with default */
3811 if (len == 0)
3812 {
3813 *((uint32_t *)field) = (uint32_t)(m->value);
3814 break;
3815 }
3816 /* Fall through */
3817
3818 case MOD_INT: /* Unsigned integer */
3819 if (!isdigit(*pp)) goto INVALID_VALUE;
3820 uli = strtoul((const char *)pp, &endptr, 10);
3821 if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3822 *((uint32_t *)field) = (uint32_t)uli;
3823 pp = (uint8_t *)endptr;
3824 break;
3825
3826 case MOD_INS: /* Signed integer */
3827 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3828 li = strtol((const char *)pp, &endptr, 10);
3829 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3830 *((int32_t *)field) = (int32_t)li;
3831 pp = (uint8_t *)endptr;
3832 break;
3833
3834 case MOD_NL:
3835 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3836 if (len == strlen(newlines[i]) &&
3837 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3838 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3839 if (i == 0)
3840 {
3841 *((uint16_t *)field) = NEWLINE_DEFAULT;
3842 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3843 else dctl->control2 &= ~CTL2_NL_SET;
3844 }
3845 else
3846 {
3847 *((uint16_t *)field) = i;
3848 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3849 else dctl->control2 |= CTL2_NL_SET;
3850 }
3851 pp = ep;
3852 break;
3853
3854 case MOD_NN: /* Name or (signed) number; may be several */
3855 if (isdigit(*pp) || *pp == '-')
3856 {
3857 int ct = MAXCPYGET - 1;
3858 int32_t value;
3859 li = strtol((const char *)pp, &endptr, 10);
3860 if (S32OVERFLOW(li)) goto INVALID_VALUE;
3861 value = (int32_t)li;
3862 field = (char *)field - m->offset + m->value; /* Adjust field ptr */
3863 if (value >= 0) /* Add new number */
3864 {
3865 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */
3866 field = (char *)field + sizeof(int32_t);
3867 if (ct <= 0)
3868 {
3869 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3870 return FALSE;
3871 }
3872 }
3873 *((int32_t *)field) = value;
3874 if (ct > 0) ((int32_t *)field)[1] = -1;
3875 pp = (uint8_t *)endptr;
3876 }
3877
3878 /* Multiple strings are put end to end. */
3879
3880 else
3881 {
3882 char *nn = (char *)field;
3883 if (len > 0) /* Add new name */
3884 {
3885 if (len > MAX_NAME_SIZE)
3886 {
3887 fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3888 return FALSE;
3889 }
3890 while (*nn != 0) nn += strlen(nn) + 1;
3891 if (nn + len + 2 - (char *)field > LENCPYGET)
3892 {
3893 fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3894 m->name);
3895 return FALSE;
3896 }
3897 memcpy(nn, pp, len);
3898 }
3899 nn[len] = 0 ;
3900 nn[len+1] = 0;
3901 pp = ep;
3902 }
3903 break;
3904
3905 case MOD_STR:
3906 if (len + 1 > m->value)
3907 {
3908 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3909 m->name, m->value - 1);
3910 return FALSE;
3911 }
3912 memcpy(field, pp, len);
3913 ((uint8_t *)field)[len] = 0;
3914 pp = ep;
3915 break;
3916 }
3917
3918 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3919 {
3920 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
3921 return FALSE;
3922 }
3923
3924 p = pp;
3925 first = FALSE;
3926
3927 if (ctx == CTX_POPPAT &&
3928 (pctl->options != 0 ||
3929 pctl->tables_id != 0 ||
3930 pctl->locale[0] != 0 ||
3931 (pctl->control & NOTPOP_CONTROLS) != 0))
3932 {
3933 fprintf(outfile, "** '%s' is not valid here\n", m->name);
3934 return FALSE;
3935 }
3936 }
3937
3938 return TRUE;
3939
3940 INVALID_VALUE:
3941 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
3942 return FALSE;
3943 }
3944
3945
3946 /*************************************************
3947 * Get info from a pattern *
3948 *************************************************/
3949
3950 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
3951 pattern.
3952
3953 Arguments:
3954 what code for the required information
3955 where where to put the answer
3956 unsetok PCRE2_ERROR_UNSET is an "expected" result
3957
3958 Returns: the return from pcre2_pattern_info()
3959 */
3960
3961 static int
pattern_info(int what,void * where,BOOL unsetok)3962 pattern_info(int what, void *where, BOOL unsetok)
3963 {
3964 int rc;
3965 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
3966 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
3967 if (rc >= 0) return 0;
3968 if (rc != PCRE2_ERROR_UNSET || !unsetok)
3969 {
3970 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
3971 what);
3972 if (rc == PCRE2_ERROR_BADMODE)
3973 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3974 "%d-bit mode\n", test_mode,
3975 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
3976 }
3977 return rc;
3978 }
3979
3980
3981
3982 #ifdef SUPPORT_PCRE2_8
3983 /*************************************************
3984 * Show something in a list *
3985 *************************************************/
3986
3987 /* This function just helps to keep the code that uses it tidier. It's used for
3988 various lists of things where there needs to be introductory text before the
3989 first item. As these calls are all in the POSIX-support code, they happen only
3990 when 8-bit mode is supported. */
3991
3992 static void
prmsg(const char ** msg,const char * s)3993 prmsg(const char **msg, const char *s)
3994 {
3995 fprintf(outfile, "%s %s", *msg, s);
3996 *msg = "";
3997 }
3998 #endif /* SUPPORT_PCRE2_8 */
3999
4000
4001
4002 /*************************************************
4003 * Show control bits *
4004 *************************************************/
4005
4006 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4007 Because the bits are unique, this can be used for both pattern and data control
4008 words.
4009
4010 Arguments:
4011 controls control bits
4012 controls2 more control bits
4013 before text to print before
4014
4015 Returns: nothing
4016 */
4017
4018 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4019 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4020 {
4021 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4022 before,
4023 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4024 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4025 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4026 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4027 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4028 ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4029 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4030 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4031 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4032 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4033 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4034 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4035 ((controls & CTL_DFA) != 0)? " dfa" : "",
4036 ((controls & CTL_EXPAND) != 0)? " expand" : "",
4037 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4038 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4039 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4040 ((controls & CTL_GETALL) != 0)? " getall" : "",
4041 ((controls & CTL_GLOBAL) != 0)? " global" : "",
4042 ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4043 ((controls & CTL_INFO) != 0)? " info" : "",
4044 ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4045 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4046 ((controls & CTL_MARK) != 0)? " mark" : "",
4047 ((controls & CTL_MEMORY) != 0)? " memory" : "",
4048 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4049 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4050 ((controls & CTL_POSIX) != 0)? " posix" : "",
4051 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4052 ((controls & CTL_PUSH) != 0)? " push" : "",
4053 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4054 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4055 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4056 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4057 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4058 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4059 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4060 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4061 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4062 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4063 }
4064
4065
4066
4067 /*************************************************
4068 * Show compile options *
4069 *************************************************/
4070
4071 /* Called from show_pattern_info() and for unsupported POSIX options.
4072
4073 Arguments:
4074 options an options word
4075 before text to print before
4076 after text to print after
4077
4078 Returns: nothing
4079 */
4080
4081 static void
show_compile_options(uint32_t options,const char * before,const char * after)4082 show_compile_options(uint32_t options, const char *before, const char *after)
4083 {
4084 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4085 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4086 before,
4087 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4088 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4089 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4090 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4091 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4092 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4093 ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4094 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4095 ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4096 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4097 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4098 ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4099 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4100 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4101 ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4102 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4103 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4104 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4105 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4106 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4107 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4108 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4109 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4110 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4111 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4112 ((options & PCRE2_UCP) != 0)? " ucp" : "",
4113 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4114 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4115 ((options & PCRE2_UTF) != 0)? " utf" : "",
4116 after);
4117 }
4118
4119
4120 /*************************************************
4121 * Show compile extra options *
4122 *************************************************/
4123
4124 /* Called from show_pattern_info() and for unsupported POSIX options.
4125
4126 Arguments:
4127 options an options word
4128 before text to print before
4129 after text to print after
4130
4131 Returns: nothing
4132 */
4133
4134 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4135 show_compile_extra_options(uint32_t options, const char *before,
4136 const char *after)
4137 {
4138 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4139 else fprintf(outfile, "%s%s%s%s%s%s",
4140 before,
4141 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4142 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4143 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4144 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4145 after);
4146 }
4147
4148
4149
4150 #ifdef SUPPORT_PCRE2_8
4151 /*************************************************
4152 * Show match options *
4153 *************************************************/
4154
4155 /* Called for unsupported POSIX options. */
4156
4157 static void
show_match_options(uint32_t options)4158 show_match_options(uint32_t options)
4159 {
4160 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s",
4161 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4162 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4163 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4164 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4165 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4166 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4167 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4168 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4169 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4170 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4171 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4172 }
4173 #endif /* SUPPORT_PCRE2_8 */
4174
4175
4176
4177 /*************************************************
4178 * Show memory usage info for a pattern *
4179 *************************************************/
4180
4181 static void
show_memory_info(void)4182 show_memory_info(void)
4183 {
4184 uint32_t name_count, name_entry_size;
4185 size_t size, cblock_size;
4186
4187 /* One of the test_mode values will always be true, but to stop a compiler
4188 warning we must initialize cblock_size. */
4189
4190 cblock_size = 0;
4191 #ifdef SUPPORT_PCRE2_8
4192 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4193 #endif
4194 #ifdef SUPPORT_PCRE2_16
4195 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4196 #endif
4197 #ifdef SUPPORT_PCRE2_32
4198 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4199 #endif
4200
4201 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4202 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4203 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4204 fprintf(outfile, "Memory allocation (code space): %d\n",
4205 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4206 if (pat_patctl.jit != 0)
4207 {
4208 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4209 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4210 }
4211 }
4212
4213
4214
4215 /*************************************************
4216 * Show frame size info for a pattern *
4217 *************************************************/
4218
4219 static void
show_framesize(void)4220 show_framesize(void)
4221 {
4222 size_t frame_size;
4223 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4224 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4225 }
4226
4227
4228
4229 /*************************************************
4230 * Get and output an error message *
4231 *************************************************/
4232
4233 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4234 print_error_message(int errorcode, const char *before, const char *after)
4235 {
4236 int len;
4237 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4238 if (len < 0)
4239 {
4240 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4241 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4242 }
4243 else
4244 {
4245 fprintf(outfile, "%s", before);
4246 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4247 fprintf(outfile, "%s", after);
4248 }
4249 return len >= 0;
4250 }
4251
4252
4253 /*************************************************
4254 * Callback function for callout enumeration *
4255 *************************************************/
4256
/* The only differences in the callout enumeration block for different code
unit widths are that the pointers to the subject, the most recent MARK, and a
callout argument string point to strings of the appropriate width. Casts can be
used to deal with this.
4261
4262 Argument:
4263 cb pointer to enumerate block
4264 callout_data user data
4265
4266 Returns: 0
4267 */
4268
/* Output one line describing a callout: the word "Callout", then either the
callout number or the callout string between its delimiters, then the text of
the next pattern item taken from pbuffer8. The opening delimiter is the code
unit that immediately precedes the callout string; the closing delimiter is
looked up in callout_end_delims when the opening one is found in
callout_start_delims, and otherwise is the same as the opening one. */

static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
  void *callout_data)
{
int item_length;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;

(void)callout_data;   /* Not currently displayed */

fprintf(outfile, "Callout ");

if (cb->callout_string == NULL)
  {
  fprintf(outfile, "%d ", cb->callout_number);
  }
else
  {
  uint32_t n;
  uint32_t opener = CODE_UNIT(cb->callout_string, -1);
  uint32_t closer = opener;

  /* Work out the closing delimiter that pairs with the opening one. */

  for (n = 0; callout_start_delims[n] != 0; n++)
    {
    if (opener == callout_start_delims[n])
      {
      closer = callout_end_delims[n];
      break;
      }
    }

  fprintf(outfile, "%c", opener);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);
  fprintf(outfile, "%c ", closer);
  }

/* Show the next item in the pattern; at least one character is output even
when the recorded item length is zero. */

item_length = (int)((cb->next_item_length == 0)? 1 : cb->next_item_length);
fprintf(outfile, "%.*s\n", item_length, pbuffer8 + cb->pattern_position);

return 0;
}
4300
4301
4302
4303 /*************************************************
4304 * Show information about a pattern *
4305 *************************************************/
4306
4307 /* This function is called after a pattern has been compiled if any of the
4308 information-requesting controls have been set.
4309
4310 Arguments: none
4311
4312 Returns: PR_OK continue processing next line
4313 PR_SKIP skip to a blank line
4314 PR_ABEND abort the pcre2test run
4315 */
4316
4317 static int
show_pattern_info(void)4318 show_pattern_info(void)
4319 {
4320 uint32_t compile_options, overall_options, extra_options;
4321
4322 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4323 {
4324 fprintf(outfile, "------------------------------------------------------------------\n");
4325 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4326 }
4327
4328 if ((pat_patctl.control & CTL_INFO) != 0)
4329 {
4330 int rc;
4331 void *nametable;
4332 uint8_t *start_bits;
4333 BOOL heap_limit_set, match_limit_set, depth_limit_set;
4334 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4335 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4336 depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4337 newline_convention;
4338
4339 /* Exercise the error route. */
4340
4341 PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4342 (void)rc;
4343
4344 /* These info requests may return PCRE2_ERROR_UNSET. */
4345
4346 switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4347 {
4348 case 0:
4349 heap_limit_set = TRUE;
4350 break;
4351
4352 case PCRE2_ERROR_UNSET:
4353 heap_limit_set = FALSE;
4354 break;
4355
4356 default:
4357 return PR_ABEND;
4358 }
4359
4360 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4361 {
4362 case 0:
4363 match_limit_set = TRUE;
4364 break;
4365
4366 case PCRE2_ERROR_UNSET:
4367 match_limit_set = FALSE;
4368 break;
4369
4370 default:
4371 return PR_ABEND;
4372 }
4373
4374 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4375 {
4376 case 0:
4377 depth_limit_set = TRUE;
4378 break;
4379
4380 case PCRE2_ERROR_UNSET:
4381 depth_limit_set = FALSE;
4382 break;
4383
4384 default:
4385 return PR_ABEND;
4386 }
4387
4388 /* These info requests should always succeed. */
4389
4390 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4391 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4392 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4393 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4394 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4395 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4396 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4397 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4398 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4399 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4400 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4401 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4402 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4403 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4404 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4405 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4406 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4407 != 0)
4408 return PR_ABEND;
4409
4410 fprintf(outfile, "Capturing subpattern count = %d\n", capture_count);
4411
4412 if (backrefmax > 0)
4413 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4414
4415 if (maxlookbehind > 0)
4416 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4417
4418 if (heap_limit_set)
4419 fprintf(outfile, "Heap limit = %u\n", heap_limit);
4420
4421 if (match_limit_set)
4422 fprintf(outfile, "Match limit = %u\n", match_limit);
4423
4424 if (depth_limit_set)
4425 fprintf(outfile, "Depth limit = %u\n", depth_limit);
4426
4427 if (namecount > 0)
4428 {
4429 fprintf(outfile, "Named capturing subpatterns:\n");
4430 for (; namecount > 0; namecount--)
4431 {
4432 int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4433 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4434 fprintf(outfile, " ");
4435 PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4436 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4437 #ifdef SUPPORT_PCRE2_32
4438 if (test_mode == PCRE32_MODE)
4439 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4440 #endif
4441 #ifdef SUPPORT_PCRE2_16
4442 if (test_mode == PCRE16_MODE)
4443 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4444 #endif
4445 #ifdef SUPPORT_PCRE2_8
4446 if (test_mode == PCRE8_MODE)
4447 fprintf(outfile, "%3d\n", (int)(
4448 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4449 #endif
4450 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4451 }
4452 }
4453
4454 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4455 if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4456 if (match_empty) fprintf(outfile, "May match empty string\n");
4457
4458 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4459 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4460 pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4461
4462 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4463 cluttering up the verification output of non-UTF test files. */
4464
4465 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4466 {
4467 compile_options &= ~PCRE2_NEVER_UTF;
4468 overall_options &= ~PCRE2_NEVER_UTF;
4469 }
4470
4471 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4472 {
4473 compile_options &= ~PCRE2_NEVER_UCP;
4474 overall_options &= ~PCRE2_NEVER_UCP;
4475 }
4476
4477 if ((compile_options|overall_options) != 0)
4478 {
4479 if (compile_options == overall_options)
4480 show_compile_options(compile_options, "Options:", "\n");
4481 else
4482 {
4483 show_compile_options(compile_options, "Compile options:", "\n");
4484 show_compile_options(overall_options, "Overall options:", "\n");
4485 }
4486 }
4487
4488 if (extra_options != 0)
4489 show_compile_extra_options(extra_options, "Extra options:", "\n");
4490
4491 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4492
4493 if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4494 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4495 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4496 "any Unicode newline" : "CR, LF, or CRLF");
4497
4498 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4499 {
4500 switch (newline_convention)
4501 {
4502 case PCRE2_NEWLINE_CR:
4503 fprintf(outfile, "Forced newline is CR\n");
4504 break;
4505
4506 case PCRE2_NEWLINE_LF:
4507 fprintf(outfile, "Forced newline is LF\n");
4508 break;
4509
4510 case PCRE2_NEWLINE_CRLF:
4511 fprintf(outfile, "Forced newline is CRLF\n");
4512 break;
4513
4514 case PCRE2_NEWLINE_ANYCRLF:
4515 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4516 break;
4517
4518 case PCRE2_NEWLINE_ANY:
4519 fprintf(outfile, "Forced newline is any Unicode newline\n");
4520 break;
4521
4522 case PCRE2_NEWLINE_NUL:
4523 fprintf(outfile, "Forced newline is NUL\n");
4524 break;
4525
4526 default:
4527 break;
4528 }
4529 }
4530
4531 if (first_ctype == 2)
4532 {
4533 fprintf(outfile, "First code unit at start or follows newline\n");
4534 }
4535 else if (first_ctype == 1)
4536 {
4537 const char *caseless =
4538 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4539 "" : " (caseless)";
4540 if (PRINTOK(first_cunit))
4541 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4542 else
4543 {
4544 fprintf(outfile, "First code unit = ");
4545 pchar(first_cunit, FALSE, outfile);
4546 fprintf(outfile, "%s\n", caseless);
4547 }
4548 }
4549 else if (start_bits != NULL)
4550 {
4551 int i;
4552 int c = 24;
4553 fprintf(outfile, "Starting code units: ");
4554 for (i = 0; i < 256; i++)
4555 {
4556 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4557 {
4558 if (c > 75)
4559 {
4560 fprintf(outfile, "\n ");
4561 c = 2;
4562 }
4563 if (PRINTOK(i) && i != ' ')
4564 {
4565 fprintf(outfile, "%c ", i);
4566 c += 2;
4567 }
4568 else
4569 {
4570 fprintf(outfile, "\\x%02x ", i);
4571 c += 5;
4572 }
4573 }
4574 }
4575 fprintf(outfile, "\n");
4576 }
4577
4578 if (last_ctype != 0)
4579 {
4580 const char *caseless =
4581 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4582 "" : " (caseless)";
4583 if (PRINTOK(last_cunit))
4584 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4585 else
4586 {
4587 fprintf(outfile, "Last code unit = ");
4588 pchar(last_cunit, FALSE, outfile);
4589 fprintf(outfile, "%s\n", caseless);
4590 }
4591 }
4592
4593 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4594
4595 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4596 {
4597 if (FLD(compiled_code, executable_jit) != NULL)
4598 fprintf(outfile, "JIT compilation was successful\n");
4599 else
4600 {
4601 #ifdef SUPPORT_JIT
4602 fprintf(outfile, "JIT compilation was not successful");
4603 if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4604 return PR_ABEND;
4605 fprintf(outfile, "\n");
4606 #else
4607 fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4608 #endif
4609 }
4610 }
4611 }
4612
4613 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4614 {
4615 int errorcode;
4616 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4617 if (errorcode != 0)
4618 {
4619 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4620 if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4621 return PR_ABEND;
4622 return PR_SKIP;
4623 }
4624 }
4625
4626 return PR_OK;
4627 }
4628
4629
4630
4631 /*************************************************
4632 * Handle serialization error *
4633 *************************************************/
4634
4635 /* Print an error message after a serialization failure.
4636
4637 Arguments:
4638 rc the error code
4639 msg an initial message for what failed
4640
4641 Returns: FALSE if print_error_message() fails
4642 */
4643
4644 static BOOL
serial_error(int rc,const char * msg)4645 serial_error(int rc, const char *msg)
4646 {
4647 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4648 return print_error_message(rc, "", "\n");
4649 }
4650
4651
4652
4653 /*************************************************
4654 * Open file for save/load commands *
4655 *************************************************/
4656
4657 /* This function decodes the file name and opens the file.
4658
4659 Arguments:
4660 buffptr point after the #command
4661 mode open mode
4662 fptr points to the FILE variable
4663
4664 Returns: PR_OK or PR_ABEND
4665 */
4666
4667 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr)4668 open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
4669 {
4670 char *endf;
4671 char *filename = (char *)buffptr;
4672 while (isspace(*filename)) filename++;
4673 endf = filename + strlen8(filename);
4674 while (endf > filename && isspace(endf[-1])) endf--;
4675
4676 if (endf == filename)
4677 {
4678 fprintf(outfile, "** File name expected after #save\n");
4679 return PR_ABEND;
4680 }
4681
4682 *endf = 0;
4683 *fptr = fopen((const char *)filename, mode);
4684 if (*fptr == NULL)
4685 {
4686 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4687 return PR_ABEND;
4688 }
4689
4690 return PR_OK;
4691 }
4692
4693
4694
4695 /*************************************************
4696 * Process command line *
4697 *************************************************/
4698
4699 /* This function is called for lines beginning with # and a character that is
4700 not ! or whitespace, when encountered between tests, which means that there is
4701 no compiled pattern (compiled_code is NULL). The line is in buffer.
4702
4703 Arguments: none
4704
4705 Returns: PR_OK continue processing next line
4706 PR_SKIP skip to a blank line
4707 PR_ABEND abort the pcre2test run
4708 */
4709
4710 static int
process_command(void)4711 process_command(void)
4712 {
4713 FILE *f;
4714 PCRE2_SIZE serial_size;
4715 size_t i;
4716 int rc, cmd, cmdlen, yield;
4717 uint16_t first_listed_newline;
4718 const char *cmdname;
4719 uint8_t *argptr, *serial;
4720
4721 yield = PR_OK;
4722 cmd = CMD_UNKNOWN;
4723 cmdlen = 0;
4724
4725 for (i = 0; i < cmdlistcount; i++)
4726 {
4727 cmdname = cmdlist[i].name;
4728 cmdlen = strlen(cmdname);
4729 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4730 isspace(buffer[cmdlen+1]))
4731 {
4732 cmd = cmdlist[i].value;
4733 break;
4734 }
4735 }
4736
4737 argptr = buffer + cmdlen + 1;
4738
4739 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4740 {
4741 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4742 return PR_ABEND;
4743 }
4744
4745 switch(cmd)
4746 {
4747 case CMD_UNKNOWN:
4748 fprintf(outfile, "** Unknown command: %s", buffer);
4749 break;
4750
4751 case CMD_FORBID_UTF:
4752 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4753 break;
4754
4755 case CMD_PERLTEST:
4756 restrict_for_perl_test = TRUE;
4757 break;
4758
4759 /* Set default pattern modifiers */
4760
4761 case CMD_PATTERN:
4762 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4763 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4764 def_patctl.jit = 7;
4765 break;
4766
4767 /* Set default subject modifiers */
4768
4769 case CMD_SUBJECT:
4770 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4771 break;
4772
4773 /* Check the default newline, and if not one of those listed, set up the
4774 first one to be forced. An empty list unsets. */
4775
4776 case CMD_NEWLINE_DEFAULT:
4777 local_newline_default = 0; /* Unset */
4778 first_listed_newline = 0;
4779 for (;;)
4780 {
4781 while (isspace(*argptr)) argptr++;
4782 if (*argptr == 0) break;
4783 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4784 {
4785 size_t nlen = strlen(newlines[i]);
4786 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4787 isspace(argptr[nlen]))
4788 {
4789 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */
4790 if (first_listed_newline == 0) first_listed_newline = i;
4791 }
4792 }
4793 while (*argptr != 0 && !isspace(*argptr)) argptr++;
4794 }
4795 local_newline_default = first_listed_newline;
4796 break;
4797
4798 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4799 the compiled pattern (e.g. to give information) are permitted. The default
4800 pattern modifiers are ignored. */
4801
4802 case CMD_POP:
4803 case CMD_POPCOPY:
4804 if (patstacknext <= 0)
4805 {
4806 fprintf(outfile, "** Can't pop off an empty stack\n");
4807 return PR_SKIP;
4808 }
4809 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */
4810 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4811 return PR_SKIP;
4812
4813 if (cmd == CMD_POP)
4814 {
4815 SET(compiled_code, patstack[--patstacknext]);
4816 }
4817 else
4818 {
4819 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4820 }
4821
4822 if (pat_patctl.jit != 0)
4823 {
4824 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4825 }
4826 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4827 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4828 if ((pat_patctl.control & CTL_ANYINFO) != 0)
4829 {
4830 rc = show_pattern_info();
4831 if (rc != PR_OK) return rc;
4832 }
4833 break;
4834
4835 /* Save the stack of compiled patterns to a file, then empty the stack. */
4836
4837 case CMD_SAVE:
4838 if (patstacknext <= 0)
4839 {
4840 fprintf(outfile, "** No stacked patterns to save\n");
4841 return PR_OK;
4842 }
4843
4844 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
4845 if (rc != PR_OK) return rc;
4846
4847 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4848 general_context);
4849 if (rc < 0)
4850 {
4851 fclose(f);
4852 if (!serial_error(rc, "Serialization")) return PR_ABEND;
4853 break;
4854 }
4855
4856 /* Write the length at the start of the file to make it straightforward to
4857 get the right memory when re-loading. This saves having to read the file size
4858 in different operating systems. To allow for different endianness (even
4859 though reloading with the opposite endianness does not work), write the
4860 length byte-by-byte. */
4861
4862 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
4863 if (fwrite(serial, 1, serial_size, f) != serial_size)
4864 {
4865 fprintf(outfile, "** Wrong return from fwrite()\n");
4866 fclose(f);
4867 return PR_ABEND;
4868 }
4869
4870 fclose(f);
4871 PCRE2_SERIALIZE_FREE(serial);
4872 while(patstacknext > 0)
4873 {
4874 SET(compiled_code, patstack[--patstacknext]);
4875 SUB1(pcre2_code_free, compiled_code);
4876 }
4877 SET(compiled_code, NULL);
4878 break;
4879
4880 /* Load a set of compiled patterns from a file onto the stack */
4881
4882 case CMD_LOAD:
4883 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
4884 if (rc != PR_OK) return rc;
4885
4886 serial_size = 0;
4887 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
4888
4889 serial = malloc(serial_size);
4890 if (serial == NULL)
4891 {
4892 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
4893 SIZ_CAST serial_size);
4894 fclose(f);
4895 return PR_ABEND;
4896 }
4897
4898 i = fread(serial, 1, serial_size, f);
4899 fclose(f);
4900
4901 if (i != serial_size)
4902 {
4903 fprintf(outfile, "** Wrong return from fread()\n");
4904 yield = PR_ABEND;
4905 }
4906 else
4907 {
4908 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
4909 if (rc < 0)
4910 {
4911 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
4912 }
4913 else
4914 {
4915 if (rc + patstacknext > PATSTACKSIZE)
4916 {
4917 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
4918 rc, (rc == 1)? "" : "s");
4919 rc = PATSTACKSIZE - patstacknext;
4920 fprintf(outfile, "** Decoding %d pattern%s\n", rc,
4921 (rc == 1)? "" : "s");
4922 }
4923 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
4924 general_context);
4925 if (rc < 0)
4926 {
4927 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
4928 }
4929 else patstacknext += rc;
4930 }
4931 }
4932
4933 free(serial);
4934 break;
4935 }
4936
4937 return yield;
4938 }
4939
4940
4941
4942 /*************************************************
4943 * Process pattern line *
4944 *************************************************/
4945
4946 /* This function is called when the input buffer contains the start of a
4947 pattern. The first character is known to be a valid delimiter. The pattern is
4948 read, modifiers are interpreted, and a suitable local context is set up for
4949 this test. The pattern is then compiled.
4950
4951 Arguments: none
4952
4953 Returns: PR_OK continue processing next line
4954 PR_SKIP skip to a blank line
4955 PR_ABEND abort the pcre2test run
4956 */
4957
4958 static int
process_pattern(void)4959 process_pattern(void)
4960 {
4961 BOOL utf;
4962 uint32_t k;
4963 uint8_t *p = buffer;
4964 unsigned int delimiter = *p++;
4965 int errorcode;
4966 void *use_pat_context;
4967 uint32_t use_forbid_utf = forbid_utf;
4968 PCRE2_SIZE patlen;
4969 PCRE2_SIZE valgrind_access_length;
4970 PCRE2_SIZE erroroffset;
4971
4972 /* Initialize the context and pattern/data controls for this test from the
4973 defaults. */
4974
4975 PATCTXCPY(pat_context, default_pat_context);
4976 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
4977
4978 /* Find the end of the pattern, reading more lines if necessary. */
4979
4980 for(;;)
4981 {
4982 while (*p != 0)
4983 {
4984 if (*p == '\\' && p[1] != 0) p++;
4985 else if (*p == delimiter) break;
4986 p++;
4987 }
4988 if (*p != 0) break;
4989 if ((p = extend_inputline(infile, p, " > ")) == NULL)
4990 {
4991 fprintf(outfile, "** Unexpected EOF\n");
4992 return PR_ABEND;
4993 }
4994 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
4995 }
4996
4997 /* If the first character after the delimiter is backslash, make the pattern
4998 end with backslash. This is purely to provide a way of testing for the error
4999 message when a pattern ends with backslash. */
5000
5001 if (p[1] == '\\') *p++ = '\\';
5002
5003 /* Terminate the pattern at the delimiter, and compute the length. */
5004
5005 *p++ = 0;
5006 patlen = p - buffer - 2;
5007
5008 /* Look for modifiers and options after the final delimiter. */
5009
5010 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5011 utf = (pat_patctl.options & PCRE2_UTF) != 0;
5012
5013 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5014 exclusive with the utf modifier. */
5015
5016 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5017 {
5018 if (test_mode == PCRE8_MODE)
5019 {
5020 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5021 return PR_SKIP;
5022 }
5023 if (utf)
5024 {
5025 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5026 return PR_SKIP;
5027 }
5028 }
5029
5030 /* The convert and posix modifiers are mutually exclusive. */
5031
5032 if (pat_patctl.convert_type != CONVERT_UNSET &&
5033 (pat_patctl.control & CTL_POSIX) != 0)
5034 {
5035 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5036 return PR_SKIP;
5037 }
5038
5039 /* Check for mutually exclusive control modifiers. At present, these are all in
5040 the first control word. */
5041
5042 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5043 {
5044 uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5045 if (c != 0 && c != (c & (~c+1)))
5046 {
5047 show_controls(c, 0, "** Not allowed together:");
5048 fprintf(outfile, "\n");
5049 return PR_SKIP;
5050 }
5051 }
5052
5053 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5054 specified. */
5055
5056 if (pat_patctl.jit == 0 &&
5057 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5058 pat_patctl.jit = 7;
5059
5060 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5061 in callouts. Convert from hex if requested (literal strings in quotes may be
5062 present within the hexadecimal pairs). The result must necessarily be fewer
5063 characters so will always fit in pbuffer8. */
5064
5065 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5066 {
5067 uint8_t *pp, *pt;
5068 uint32_t c, d;
5069
5070 pt = pbuffer8;
5071 for (pp = buffer + 1; *pp != 0; pp++)
5072 {
5073 if (isspace(*pp)) continue;
5074 c = *pp++;
5075
5076 /* Handle a literal substring */
5077
5078 if (c == '\'' || c == '"')
5079 {
5080 uint8_t *pq = pp;
5081 for (;; pp++)
5082 {
5083 d = *pp;
5084 if (d == 0)
5085 {
5086 fprintf(outfile, "** Missing closing quote in hex pattern: "
5087 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5088 return PR_SKIP;
5089 }
5090 if (d == c) break;
5091 *pt++ = d;
5092 }
5093 }
5094
5095 /* Expect a hex pair */
5096
5097 else
5098 {
5099 if (!isxdigit(c))
5100 {
5101 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5102 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5103 return PR_SKIP;
5104 }
5105 if (*pp == 0)
5106 {
5107 fprintf(outfile, "** Odd number of digits in hex pattern\n");
5108 return PR_SKIP;
5109 }
5110 d = *pp;
5111 if (!isxdigit(d))
5112 {
5113 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5114 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5115 return PR_SKIP;
5116 }
5117 c = toupper(c);
5118 d = toupper(d);
5119 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5120 (isdigit(d)? (d - '0') : (d - 'A' + 10));
5121 }
5122 }
5123 *pt = 0;
5124 patlen = pt - pbuffer8;
5125 }
5126
5127 /* If not a hex string, process for repetition expansion if requested. */
5128
5129 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5130 {
5131 uint8_t *pp, *pt;
5132
5133 pt = pbuffer8;
5134 for (pp = buffer + 1; *pp != 0; pp++)
5135 {
5136 uint8_t *pc = pp;
5137 uint32_t count = 1;
5138 size_t length = 1;
5139
5140 /* Check for replication syntax; if not found, the defaults just set will
5141 prevail and one character will be copied. */
5142
5143 if (pp[0] == '\\' && pp[1] == '[')
5144 {
5145 uint8_t *pe;
5146 for (pe = pp + 2; *pe != 0; pe++)
5147 {
5148 if (pe[0] == ']' && pe[1] == '{')
5149 {
5150 uint32_t clen = pe - pc - 2;
5151 uint32_t i = 0;
5152 unsigned long uli;
5153 char *endptr;
5154
5155 pe += 2;
5156 uli = strtoul((const char *)pe, &endptr, 10);
5157 if (U32OVERFLOW(uli))
5158 {
5159 fprintf(outfile, "** Pattern repeat count too large\n");
5160 return PR_SKIP;
5161 }
5162
5163 i = (uint32_t)uli;
5164 pe = (uint8_t *)endptr;
5165 if (*pe == '}')
5166 {
5167 if (i == 0)
5168 {
5169 fprintf(outfile, "** Zero repeat not allowed\n");
5170 return PR_SKIP;
5171 }
5172 pc += 2;
5173 count = i;
5174 length = clen;
5175 pp = pe;
5176 break;
5177 }
5178 }
5179 }
5180 }
5181
5182 /* Add to output. If the buffer is too small expand it. The function for
5183 expanding buffers always keeps buffer and pbuffer8 in step as far as their
5184 size goes. */
5185
5186 while (pt + count * length > pbuffer8 + pbuffer8_size)
5187 {
5188 size_t pc_offset = pc - buffer;
5189 size_t pp_offset = pp - buffer;
5190 size_t pt_offset = pt - pbuffer8;
5191 expand_input_buffers();
5192 pc = buffer + pc_offset;
5193 pp = buffer + pp_offset;
5194 pt = pbuffer8 + pt_offset;
5195 }
5196
5197 for (; count > 0; count--)
5198 {
5199 memcpy(pt, pc, length);
5200 pt += length;
5201 }
5202 }
5203
5204 *pt = 0;
5205 patlen = pt - pbuffer8;
5206
5207 if ((pat_patctl.control & CTL_INFO) != 0)
5208 fprintf(outfile, "Expanded: %s\n", pbuffer8);
5209 }
5210
5211 /* Neither hex nor expanded, just copy the input verbatim. */
5212
5213 else
5214 {
5215 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5216 }
5217
5218 /* Sort out character tables */
5219
5220 if (pat_patctl.locale[0] != 0)
5221 {
5222 if (pat_patctl.tables_id != 0)
5223 {
5224 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5225 return PR_SKIP;
5226 }
5227 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5228 {
5229 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5230 return PR_SKIP;
5231 }
5232 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5233 {
5234 strcpy((char *)locale_name, (char *)pat_patctl.locale);
5235 if (locale_tables != NULL) free((void *)locale_tables);
5236 PCRE2_MAKETABLES(locale_tables);
5237 }
5238 use_tables = locale_tables;
5239 }
5240
5241 else switch (pat_patctl.tables_id)
5242 {
5243 case 0: use_tables = NULL; break;
5244 case 1: use_tables = tables1; break;
5245 case 2: use_tables = tables2; break;
5246 default:
5247 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
5248 return PR_SKIP;
5249 }
5250
5251 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5252
5253 /* Set up for the stackguard test. */
5254
5255 if (pat_patctl.stackguard_test != 0)
5256 {
5257 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5258 }
5259
5260 /* Handle compiling via the POSIX interface, which doesn't support the
5261 timing, showing, or debugging options, nor the ability to pass over
5262 local character tables. Neither does it have 16-bit or 32-bit support. */
5263
5264 if ((pat_patctl.control & CTL_POSIX) != 0)
5265 {
5266 #ifdef SUPPORT_PCRE2_8
5267 int rc;
5268 int cflags = 0;
5269 const char *msg = "** Ignored with POSIX interface:";
5270 #endif
5271
5272 if (test_mode != PCRE8_MODE)
5273 {
5274 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5275 return PR_SKIP;
5276 }
5277
5278 #ifdef SUPPORT_PCRE2_8
5279 /* Check for features that the POSIX interface does not support. */
5280
5281 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5282 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5283 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5284 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5285 if (timeit > 0) prmsg(&msg, "timing");
5286 if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5287
5288 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5289 {
5290 show_compile_options(
5291 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5292 msg = "";
5293 }
5294
5295 if ((FLD(pat_context, extra_options) &
5296 ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5297 {
5298 show_compile_extra_options(
5299 FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5300 msg, "");
5301 msg = "";
5302 }
5303
5304 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5305 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5306 {
5307 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5308 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5309 msg = "";
5310 }
5311
5312 if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5313 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5314 prmsg(&msg, "max_pattern_length");
5315 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5316 prmsg(&msg, "parens_nest_limit");
5317
5318 if (msg[0] == 0) fprintf(outfile, "\n");
5319
5320 /* Translate PCRE2 options to POSIX options and then compile. */
5321
5322 if (utf) cflags |= REG_UTF;
5323 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5324 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5325 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5326 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5327 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5328 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5329 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5330
5331 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5332 {
5333 preg.re_endp = (char *)pbuffer8 + patlen;
5334 cflags |= REG_PEND;
5335 }
5336
5337 rc = regcomp(&preg, (char *)pbuffer8, cflags);
5338
5339 /* Compiling failed */
5340
5341 if (rc != 0)
5342 {
5343 size_t bsize, usize;
5344 int psize;
5345
5346 preg.re_pcre2_code = NULL; /* In case something was left in there */
5347 preg.re_match_data = NULL;
5348
5349 bsize = (pat_patctl.regerror_buffsize != 0)?
5350 pat_patctl.regerror_buffsize : pbuffer8_size;
5351 if (bsize + 8 < pbuffer8_size)
5352 memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5353 usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5354
5355 /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5356 versions of snprintf() put a zero byte at the end, but others do not.
5357 Therefore, we print a maximum of one less than the size of the buffer. */
5358
5359 psize = (int)bsize - 1;
5360 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5361 if (usize > bsize)
5362 {
5363 fprintf(outfile, "** regerror() message truncated\n");
5364 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5365 fprintf(outfile, "** regerror() buffer overflow\n");
5366 }
5367 return PR_SKIP;
5368 }
5369
5370 /* Compiling succeeded. Check that the values in the preg block are sensible.
5371 It can happen that pcre2test is accidentally linked with a different POSIX
5372 library which succeeds, but of course puts different things into preg. In
5373 this situation, calling regfree() may cause a segfault (or invalid free() in
5374 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5375 calling of regfree() on exit. */
5376
5377 if (preg.re_pcre2_code == NULL ||
5378 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5379 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5380 preg.re_match_data == NULL ||
5381 preg.re_cflags != cflags)
5382 {
5383 fprintf(outfile,
5384 "** The regcomp() function returned zero (success), but the values set\n"
5385 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5386 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5387 "** some other POSIX regex library.\n**\n");
5388 preg.re_pcre2_code = NULL;
5389 return PR_ABEND;
5390 }
5391
5392 return PR_OK;
5393 #endif /* SUPPORT_PCRE2_8 */
5394 }
5395
5396 /* Handle compiling via the native interface. Controls that act later are
5397 ignored with "push". Replacements are locked out. */
5398
5399 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5400 {
5401 if (pat_patctl.replacement[0] != 0)
5402 {
5403 fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5404 return PR_OK;
5405 }
5406 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5407 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5408 {
5409 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5410 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5411 "** Ignored when compiled pattern is stacked with 'push':");
5412 fprintf(outfile, "\n");
5413 }
5414 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5415 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5416 {
5417 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5418 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5419 "** Applies only to compile when pattern is stacked with 'push':");
5420 fprintf(outfile, "\n");
5421 }
5422 }
5423
5424 /* Convert the input in non-8-bit modes. */
5425
5426 errorcode = 0;
5427
5428 #ifdef SUPPORT_PCRE2_16
5429 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5430 #endif
5431
5432 #ifdef SUPPORT_PCRE2_32
5433 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5434 #endif
5435
5436 switch(errorcode)
5437 {
5438 case -1:
5439 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5440 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5441 return PR_SKIP;
5442
5443 case -2:
5444 fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5445 "cannot be converted to UTF\n");
5446 return PR_SKIP;
5447
5448 case -3:
5449 fprintf(outfile, "** Failed: character value greater than 0xffff "
5450 "cannot be converted to 16-bit in non-UTF mode\n");
5451 return PR_SKIP;
5452
5453 default:
5454 break;
5455 }
5456
5457 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5458 patlen. If it is to be converted, copy the result back afterwards so that it
5459 ends up back in the usual place. */
5460
5461 if (pat_patctl.convert_type != CONVERT_UNSET)
5462 {
5463 int rc;
5464 int convert_return = PR_OK;
5465 uint32_t convert_options = pat_patctl.convert_type;
5466 void *converted_pattern;
5467 PCRE2_SIZE converted_length;
5468
5469 if (pat_patctl.convert_length != 0)
5470 {
5471 converted_length = pat_patctl.convert_length;
5472 converted_pattern = malloc(converted_length * code_unit_size);
5473 if (converted_pattern == NULL)
5474 {
5475 fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5476 return PR_SKIP;
5477 }
5478 }
5479 else converted_pattern = NULL; /* Let the library allocate */
5480
5481 if (utf) convert_options |= PCRE2_CONVERT_UTF;
5482 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5483 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5484
5485 CONCTXCPY(con_context, default_con_context);
5486
5487 if (pat_patctl.convert_glob_escape != 0)
5488 {
5489 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5490 pat_patctl.convert_glob_escape;
5491 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5492 if (rc != 0)
5493 {
5494 fprintf(outfile, "** Invalid glob escape '%c'\n",
5495 pat_patctl.convert_glob_escape);
5496 convert_return = PR_SKIP;
5497 goto CONVERT_FINISH;
5498 }
5499 }
5500
5501 if (pat_patctl.convert_glob_separator != 0)
5502 {
5503 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5504 if (rc != 0)
5505 {
5506 fprintf(outfile, "** Invalid glob separator '%c'\n",
5507 pat_patctl.convert_glob_separator);
5508 convert_return = PR_SKIP;
5509 goto CONVERT_FINISH;
5510 }
5511 }
5512
5513 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5514 &converted_pattern, &converted_length, con_context);
5515
5516 if (rc != 0)
5517 {
5518 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5519 SIZ_CAST converted_length);
5520 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5521 }
5522
5523 /* Output the converted pattern, then copy it. */
5524
5525 else
5526 {
5527 PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5528 fprintf(outfile, "\n");
5529 patlen = converted_length;
5530 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5531 }
5532
5533 /* Free the converted pattern. */
5534
5535 CONVERT_FINISH:
5536 if (pat_patctl.convert_length != 0)
5537 free(converted_pattern);
5538 else
5539 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5540
5541 /* Return if conversion was unsuccessful. */
5542
5543 if (convert_return != PR_OK) return convert_return;
5544 }
5545
5546 /* By default we pass a zero-terminated pattern, but a length is passed if
5547 "use_length" was specified or this is a hex pattern (which might contain binary
5548 zeros). When valgrind is supported, arrange for the unused part of the buffer
5549 to be marked as no access. */
5550
5551 valgrind_access_length = patlen;
5552 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5553 {
5554 patlen = PCRE2_ZERO_TERMINATED;
5555 valgrind_access_length += 1; /* For the terminating zero */
5556 }
5557
5558 #ifdef SUPPORT_VALGRIND
5559 #ifdef SUPPORT_PCRE2_8
5560 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5561 {
5562 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5563 pbuffer8_size - valgrind_access_length);
5564 }
5565 #endif
5566 #ifdef SUPPORT_PCRE2_16
5567 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5568 {
5569 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5570 pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5571 }
5572 #endif
5573 #ifdef SUPPORT_PCRE2_32
5574 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5575 {
5576 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5577 pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5578 }
5579 #endif
5580 #else /* Valgrind not supported */
5581 (void)valgrind_access_length; /* Avoid compiler warning */
5582 #endif
5583
5584 /* If #newline_default has been used and the library was not compiled with an
5585 appropriate default newline setting, local_newline_default will be non-zero. We
5586 use this if there is no explicit newline modifier. */
5587
5588 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5589 {
5590 SETFLD(pat_context, newline_convention, local_newline_default);
5591 }
5592
5593 /* The null_context modifier is used to test calling pcre2_compile() with a
5594 NULL context. */
5595
5596 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5597 NULL : PTR(pat_context);
5598
5599 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5600 and PCRE2_NEVER_UCP are invalid with it. */
5601
5602 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5603
5604 /* Compile many times when timing. */
5605
5606 if (timeit > 0)
5607 {
5608 int i;
5609 clock_t time_taken = 0;
5610 for (i = 0; i < timeit; i++)
5611 {
5612 clock_t start_time = clock();
5613 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5614 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5615 use_pat_context);
5616 time_taken += clock() - start_time;
5617 if (TEST(compiled_code, !=, NULL))
5618 { SUB1(pcre2_code_free, compiled_code); }
5619 }
5620 total_compile_time += time_taken;
5621 fprintf(outfile, "Compile time %.4f milliseconds\n",
5622 (((double)time_taken * 1000.0) / (double)timeit) /
5623 (double)CLOCKS_PER_SEC);
5624 }
5625
5626 /* A final compile that is used "for real". */
5627
5628 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5629 &errorcode, &erroroffset, use_pat_context);
5630
5631 /* Call the JIT compiler if requested. When timing, we must free and recompile
5632 the pattern each time because that is the only way to free the JIT compiled
5633 code. We know that compilation will always succeed. */
5634
5635 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5636 {
5637 if (timeit > 0)
5638 {
5639 int i;
5640 clock_t time_taken = 0;
5641 for (i = 0; i < timeit; i++)
5642 {
5643 clock_t start_time;
5644 SUB1(pcre2_code_free, compiled_code);
5645 PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5646 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5647 use_pat_context);
5648 start_time = clock();
5649 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
5650 time_taken += clock() - start_time;
5651 }
5652 total_jit_compile_time += time_taken;
5653 fprintf(outfile, "JIT compile %.4f milliseconds\n",
5654 (((double)time_taken * 1000.0) / (double)timeit) /
5655 (double)CLOCKS_PER_SEC);
5656 }
5657 else
5658 {
5659 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5660 }
5661 }
5662
5663 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5664 and 32-bit buffers can be marked completely undefined, but we must leave the
5665 pattern in the 8-bit buffer defined because it may be read from a callout
5666 during matching. */
5667
5668 #ifdef SUPPORT_VALGRIND
5669 #ifdef SUPPORT_PCRE2_8
5670 if (test_mode == PCRE8_MODE)
5671 {
5672 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5673 pbuffer8_size - valgrind_access_length);
5674 }
5675 #endif
5676 #ifdef SUPPORT_PCRE2_16
5677 if (test_mode == PCRE16_MODE)
5678 {
5679 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5680 }
5681 #endif
5682 #ifdef SUPPORT_PCRE2_32
5683 if (test_mode == PCRE32_MODE)
5684 {
5685 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5686 }
5687 #endif
5688 #endif
5689
5690 /* Compilation failed; go back for another re, skipping to blank line
5691 if non-interactive. */
5692
5693 if (TEST(compiled_code, ==, NULL))
5694 {
5695 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5696 (int)erroroffset);
5697 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5698 return PR_SKIP;
5699 }
5700
5701 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5702 locked out at compile time, but we must also check for occurrences of \P, \p,
5703 and \X, which are only supported when Unicode is supported. */
5704
5705 if (forbid_utf != 0)
5706 {
5707 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5708 {
5709 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5710 "#forbid_utf command\n");
5711 return PR_SKIP;
5712 }
5713 }
5714
5715 /* Remember the maximum lookbehind, for partial matching. */
5716
5717 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5718 return PR_ABEND;
5719
5720 /* If an explicit newline modifier was given, set the information flag in the
5721 pattern so that it is preserved over push/pop. */
5722
5723 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5724 {
5725 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5726 }
5727
5728 /* Output code size and other information if requested. */
5729
5730 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5731 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5732 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5733 {
5734 int rc = show_pattern_info();
5735 if (rc != PR_OK) return rc;
5736 }
5737
5738 /* The "push" control requests that the compiled pattern be remembered on a
5739 stack. This is mainly for testing the serialization functionality. */
5740
5741 if ((pat_patctl.control & CTL_PUSH) != 0)
5742 {
5743 if (patstacknext >= PATSTACKSIZE)
5744 {
5745 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5746 return PR_ABEND;
5747 }
5748 patstack[patstacknext++] = PTR(compiled_code);
5749 SET(compiled_code, NULL);
5750 }
5751
5752 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5753 copy of the pattern, the latter with a copy of its character tables. This tests
5754 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5755
5756 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5757 {
5758 if (patstacknext >= PATSTACKSIZE)
5759 {
5760 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5761 return PR_ABEND;
5762 }
5763 if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5764 {
5765 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5766 }
5767 else
5768 {
5769 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5770 compiled_code); }
5771 }
5772
5773 return PR_OK;
5774 }
5775
5776
5777
5778 /*************************************************
5779 * Check heap, match or depth limit *
5780 *************************************************/
5781
5782 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5783 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5784
5785 Arguments:
5786 pp the subject string
5787 ulen length of subject or PCRE2_ZERO_TERMINATED
5788 errnumber defines which limit to test
5789 msg string to include in final message
5790
5791 Returns: the return from the final match function call
5792 */
5793
5794 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5795 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5796 {
5797 int capcount;
5798 uint32_t min = 0;
5799 uint32_t mid = 64;
5800 uint32_t max = UINT32_MAX;
5801
5802 PCRE2_SET_MATCH_LIMIT(dat_context, max);
5803 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
5804 PCRE2_SET_HEAP_LIMIT(dat_context, max);
5805
5806 for (;;)
5807 {
5808 uint32_t stack_start = 0;
5809
5810 if (errnumber == PCRE2_ERROR_HEAPLIMIT)
5811 {
5812 PCRE2_SET_HEAP_LIMIT(dat_context, mid);
5813 }
5814 else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
5815 {
5816 PCRE2_SET_MATCH_LIMIT(dat_context, mid);
5817 }
5818 else
5819 {
5820 PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
5821 }
5822
5823 if ((dat_datctl.control & CTL_DFA) != 0)
5824 {
5825 stack_start = DFA_START_RWS_SIZE/1024;
5826 if (dfa_workspace == NULL)
5827 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5828 if (dfa_matched++ == 0)
5829 dfa_workspace[0] = -1; /* To catch bad restart */
5830 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5831 dat_datctl.options, match_data,
5832 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
5833 }
5834
5835 else if ((pat_patctl.control & CTL_JITFAST) != 0)
5836 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5837 dat_datctl.options, match_data, PTR(dat_context));
5838
5839 else
5840 {
5841 stack_start = START_FRAMES_SIZE/1024;
5842 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
5843 dat_datctl.options, match_data, PTR(dat_context));
5844 }
5845
5846 if (capcount == errnumber)
5847 {
5848 if ((mid & 0x80000000u) != 0)
5849 {
5850 fprintf(outfile, "Can't find minimum %s limit: check pattern for "
5851 "restriction\n", msg);
5852 break;
5853 }
5854
5855 min = mid;
5856 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
5857 }
5858 else if (capcount >= 0 ||
5859 capcount == PCRE2_ERROR_NOMATCH ||
5860 capcount == PCRE2_ERROR_PARTIAL)
5861 {
5862 /* If we've not hit the error with a heap limit less than the size of the
5863 initial stack frame vector (for pcre2_match()) or the initial stack
5864 workspace vector (for pcre2_dfa_match()), the heap is not being used, so
5865 the minimum limit is zero; there's no need to go on. The other limits are
5866 always greater than zero. */
5867
5868 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
5869 {
5870 fprintf(outfile, "Minimum %s limit = 0\n", msg);
5871 break;
5872 }
5873 if (mid == min + 1)
5874 {
5875 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
5876 break;
5877 }
5878 max = mid;
5879 mid = (min + max)/2;
5880 }
5881 else break; /* Some other error */
5882 }
5883
5884 return capcount;
5885 }
5886
5887
5888
5889 /*************************************************
5890 * Callout function *
5891 *************************************************/
5892
5893 /* Called from a PCRE2 library as a result of the (?C) item. We print out where
5894 we are in the match (unless suppressed). Yield zero unless more callouts than
5895 the fail count, or the callout data is not zero. The only differences in the
5896 callout block for different code unit widths are that the pointers to the
5897 subject, the most recent MARK, and a callout argument string point to strings
5898 of the appropriate width. Casts can be used to deal with this.
5899
5900 Argument: a pointer to a callout block
5901 Return:
5902 */
5903
5904 static int
callout_function(pcre2_callout_block_8 * cb,void * callout_data_ptr)5905 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
5906 {
5907 FILE *f, *fdefault;
5908 uint32_t i, pre_start, post_start, subject_length;
5909 PCRE2_SIZE current_position;
5910 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5911 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
5912 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
5913
5914 /* The FILE f is used for echoing the subject string if it is non-NULL. This
5915 happens only once in simple cases, but we want to repeat after any additional
5916 output caused by CALLOUT_EXTRA. */
5917
5918 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
5919 NULL : outfile;
5920
5921 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
5922 {
5923 f = outfile;
5924 switch (cb->callout_flags)
5925 {
5926 case PCRE2_CALLOUT_BACKTRACK:
5927 fprintf(f, "Backtrack\n");
5928 break;
5929
5930 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
5931 fprintf(f, "Backtrack\nNo other matching paths\n");
5932 /* Fall through */
5933
5934 case PCRE2_CALLOUT_STARTMATCH:
5935 fprintf(f, "New match attempt\n");
5936 break;
5937
5938 default:
5939 f = fdefault;
5940 break;
5941 }
5942 }
5943 else f = fdefault;
5944
5945 /* For a callout with a string argument, show the string first because there
5946 isn't a tidy way to fit it in the rest of the data. */
5947
5948 if (cb->callout_string != NULL)
5949 {
5950 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
5951 fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
5952 SIZ_CAST cb->callout_string_offset, delimiter);
5953 PCHARSV(cb->callout_string, 0,
5954 cb->callout_string_length, utf, outfile);
5955 for (i = 0; callout_start_delims[i] != 0; i++)
5956 if (delimiter == callout_start_delims[i])
5957 {
5958 delimiter = callout_end_delims[i];
5959 break;
5960 }
5961 fprintf(outfile, "%c", delimiter);
5962 if (!callout_capture) fprintf(outfile, "\n");
5963 }
5964
5965 /* Show captured strings if required */
5966
5967 if (callout_capture)
5968 {
5969 if (cb->callout_string == NULL)
5970 fprintf(outfile, "Callout %d:", cb->callout_number);
5971 fprintf(outfile, " last capture = %d\n", cb->capture_last);
5972 for (i = 2; i < cb->capture_top * 2; i += 2)
5973 {
5974 fprintf(outfile, "%2d: ", i/2);
5975 if (cb->offset_vector[i] == PCRE2_UNSET)
5976 fprintf(outfile, "<unset>");
5977 else
5978 {
5979 PCHARSV(cb->subject, cb->offset_vector[i],
5980 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5981 }
5982 fprintf(outfile, "\n");
5983 }
5984 }
5985
5986 /* Unless suppressed, re-print the subject in canonical form (with escapes for
5987 non-printing characters), the first time, or if giving full details. On
5988 subsequent calls in the same match, we use PCHARS() just to find the printed
5989 lengths of the substrings. */
5990
5991 if (callout_where)
5992 {
5993 if (f != NULL) fprintf(f, "--->");
5994
5995 /* The subject before the match start. */
5996
5997 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5998
5999 /* If a lookbehind is involved, the current position may be earlier than the
6000 match start. If so, use the match start instead. */
6001
6002 current_position = (cb->current_position >= cb->start_match)?
6003 cb->current_position : cb->start_match;
6004
6005 /* The subject between the match start and the current position. */
6006
6007 PCHARS(post_start, cb->subject, cb->start_match,
6008 current_position - cb->start_match, utf, f);
6009
6010 /* Print from the current position to the end. */
6011
6012 PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
6013 utf, f);
6014
6015 /* Calculate the total subject printed length (no print). */
6016
6017 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
6018
6019 if (f != NULL) fprintf(f, "\n");
6020
6021 /* For automatic callouts, show the pattern offset. Otherwise, for a
6022 numerical callout whose number has not already been shown with captured
6023 strings, show the number here. A callout with a string argument has been
6024 displayed above. */
6025
6026 if (cb->callout_number == 255)
6027 {
6028 fprintf(outfile, "%+3d ", (int)cb->pattern_position);
6029 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
6030 }
6031 else
6032 {
6033 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
6034 else fprintf(outfile, "%3d ", cb->callout_number);
6035 }
6036
6037 /* Now show position indicators */
6038
6039 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
6040 fprintf(outfile, "^");
6041
6042 if (post_start > 0)
6043 {
6044 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
6045 fprintf(outfile, "^");
6046 }
6047
6048 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
6049 fprintf(outfile, " ");
6050
6051 if (cb->next_item_length != 0)
6052 fprintf(outfile, "%.*s", (int)(cb->next_item_length),
6053 pbuffer8 + cb->pattern_position);
6054 else
6055 fprintf(outfile, "End of pattern");
6056
6057 fprintf(outfile, "\n");
6058 }
6059
6060 first_callout = FALSE;
6061
6062 /* Show any mark info */
6063
6064 if (cb->mark != last_callout_mark)
6065 {
6066 if (cb->mark == NULL)
6067 fprintf(outfile, "Latest Mark: <unset>\n");
6068 else
6069 {
6070 fprintf(outfile, "Latest Mark: ");
6071 PCHARSV(cb->mark, 0, -1, utf, outfile);
6072 putc('\n', outfile);
6073 }
6074 last_callout_mark = cb->mark;
6075 }
6076
6077 /* Show callout data */
6078
6079 if (callout_data_ptr != NULL)
6080 {
6081 int callout_data = *((int32_t *)callout_data_ptr);
6082 if (callout_data != 0)
6083 {
6084 fprintf(outfile, "Callout data = %d\n", callout_data);
6085 return callout_data;
6086 }
6087 }
6088
6089 /* Keep count and give the appropriate return code */
6090
6091 callout_count++;
6092
6093 if (cb->callout_number == dat_datctl.cerror[0] &&
6094 callout_count >= dat_datctl.cerror[1])
6095 return PCRE2_ERROR_CALLOUT;
6096
6097 if (cb->callout_number == dat_datctl.cfail[0] &&
6098 callout_count >= dat_datctl.cfail[1])
6099 return 1;
6100
6101 return 0;
6102 }
6103
6104
6105
6106 /*************************************************
6107 * Handle *MARK and copy/get tests *
6108 *************************************************/
6109
6110 /* This function is called after complete and partial matches. It runs the
6111 tests for substring extraction.
6112
6113 Arguments:
6114 utf TRUE for utf
6115 capcount return from pcre2_match()
6116
6117 Returns: FALSE if print_error_message() fails
6118 */
6119
6120 static BOOL
copy_and_get(BOOL utf,int capcount)6121 copy_and_get(BOOL utf, int capcount)
6122 {
6123 int i;
6124 uint8_t *nptr;
6125
6126 /* Test copy strings by number */
6127
6128 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6129 {
6130 int rc;
6131 PCRE2_SIZE length, length2;
6132 uint32_t copybuffer[256];
6133 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6134 length = sizeof(copybuffer)/code_unit_size;
6135 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6136 if (rc < 0)
6137 {
6138 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6139 if (!print_error_message(rc, "", "\n")) return FALSE;
6140 }
6141 else
6142 {
6143 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6144 if (rc < 0)
6145 {
6146 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6147 if (!print_error_message(rc, "", "\n")) return FALSE;
6148 }
6149 else if (length2 != length)
6150 {
6151 fprintf(outfile, "Mismatched substring lengths: %"
6152 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6153 }
6154 fprintf(outfile, "%2dC ", n);
6155 PCHARSV(copybuffer, 0, length, utf, outfile);
6156 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6157 }
6158 }
6159
6160 /* Test copy strings by name */
6161
6162 nptr = dat_datctl.copy_names;
6163 for (;;)
6164 {
6165 int rc;
6166 int groupnumber;
6167 PCRE2_SIZE length, length2;
6168 uint32_t copybuffer[256];
6169 int namelen = strlen((const char *)nptr);
6170 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6171 PCRE2_SIZE cnl = namelen;
6172 #endif
6173 if (namelen == 0) break;
6174
6175 #ifdef SUPPORT_PCRE2_8
6176 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6177 #endif
6178 #ifdef SUPPORT_PCRE2_16
6179 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6180 #endif
6181 #ifdef SUPPORT_PCRE2_32
6182 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6183 #endif
6184
6185 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6186 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6187 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6188
6189 length = sizeof(copybuffer)/code_unit_size;
6190 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6191 if (rc < 0)
6192 {
6193 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6194 if (!print_error_message(rc, "", "\n")) return FALSE;
6195 }
6196 else
6197 {
6198 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6199 if (rc < 0)
6200 {
6201 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6202 if (!print_error_message(rc, "", "\n")) return FALSE;
6203 }
6204 else if (length2 != length)
6205 {
6206 fprintf(outfile, "Mismatched substring lengths: %"
6207 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
6208 }
6209 fprintf(outfile, " C ");
6210 PCHARSV(copybuffer, 0, length, utf, outfile);
6211 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6212 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6213 else fprintf(outfile, " (non-unique)\n");
6214 }
6215 nptr += namelen + 1;
6216 }
6217
6218 /* Test get strings by number */
6219
6220 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6221 {
6222 int rc;
6223 PCRE2_SIZE length;
6224 void *gotbuffer;
6225 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6226 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6227 if (rc < 0)
6228 {
6229 fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6230 if (!print_error_message(rc, "", "\n")) return FALSE;
6231 }
6232 else
6233 {
6234 fprintf(outfile, "%2dG ", n);
6235 PCHARSV(gotbuffer, 0, length, utf, outfile);
6236 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
6237 PCRE2_SUBSTRING_FREE(gotbuffer);
6238 }
6239 }
6240
6241 /* Test get strings by name */
6242
6243 nptr = dat_datctl.get_names;
6244 for (;;)
6245 {
6246 PCRE2_SIZE length;
6247 void *gotbuffer;
6248 int rc;
6249 int groupnumber;
6250 int namelen = strlen((const char *)nptr);
6251 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6252 PCRE2_SIZE cnl = namelen;
6253 #endif
6254 if (namelen == 0) break;
6255
6256 #ifdef SUPPORT_PCRE2_8
6257 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6258 #endif
6259 #ifdef SUPPORT_PCRE2_16
6260 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6261 #endif
6262 #ifdef SUPPORT_PCRE2_32
6263 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6264 #endif
6265
6266 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6267 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6268 fprintf(outfile, "Number not found for group '%s'\n", nptr);
6269
6270 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6271 if (rc < 0)
6272 {
6273 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6274 if (!print_error_message(rc, "", "\n")) return FALSE;
6275 }
6276 else
6277 {
6278 fprintf(outfile, " G ");
6279 PCHARSV(gotbuffer, 0, length, utf, outfile);
6280 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
6281 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6282 else fprintf(outfile, " (non-unique)\n");
6283 PCRE2_SUBSTRING_FREE(gotbuffer);
6284 }
6285 nptr += namelen + 1;
6286 }
6287
6288 /* Test getting the complete list of captured strings. */
6289
6290 if ((dat_datctl.control & CTL_GETALL) != 0)
6291 {
6292 int rc;
6293 void **stringlist;
6294 PCRE2_SIZE *lengths;
6295 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6296 if (rc < 0)
6297 {
6298 fprintf(outfile, "get substring list failed (%d): ", rc);
6299 if (!print_error_message(rc, "", "\n")) return FALSE;
6300 }
6301 else
6302 {
6303 for (i = 0; i < capcount; i++)
6304 {
6305 fprintf(outfile, "%2dL ", i);
6306 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6307 putc('\n', outfile);
6308 }
6309 if (stringlist[i] != NULL)
6310 fprintf(outfile, "string list not terminated by NULL\n");
6311 PCRE2_SUBSTRING_LIST_FREE(stringlist);
6312 }
6313 }
6314
6315 return TRUE;
6316 }
6317
6318
6319
6320 /*************************************************
6321 * Process a data line *
6322 *************************************************/
6323
6324 /* The line is in buffer; it will not be empty.
6325
6326 Arguments: none
6327
6328 Returns: PR_OK continue processing next line
6329 PR_SKIP skip to a blank line
6330 PR_ABEND abort the pcre2test run
6331 */
6332
6333 static int
process_data(void)6334 process_data(void)
6335 {
6336 PCRE2_SIZE len, ulen, arg_ulen;
6337 uint32_t gmatched;
6338 uint32_t c, k;
6339 uint32_t g_notempty = 0;
6340 uint8_t *p, *pp, *start_rep;
6341 size_t needlen;
6342 void *use_dat_context;
6343 BOOL utf;
6344 BOOL subject_literal;
6345 PCRE2_SIZE ovecsave[3];
6346
6347 #ifdef SUPPORT_PCRE2_8
6348 uint8_t *q8 = NULL;
6349 #endif
6350 #ifdef SUPPORT_PCRE2_16
6351 uint16_t *q16 = NULL;
6352 #endif
6353 #ifdef SUPPORT_PCRE2_32
6354 uint32_t *q32 = NULL;
6355 #endif
6356
6357 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6358
6359 /* Copy the default context and data control blocks to the active ones. Then
6360 copy from the pattern the controls that can be set in either the pattern or the
6361 data. This allows them to be overridden in the data line. We do not do this for
6362 options because those that are common apply separately to compiling and
6363 matching. */
6364
6365 DATCTXCPY(dat_context, default_dat_context);
6366 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6367 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6368 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6369 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6370 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6371
6372 /* Initialize for scanning the data line. */
6373
6374 #ifdef SUPPORT_PCRE2_8
6375 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6376 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6377 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6378 #else
6379 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6380 #endif
6381
6382 start_rep = NULL;
6383 len = strlen((const char *)buffer);
6384 while (len > 0 && isspace(buffer[len-1])) len--;
6385 buffer[len] = 0;
6386 p = buffer;
6387 while (isspace(*p)) p++;
6388
6389 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6390 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6391
6392 if (utf)
6393 {
6394 uint8_t *q;
6395 uint32_t cc;
6396 int n = 1;
6397 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6398 if (n <= 0)
6399 {
6400 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6401 "in UTF mode\n");
6402 return PR_OK;
6403 }
6404 }
6405
6406 #ifdef SUPPORT_VALGRIND
6407 /* Mark the dbuffer as addressable but undefined again. */
6408 if (dbuffer != NULL)
6409 {
6410 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6411 }
6412 #endif
6413
6414 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6415 the number of code units that will be needed (though the buffer may have to be
6416 extended if replication is involved). */
6417
6418 needlen = (size_t)((len+1) * code_unit_size);
6419 if (dbuffer == NULL || needlen >= dbuffer_size)
6420 {
6421 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6422 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6423 if (dbuffer == NULL)
6424 {
6425 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6426 exit(1);
6427 }
6428 }
6429 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
6430
6431 /* Scan the data line, interpreting data escapes, and put the result into a
6432 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6433 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6434 */
6435
6436 while ((c = *p++) != 0)
6437 {
6438 int32_t i = 0;
6439 size_t replen;
6440
6441 /* ] may mark the end of a replicated sequence */
6442
6443 if (c == ']' && start_rep != NULL)
6444 {
6445 long li;
6446 char *endptr;
6447 size_t qoffset = CAST8VAR(q) - dbuffer;
6448 size_t rep_offset = start_rep - dbuffer;
6449
6450 if (*p++ != '{')
6451 {
6452 fprintf(outfile, "** Expected '{' after \\[....]\n");
6453 return PR_OK;
6454 }
6455
6456 li = strtol((const char *)p, &endptr, 10);
6457 if (S32OVERFLOW(li))
6458 {
6459 fprintf(outfile, "** Repeat count too large\n");
6460 return PR_OK;
6461 }
6462
6463 p = (uint8_t *)endptr;
6464 if (*p++ != '}')
6465 {
6466 fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6467 return PR_OK;
6468 }
6469
6470 i = (int32_t)li;
6471 if (i-- == 0)
6472 {
6473 fprintf(outfile, "** Zero repeat not allowed\n");
6474 return PR_OK;
6475 }
6476
6477 replen = CAST8VAR(q) - start_rep;
6478 needlen += replen * i;
6479
6480 if (needlen >= dbuffer_size)
6481 {
6482 while (needlen >= dbuffer_size) dbuffer_size *= 2;
6483 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6484 if (dbuffer == NULL)
6485 {
6486 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6487 exit(1);
6488 }
6489 SETCASTPTR(q, dbuffer + qoffset);
6490 start_rep = dbuffer + rep_offset;
6491 }
6492
6493 while (i-- > 0)
6494 {
6495 memcpy(CAST8VAR(q), start_rep, replen);
6496 SETPLUS(q, replen/code_unit_size);
6497 }
6498
6499 start_rep = NULL;
6500 continue;
6501 }
6502
6503 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6504 set, do the fudge for setting the top bit. */
6505
6506 if (c != '\\' || subject_literal)
6507 {
6508 uint32_t topbit = 0;
6509 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6510 {
6511 topbit = 0x80000000;
6512 c = *p++;
6513 }
6514 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6515 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6516 c |= topbit;
6517 }
6518
6519 /* Handle backslash escapes */
6520
6521 else switch ((c = *p++))
6522 {
6523 case '\\': break;
6524 case 'a': c = CHAR_BEL; break;
6525 case 'b': c = '\b'; break;
6526 case 'e': c = CHAR_ESC; break;
6527 case 'f': c = '\f'; break;
6528 case 'n': c = '\n'; break;
6529 case 'r': c = '\r'; break;
6530 case 't': c = '\t'; break;
6531 case 'v': c = '\v'; break;
6532
6533 case '0': case '1': case '2': case '3':
6534 case '4': case '5': case '6': case '7':
6535 c -= '0';
6536 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6537 c = c * 8 + *p++ - '0';
6538 break;
6539
6540 case 'o':
6541 if (*p == '{')
6542 {
6543 uint8_t *pt = p;
6544 c = 0;
6545 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6546 {
6547 if (++i == 12)
6548 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6549 "using only the first twelve.\n");
6550 else c = c * 8 + *pt - '0';
6551 }
6552 if (*pt == '}') p = pt + 1;
6553 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6554 }
6555 break;
6556
6557 case 'x':
6558 if (*p == '{')
6559 {
6560 uint8_t *pt = p;
6561 c = 0;
6562
6563 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6564 when isxdigit() is a macro that refers to its argument more than
6565 once. This is banned by the C Standard, but apparently happens in at
6566 least one MacOS environment. */
6567
6568 for (pt++; isxdigit(*pt); pt++)
6569 {
6570 if (++i == 9)
6571 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6572 "using only the first eight.\n");
6573 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6574 }
6575 if (*pt == '}')
6576 {
6577 p = pt + 1;
6578 break;
6579 }
6580 /* Not correct form for \x{...}; fall through */
6581 }
6582
6583 /* \x without {} always defines just one byte in 8-bit mode. This
6584 allows UTF-8 characters to be constructed byte by byte, and also allows
6585 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6586 Otherwise, pass it down as data. */
6587
6588 c = 0;
6589 while (i++ < 2 && isxdigit(*p))
6590 {
6591 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6592 p++;
6593 }
6594 #if defined SUPPORT_PCRE2_8
6595 if (utf && (test_mode == PCRE8_MODE))
6596 {
6597 *q8++ = c;
6598 continue;
6599 }
6600 #endif
6601 break;
6602
6603 case 0: /* \ followed by EOF allows for an empty line */
6604 p--;
6605 continue;
6606
6607 case '=': /* \= terminates the data, starts modifiers */
6608 goto ENDSTRING;
6609
6610 case '[': /* \[ introduces a replicated character sequence */
6611 if (start_rep != NULL)
6612 {
6613 fprintf(outfile, "** Nested replication is not supported\n");
6614 return PR_OK;
6615 }
6616 start_rep = CAST8VAR(q);
6617 continue;
6618
6619 default:
6620 if (isalnum(c))
6621 {
6622 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6623 return PR_OK;
6624 }
6625 }
6626
6627 /* We now have a character value in c that may be greater than 255.
6628 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6629 than 127 in UTF mode must have come from \x{...} or octal constructs
6630 because values from \x.. get this far only in non-UTF mode. */
6631
6632 #ifdef SUPPORT_PCRE2_8
6633 if (test_mode == PCRE8_MODE)
6634 {
6635 if (utf)
6636 {
6637 if (c > 0x7fffffff)
6638 {
6639 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6640 "and so cannot be converted to UTF-8\n", c);
6641 return PR_OK;
6642 }
6643 q8 += ord2utf8(c, q8);
6644 }
6645 else
6646 {
6647 if (c > 0xffu)
6648 {
6649 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6650 "and UTF-8 mode is not enabled.\n", c);
6651 fprintf(outfile, "** Truncation will probably give the wrong "
6652 "result.\n");
6653 }
6654 *q8++ = c;
6655 }
6656 }
6657 #endif
6658 #ifdef SUPPORT_PCRE2_16
6659 if (test_mode == PCRE16_MODE)
6660 {
6661 if (utf)
6662 {
6663 if (c > 0x10ffffu)
6664 {
6665 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6666 "0x10ffff and so cannot be converted to UTF-16\n", c);
6667 return PR_OK;
6668 }
6669 else if (c >= 0x10000u)
6670 {
6671 c-= 0x10000u;
6672 *q16++ = 0xD800 | (c >> 10);
6673 *q16++ = 0xDC00 | (c & 0x3ff);
6674 }
6675 else
6676 *q16++ = c;
6677 }
6678 else
6679 {
6680 if (c > 0xffffu)
6681 {
6682 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6683 "and UTF-16 mode is not enabled.\n", c);
6684 fprintf(outfile, "** Truncation will probably give the wrong "
6685 "result.\n");
6686 }
6687
6688 *q16++ = c;
6689 }
6690 }
6691 #endif
6692 #ifdef SUPPORT_PCRE2_32
6693 if (test_mode == PCRE32_MODE)
6694 {
6695 *q32++ = c;
6696 }
6697 #endif
6698 }
6699
6700 ENDSTRING:
6701 SET(*q, 0);
6702 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
6703 ulen = len/code_unit_size; /* Length in code units */
6704 arg_ulen = ulen; /* Value to use in match arg */
6705
6706 /* If the string was terminated by \= we must now interpret modifiers. */
6707
6708 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
6709 return PR_OK;
6710
6711 /* Check for mutually exclusive modifiers. At present, these are all in the
6712 first control word. */
6713
6714 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
6715 {
6716 c = dat_datctl.control & exclusive_dat_controls[k];
6717 if (c != 0 && c != (c & (~c+1)))
6718 {
6719 show_controls(c, 0, "** Not allowed together:");
6720 fprintf(outfile, "\n");
6721 return PR_OK;
6722 }
6723 }
6724
6725 if (pat_patctl.replacement[0] != 0 &&
6726 (dat_datctl.control & CTL_NULLCONTEXT) != 0)
6727 {
6728 fprintf(outfile, "** Replacement text is not supported with null_context.\n");
6729 return PR_OK;
6730 }
6731
6732 /* We now have the subject in dbuffer, with len containing the byte length, and
6733 ulen containing the code unit length, with a copy in arg_ulen for use in match
6734 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
6735 zero_terminate modifier is present).
6736
6737 Move the data to the end of the buffer so that a read over the end can be
6738 caught by valgrind or other means. If we have explicit valgrind support, mark
6739 the unused start of the buffer unaddressable. If we are using the POSIX
6740 interface, or testing zero-termination, we must include the terminating zero in
6741 the usable data. */
6742
6743 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
6744 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
6745 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
6746 #ifdef SUPPORT_VALGRIND
6747 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
6748 #endif
6749
6750 /* Now pp points to the subject string. POSIX matching is only possible in
6751 8-bit mode, and it does not support timing or other fancy features. Some were
6752 checked at compile time, but we need to check the match-time settings here. */
6753
6754 #ifdef SUPPORT_PCRE2_8
6755 if ((pat_patctl.control & CTL_POSIX) != 0)
6756 {
6757 int rc;
6758 int eflags = 0;
6759 regmatch_t *pmatch = NULL;
6760 const char *msg = "** Ignored with POSIX interface:";
6761
6762 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
6763 prmsg(&msg, "callout_error");
6764 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
6765 prmsg(&msg, "callout_fail");
6766 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
6767 prmsg(&msg, "copy");
6768 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
6769 prmsg(&msg, "get");
6770 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
6771 if (dat_datctl.offset != 0) prmsg(&msg, "offset");
6772
6773 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
6774 {
6775 fprintf(outfile, "%s", msg);
6776 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
6777 msg = "";
6778 }
6779 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
6780 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
6781 {
6782 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
6783 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
6784 msg = "";
6785 }
6786
6787 if (msg[0] == 0) fprintf(outfile, "\n");
6788
6789 if (dat_datctl.oveccount > 0)
6790 {
6791 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
6792 if (pmatch == NULL)
6793 {
6794 fprintf(outfile, "** Failed to get memory for recording matching "
6795 "information (size set = %du)\n", dat_datctl.oveccount);
6796 return PR_OK;
6797 }
6798 }
6799
6800 if (dat_datctl.startend[0] != CFORE_UNSET)
6801 {
6802 pmatch[0].rm_so = dat_datctl.startend[0];
6803 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
6804 dat_datctl.startend[1] : len;
6805 eflags |= REG_STARTEND;
6806 }
6807
6808 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
6809 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
6810 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
6811
6812 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
6813 if (rc != 0)
6814 {
6815 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
6816 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
6817 }
6818 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
6819 fprintf(outfile, "Matched with REG_NOSUB\n");
6820 else if (dat_datctl.oveccount == 0)
6821 fprintf(outfile, "Matched without capture\n");
6822 else
6823 {
6824 size_t i, j;
6825 size_t last_printed = (size_t)dat_datctl.oveccount;
6826 for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
6827 {
6828 if (pmatch[i].rm_so >= 0)
6829 {
6830 PCRE2_SIZE start = pmatch[i].rm_so;
6831 PCRE2_SIZE end = pmatch[i].rm_eo;
6832 for (j = last_printed + 1; j < i; j++)
6833 fprintf(outfile, "%2d: <unset>\n", (int)j);
6834 last_printed = i;
6835 if (start > end)
6836 {
6837 start = pmatch[i].rm_eo;
6838 end = pmatch[i].rm_so;
6839 fprintf(outfile, "Start of matched string is beyond its end - "
6840 "displaying from end to start.\n");
6841 }
6842 fprintf(outfile, "%2d: ", (int)i);
6843 PCHARSV(pp, start, end - start, utf, outfile);
6844 fprintf(outfile, "\n");
6845
6846 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
6847 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
6848 {
6849 fprintf(outfile, "%2d+ ", (int)i);
6850 /* Note: don't use the start/end variables here because we want to
6851 show the text from what is reported as the end. */
6852 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
6853 fprintf(outfile, "\n"); }
6854 }
6855 }
6856 }
6857 free(pmatch);
6858 return PR_OK;
6859 }
6860 #endif /* SUPPORT_PCRE2_8 */
6861
6862 /* Handle matching via the native interface. Check for consistency of
6863 modifiers. */
6864
6865 if (dat_datctl.startend[0] != CFORE_UNSET)
6866 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
6867
6868 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
6869 matching, even if the JIT compiler was used. */
6870
6871 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
6872 FLD(compiled_code, executable_jit) != NULL)
6873 {
6874 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
6875 dat_datctl.control &= ~CTL_ALLUSEDTEXT;
6876 }
6877
6878 /* Handle passing the subject as zero-terminated. */
6879
6880 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
6881 arg_ulen = PCRE2_ZERO_TERMINATED;
6882
6883 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
6884 NULL context. */
6885
6886 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
6887 NULL : PTR(dat_context);
6888
6889 /* Enable display of malloc/free if wanted. We can do this only if either the
6890 pattern or the subject is processed with a context. */
6891
6892 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
6893
6894 if (show_memory &&
6895 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
6896 fprintf(outfile, "** \\=memory requires either a pattern or a subject "
6897 "context: ignored\n");
6898
6899 /* Create and assign a JIT stack if requested. */
6900
6901 if (dat_datctl.jitstack != 0)
6902 {
6903 if (dat_datctl.jitstack != jit_stack_size)
6904 {
6905 PCRE2_JIT_STACK_FREE(jit_stack);
6906 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
6907 jit_stack_size = dat_datctl.jitstack;
6908 }
6909 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
6910 }
6911
6912 /* Or de-assign */
6913
6914 else if (jit_stack != NULL)
6915 {
6916 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
6917 PCRE2_JIT_STACK_FREE(jit_stack);
6918 jit_stack = NULL;
6919 jit_stack_size = 0;
6920 }
6921
6922 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
6923 if we want to verify that JIT was actually used. */
6924
6925 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
6926 {
6927 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
6928 }
6929
6930 /* Adjust match_data according to size of offsets required. A size of zero
6931 causes a new match data block to be obtained that exactly fits the pattern. */
6932
6933 if (dat_datctl.oveccount == 0)
6934 {
6935 PCRE2_MATCH_DATA_FREE(match_data);
6936 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
6937 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
6938 }
6939 else if (dat_datctl.oveccount <= max_oveccount)
6940 {
6941 SETFLD(match_data, oveccount, dat_datctl.oveccount);
6942 }
6943 else
6944 {
6945 max_oveccount = dat_datctl.oveccount;
6946 PCRE2_MATCH_DATA_FREE(match_data);
6947 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
6948 }
6949
6950 if (CASTVAR(void *, match_data) == NULL)
6951 {
6952 fprintf(outfile, "** Failed to get memory for recording matching "
6953 "information (size requested: %d)\n", dat_datctl.oveccount);
6954 max_oveccount = 0;
6955 return PR_OK;
6956 }
6957
6958 /* Replacement processing is ignored for DFA matching. */
6959
6960 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
6961 {
6962 fprintf(outfile, "** Ignored for DFA matching: replace\n");
6963 dat_datctl.replacement[0] = 0;
6964 }
6965
6966 /* If a replacement string is provided, call pcre2_substitute() instead of one
6967 of the matching functions. First we have to convert the replacement string to
6968 the appropriate width. */
6969
6970 if (dat_datctl.replacement[0] != 0)
6971 {
6972 int rc;
6973 uint8_t *pr;
6974 uint8_t rbuffer[REPLACE_BUFFSIZE];
6975 uint8_t nbuffer[REPLACE_BUFFSIZE];
6976 uint32_t xoptions;
6977 PCRE2_SIZE rlen, nsize, erroroffset;
6978 BOOL badutf = FALSE;
6979
6980 #ifdef SUPPORT_PCRE2_8
6981 uint8_t *r8 = NULL;
6982 #endif
6983 #ifdef SUPPORT_PCRE2_16
6984 uint16_t *r16 = NULL;
6985 #endif
6986 #ifdef SUPPORT_PCRE2_32
6987 uint32_t *r32 = NULL;
6988 #endif
6989
6990 if (timeitm)
6991 fprintf(outfile, "** Timing is not supported with replace: ignored\n");
6992
6993 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
6994 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
6995
6996 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
6997 PCRE2_SUBSTITUTE_GLOBAL) |
6998 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
6999 PCRE2_SUBSTITUTE_EXTENDED) |
7000 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7001 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7002 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7003 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7004 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7005 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7006
7007 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
7008 pr = dat_datctl.replacement;
7009
7010 /* If the replacement starts with '[<number>]' we interpret that as length
7011 value for the replacement buffer. */
7012
7013 nsize = REPLACE_BUFFSIZE/code_unit_size;
7014 if (*pr == '[')
7015 {
7016 PCRE2_SIZE n = 0;
7017 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7018 if (*pr++ != ']')
7019 {
7020 fprintf(outfile, "Bad buffer size in replacement string\n");
7021 return PR_OK;
7022 }
7023 if (n > nsize)
7024 {
7025 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7026 "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
7027 return PR_OK;
7028 }
7029 nsize = n;
7030 }
7031
7032 /* Now copy the replacement string to a buffer of the appropriate width. No
7033 escape processing is done for replacements. In UTF mode, check for an invalid
7034 UTF-8 input string, and if it is invalid, just copy its code units without
7035 UTF interpretation. This provides a means of checking that an invalid string
7036 is detected. Otherwise, UTF-8 can be used to include wide characters in a
7037 replacement. */
7038
7039 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7040
7041 /* Not UTF or invalid UTF-8: just copy the code units. */
7042
7043 if (!utf || badutf)
7044 {
7045 while ((c = *pr++) != 0)
7046 {
7047 #ifdef SUPPORT_PCRE2_8
7048 if (test_mode == PCRE8_MODE) *r8++ = c;
7049 #endif
7050 #ifdef SUPPORT_PCRE2_16
7051 if (test_mode == PCRE16_MODE) *r16++ = c;
7052 #endif
7053 #ifdef SUPPORT_PCRE2_32
7054 if (test_mode == PCRE32_MODE) *r32++ = c;
7055 #endif
7056 }
7057 }
7058
7059 /* Valid UTF-8 replacement string */
7060
7061 else while ((c = *pr++) != 0)
7062 {
7063 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7064
7065 #ifdef SUPPORT_PCRE2_8
7066 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7067 #endif
7068
7069 #ifdef SUPPORT_PCRE2_16
7070 if (test_mode == PCRE16_MODE)
7071 {
7072 if (c >= 0x10000u)
7073 {
7074 c-= 0x10000u;
7075 *r16++ = 0xD800 | (c >> 10);
7076 *r16++ = 0xDC00 | (c & 0x3ff);
7077 }
7078 else *r16++ = c;
7079 }
7080 #endif
7081
7082 #ifdef SUPPORT_PCRE2_32
7083 if (test_mode == PCRE32_MODE) *r32++ = c;
7084 #endif
7085 }
7086
7087 SET(*r, 0);
7088 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7089 rlen = PCRE2_ZERO_TERMINATED;
7090 else
7091 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7092 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7093 dat_datctl.options|xoptions, match_data, dat_context,
7094 rbuffer, rlen, nbuffer, &nsize);
7095
7096 if (rc < 0)
7097 {
7098 fprintf(outfile, "Failed: error %d", rc);
7099 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7100 fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7101 fprintf(outfile, ": ");
7102 if (!print_error_message(rc, "", "")) return PR_ABEND;
7103 if (rc == PCRE2_ERROR_NOMEMORY &&
7104 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7105 fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7106 }
7107 else
7108 {
7109 fprintf(outfile, "%2d: ", rc);
7110 PCHARSV(nbuffer, 0, nsize, utf, outfile);
7111 }
7112
7113 fprintf(outfile, "\n");
7114 show_memory = FALSE;
7115 return PR_OK;
7116 } /* End of substitution handling */
7117
7118 /* When a replacement string is not provided, run a loop for global matching
7119 with one of the basic matching functions. For altglobal (or first time round
7120 the loop), set an "unset" value for the previous match info. */
7121
7122 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7123
7124 for (gmatched = 0;; gmatched++)
7125 {
7126 PCRE2_SIZE j;
7127 int capcount;
7128 PCRE2_SIZE *ovector;
7129
7130 ovector = FLD(match_data, ovector);
7131
7132 /* Fill the ovector with junk to detect elements that do not get set
7133 when they should be. */
7134
7135 for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
7136
7137 /* When matching is via pcre2_match(), we will detect the use of JIT via the
7138 stack callback function. */
7139
7140 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7141
7142 /* Do timing if required. */
7143
7144 if (timeitm > 0)
7145 {
7146 int i;
7147 clock_t start_time, time_taken;
7148
7149 if ((dat_datctl.control & CTL_DFA) != 0)
7150 {
7151 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7152 {
7153 fprintf(outfile, "Timing DFA restarts is not supported\n");
7154 return PR_OK;
7155 }
7156 if (dfa_workspace == NULL)
7157 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7158 start_time = clock();
7159 for (i = 0; i < timeitm; i++)
7160 {
7161 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7162 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7163 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7164 }
7165 }
7166
7167 else if ((pat_patctl.control & CTL_JITFAST) != 0)
7168 {
7169 start_time = clock();
7170 for (i = 0; i < timeitm; i++)
7171 {
7172 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7173 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7174 use_dat_context);
7175 }
7176 }
7177
7178 else
7179 {
7180 start_time = clock();
7181 for (i = 0; i < timeitm; i++)
7182 {
7183 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7184 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7185 use_dat_context);
7186 }
7187 }
7188 total_match_time += (time_taken = clock() - start_time);
7189 fprintf(outfile, "Match time %.4f milliseconds\n",
7190 (((double)time_taken * 1000.0) / (double)timeitm) /
7191 (double)CLOCKS_PER_SEC);
7192 }
7193
7194 /* Find the heap, match and depth limits if requested. The depth and heap
7195 limits are not relevant for JIT. The return from check_match_limit() is the
7196 return from the final call to pcre2_match() or pcre2_dfa_match(). */
7197
7198 if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7199 {
7200 capcount = 0; /* This stops compiler warnings */
7201
7202 if (FLD(compiled_code, executable_jit) == NULL ||
7203 (dat_datctl.options & PCRE2_NO_JIT) != 0)
7204 {
7205 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7206 }
7207
7208 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7209 "match");
7210
7211 if (FLD(compiled_code, executable_jit) == NULL ||
7212 (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7213 (dat_datctl.control & CTL_DFA) != 0)
7214 {
7215 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7216 "depth");
7217 }
7218
7219 if (capcount == 0)
7220 {
7221 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7222 capcount = dat_datctl.oveccount;
7223 }
7224 }
7225
7226 /* Otherwise just run a single match, setting up a callout if required (the
7227 default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7228
7229 else
7230 {
7231 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7232 {
7233 PCRE2_SET_CALLOUT(dat_context, callout_function,
7234 (void *)(&dat_datctl.callout_data));
7235 first_callout = TRUE;
7236 last_callout_mark = NULL;
7237 callout_count = 0;
7238 }
7239 else
7240 {
7241 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
7242 }
7243
7244 /* Run a single DFA or NFA match. */
7245
7246 if ((dat_datctl.control & CTL_DFA) != 0)
7247 {
7248 if (dfa_workspace == NULL)
7249 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7250 if (dfa_matched++ == 0)
7251 dfa_workspace[0] = -1; /* To catch bad restart */
7252 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7253 dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7254 use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7255 if (capcount == 0)
7256 {
7257 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7258 capcount = dat_datctl.oveccount;
7259 }
7260 }
7261 else
7262 {
7263 if ((pat_patctl.control & CTL_JITFAST) != 0)
7264 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7265 dat_datctl.options | g_notempty, match_data, use_dat_context);
7266 else
7267 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7268 dat_datctl.options | g_notempty, match_data, use_dat_context);
7269 if (capcount == 0)
7270 {
7271 fprintf(outfile, "Matched, but too many substrings\n");
7272 capcount = dat_datctl.oveccount;
7273 }
7274 }
7275 }
7276
7277 /* The result of the match is now in capcount. First handle a successful
7278 match. */
7279
7280 if (capcount >= 0)
7281 {
7282 int i;
7283 uint32_t oveccount;
7284
7285 /* This is a check against a lunatic return value. */
7286
7287 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7288 if (capcount > (int)oveccount)
7289 {
7290 fprintf(outfile,
7291 "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7292 capcount, oveccount);
7293 capcount = oveccount;
7294 if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7295 {
7296 fprintf(outfile, "** Global loop abandoned\n");
7297 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7298 }
7299 }
7300
7301 /* If this is not the first time round a global loop, check that the
7302 returned string has changed. If it has not, check for an empty string match
7303 at different starting offset from the previous match. This is a failed test
7304 retry for null-matching patterns that don't match at their starting offset,
7305 for example /(?<=\G.)/. A repeated match at the same point is not such a
7306 pattern, and must be discarded, and we then proceed to seek a non-null
7307 match at the current point. For any other repeated match, there is a bug
7308 somewhere and we must break the loop because it will go on for ever. We
7309 know that there are always at least two elements in the ovector. */
7310
7311 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7312 {
7313 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7314 {
7315 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7316 ovecsave[2] = dat_datctl.offset;
7317 continue; /* Back to the top of the loop */
7318 }
7319 fprintf(outfile,
7320 "** PCRE2 error: global repeat returned the same string as previous\n");
7321 fprintf(outfile, "** Global loop abandoned\n");
7322 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
7323 }
7324
7325 /* "allcaptures" requests showing of all captures in the pattern, to check
7326 unset ones at the end. It may be set on the pattern or the data. Implement
7327 by setting capcount to the maximum. This is not relevant for DFA matching,
7328 so ignore it. */
7329
7330 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7331 {
7332 uint32_t maxcapcount;
7333 if ((dat_datctl.control & CTL_DFA) != 0)
7334 {
7335 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7336 }
7337 else
7338 {
7339 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
7340 return PR_SKIP;
7341 capcount = maxcapcount + 1; /* Allow for full match */
7342 if (capcount > (int)oveccount) capcount = oveccount;
7343 }
7344 }
7345
7346 /* Output the captured substrings. Note that, for the matched string,
7347 the use of \K in an assertion can make the start later than the end. */
7348
7349 for (i = 0; i < 2*capcount; i += 2)
7350 {
7351 PCRE2_SIZE lleft, lmiddle, lright;
7352 PCRE2_SIZE start = ovector[i];
7353 PCRE2_SIZE end = ovector[i+1];
7354
7355 if (start > end)
7356 {
7357 start = ovector[i+1];
7358 end = ovector[i];
7359 fprintf(outfile, "Start of matched string is beyond its end - "
7360 "displaying from end to start.\n");
7361 }
7362
7363 fprintf(outfile, "%2d: ", i/2);
7364
7365 /* Check for an unset group */
7366
7367 if (start == PCRE2_UNSET)
7368 {
7369 fprintf(outfile, "<unset>\n");
7370 continue;
7371 }
7372
7373 /* Check for silly offsets, in particular, values that have not been
7374 set when they should have been. */
7375
7376 if (start > ulen || end > ulen)
7377 {
7378 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7379 (unsigned long int)start, (unsigned long int)end);
7380 continue;
7381 }
7382
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7384 JIT, it is disabled above, with a comment.) When the match is done by the
7385 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7386 set, and if the leftmost consulted character is before the start of the
7387 match or the rightmost consulted character is past the end of the match,
7388 we want to show all consulted characters for the main matched string, and
7389 indicate which were lookarounds. */
7390
7391 if (i == 0)
7392 {
7393 BOOL showallused;
7394 PCRE2_SIZE leftchar, rightchar;
7395
7396 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7397 {
7398 leftchar = FLD(match_data, leftchar);
7399 rightchar = FLD(match_data, rightchar);
7400 showallused = i == 0 && (leftchar < start || rightchar > end);
7401 }
7402 else showallused = FALSE;
7403
7404 if (showallused)
7405 {
7406 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7407 PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7408 PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7409 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7410 fprintf(outfile, " (JIT)");
7411 fprintf(outfile, "\n ");
7412 for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7413 for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7414 for (j = 0; j < lright; j++) fprintf(outfile, ">");
7415 }
7416
7417 /* When a pattern contains \K, the start of match position may be
7418 different to the start of the matched string. When this is the case,
7419 show it when requested. */
7420
7421 else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7422 {
7423 PCRE2_SIZE startchar;
7424 PCRE2_GET_STARTCHAR(startchar, match_data);
7425 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7426 PCHARSV(pp, start, end - start, utf, outfile);
7427 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7428 fprintf(outfile, " (JIT)");
7429 if (startchar != start)
7430 {
7431 fprintf(outfile, "\n ");
7432 for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7433 }
7434 }
7435
7436 /* Otherwise, just show the matched string. */
7437
7438 else
7439 {
7440 PCHARSV(pp, start, end - start, utf, outfile);
7441 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7442 fprintf(outfile, " (JIT)");
7443 }
7444 }
7445
7446 /* Not the main matched string. Just show it unadorned. */
7447
7448 else
7449 {
7450 PCHARSV(pp, start, end - start, utf, outfile);
7451 }
7452
7453 fprintf(outfile, "\n");
7454
7455 /* Note: don't use the start/end variables here because we want to
7456 show the text from what is reported as the end. */
7457
7458 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7459 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7460 {
7461 fprintf(outfile, "%2d+ ", i/2);
7462 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7463 fprintf(outfile, "\n");
7464 }
7465 }
7466
7467 /* Output (*MARK) data if requested */
7468
7469 if ((dat_datctl.control & CTL_MARK) != 0 &&
7470 TESTFLD(match_data, mark, !=, NULL))
7471 {
7472 fprintf(outfile, "MK: ");
7473 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7474 fprintf(outfile, "\n");
7475 }
7476
7477 /* Process copy/get strings */
7478
7479 if (!copy_and_get(utf, capcount)) return PR_ABEND;
7480
7481 } /* End of handling a successful match */
7482
7483 /* There was a partial match. The value of ovector[0] is the bumpalong point,
7484 that is, startchar, not any \K point that might have been passed. */
7485
7486 else if (capcount == PCRE2_ERROR_PARTIAL)
7487 {
7488 PCRE2_SIZE poffset;
7489 int backlength;
7490 int rubriclength = 0;
7491
7492 fprintf(outfile, "Partial match");
7493 if ((dat_datctl.control & CTL_MARK) != 0 &&
7494 TESTFLD(match_data, mark, !=, NULL))
7495 {
7496 fprintf(outfile, ", mark=");
7497 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
7498 outfile);
7499 rubriclength += 7;
7500 }
7501 fprintf(outfile, ": ");
7502 rubriclength += 15;
7503
7504 poffset = backchars(pp, ovector[0], maxlookbehind, utf);
7505 PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
7506 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7507
7508 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7509 fprintf(outfile, " (JIT)");
7510 fprintf(outfile, "\n");
7511
7512 if (backlength != 0)
7513 {
7514 int i;
7515 for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7516 for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7517 fprintf(outfile, "\n");
7518 }
7519
7520 /* Process copy/get strings */
7521
7522 if (!copy_and_get(utf, 1)) return PR_ABEND;
7523
7524 break; /* Out of the /g loop */
7525 } /* End of handling partial match */
7526
7527 /* Failed to match. If this is a /g or /G loop, we might previously have
7528 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7529 If that is the case, this is not necessarily the end. We want to advance the
7530 start offset, and continue. We won't be at the end of the string - that was
7531 checked before setting g_notempty. We achieve the effect by pretending that a
7532 single character was matched.
7533
7534 Complication arises in the case when the newline convention is "any", "crlf",
7535 or "anycrlf". If the previous match was at the end of a line terminated by
7536 CRLF, an advance of one character just passes the CR, whereas we should
7537 prefer the longer newline sequence, as does the code in pcre2_match().
7538
7539 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7540 character, not one byte. */
7541
7542 else if (g_notempty != 0) /* There was a previous null match */
7543 {
7544 uint16_t nl = FLD(compiled_code, newline_convention);
7545 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
7546 PCRE2_SIZE end_offset = start_offset + 1;
7547
7548 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7549 nl == PCRE2_NEWLINE_ANYCRLF) &&
7550 start_offset < ulen - 1 &&
7551 CODE_UNIT(pp, start_offset) == '\r' &&
7552 CODE_UNIT(pp, end_offset) == '\n')
7553 end_offset++;
7554
7555 else if (utf && test_mode != PCRE32_MODE)
7556 {
7557 if (test_mode == PCRE8_MODE)
7558 {
7559 for (; end_offset < ulen; end_offset++)
7560 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7561 }
7562 else /* 16-bit mode */
7563 {
7564 for (; end_offset < ulen; end_offset++)
7565 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7566 }
7567 }
7568
7569 SETFLDVEC(match_data, ovector, 0, start_offset);
7570 SETFLDVEC(match_data, ovector, 1, end_offset);
7571 } /* End of handling null match in a global loop */
7572
7573 /* A "normal" match failure. There will be a negative error number in
7574 capcount. */
7575
7576 else
7577 {
7578 switch(capcount)
7579 {
7580 case PCRE2_ERROR_NOMATCH:
7581 if (gmatched == 0)
7582 {
7583 fprintf(outfile, "No match");
7584 if ((dat_datctl.control & CTL_MARK) != 0 &&
7585 TESTFLD(match_data, mark, !=, NULL))
7586 {
7587 fprintf(outfile, ", mark = ");
7588 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
7589 }
7590 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7591 fprintf(outfile, " (JIT)");
7592 fprintf(outfile, "\n");
7593 }
7594 break;
7595
7596 case PCRE2_ERROR_BADUTFOFFSET:
7597 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7598 break;
7599
7600 default:
7601 fprintf(outfile, "Failed: error %d: ", capcount);
7602 if (!print_error_message(capcount, "", "")) return PR_ABEND;
7603 if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7604 capcount >= PCRE2_ERROR_UTF32_ERR2)
7605 {
7606 PCRE2_SIZE startchar;
7607 PCRE2_GET_STARTCHAR(startchar, match_data);
7608 fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
7609 }
7610 fprintf(outfile, "\n");
7611 break;
7612 }
7613
7614 break; /* Out of the /g loop */
7615 } /* End of failed match handling */
7616
7617 /* Control reaches here in two circumstances: (a) after a match, and (b)
7618 after a non-match that immediately followed a match on an empty string when
7619 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
7620 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
7621 of one character. So effectively we get here only after a match. If we
7622 are not doing a global search, we are done. */
7623
7624 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
7625 {
7626 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
7627 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
7628
7629 /* We must now set up for the next iteration of a global search. If we have
7630 matched an empty string, first check to see if we are at the end of the
7631 subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
7632 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
7633 at the same point. If this fails it will be picked up above, where a fake
7634 match is set up so that at this point we advance to the next character.
7635
7636 However, in order to cope with patterns that never match at their starting
7637 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
7638 than the starting offset. This means there will be a retry with the
7639 starting offset at the match offset. If this returns the same match again,
7640 it is picked up above and ignored, and the special action is then taken. */
7641
7642 if (match_offset == end_offset)
7643 {
7644 if (end_offset == ulen) break; /* End of subject */
7645 if (match_offset <= dat_datctl.offset)
7646 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7647 }
7648
7649 /* However, even after matching a non-empty string, there is still one
7650 tricky case. If a pattern contains \K within a lookbehind assertion at the
7651 start, the end of the matched string can be at the offset where the match
7652 started. In the case of a normal /g iteration without special action, this
7653 leads to a loop that keeps on returning the same substring. The loop would
7654 be caught above, but we really want to move on to the next match. */
7655
7656 else
7657 {
7658 g_notempty = 0; /* Set for a "normal" repeat */
7659 if ((dat_datctl.control & CTL_GLOBAL) != 0)
7660 {
7661 PCRE2_SIZE startchar;
7662 PCRE2_GET_STARTCHAR(startchar, match_data);
7663 if (end_offset <= startchar)
7664 {
7665 if (startchar >= ulen) break; /* End of subject */
7666 end_offset = startchar + 1;
7667 if (utf && test_mode != PCRE32_MODE)
7668 {
7669 if (test_mode == PCRE8_MODE)
7670 {
7671 for (; end_offset < ulen; end_offset++)
7672 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7673 }
7674 else /* 16-bit mode */
7675 {
7676 for (; end_offset < ulen; end_offset++)
7677 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7678 }
7679 }
7680 }
7681 }
7682 }
7683
7684 /* For a normal global (/g) iteration, save the current ovector[0,1] and
7685 the starting offset so that we can check that they do change each time.
7686 Otherwise a matching bug that returns the same string causes an infinite
7687 loop. It has happened! Then update the start offset, leaving other
7688 parameters alone. */
7689
7690 if ((dat_datctl.control & CTL_GLOBAL) != 0)
7691 {
7692 ovecsave[0] = ovector[0];
7693 ovecsave[1] = ovector[1];
7694 ovecsave[2] = dat_datctl.offset;
7695 dat_datctl.offset = end_offset;
7696 }
7697
7698 /* For altglobal, just update the pointer and length. */
7699
7700 else
7701 {
7702 pp += end_offset * code_unit_size;
7703 len -= end_offset * code_unit_size;
7704 ulen -= end_offset;
7705 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
7706 }
7707 }
7708 } /* End of global loop */
7709
7710 show_memory = FALSE;
7711 return PR_OK;
7712 }
7713
7714
7715
7716
7717 /*************************************************
7718 * Print PCRE2 version *
7719 *************************************************/
7720
7721 static void
print_version(FILE * f)7722 print_version(FILE *f)
7723 {
7724 VERSION_TYPE *vp;
7725 fprintf(f, "PCRE2 version ");
7726 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
7727 fprintf(f, "\n");
7728 }
7729
7730
7731
7732 /*************************************************
7733 * Print Unicode version *
7734 *************************************************/
7735
7736 static void
print_unicode_version(FILE * f)7737 print_unicode_version(FILE *f)
7738 {
7739 VERSION_TYPE *vp;
7740 fprintf(f, "Unicode version ");
7741 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
7742 }
7743
7744
7745
7746 /*************************************************
7747 * Print JIT target *
7748 *************************************************/
7749
7750 static void
print_jit_target(FILE * f)7751 print_jit_target(FILE *f)
7752 {
7753 VERSION_TYPE *vp;
7754 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
7755 }
7756
7757
7758
7759 /*************************************************
7760 * Print newline configuration *
7761 *************************************************/
7762
7763 /* Output is always to stdout.
7764
7765 Arguments:
7766 rc the return code from PCRE2_CONFIG_NEWLINE
7767 isc TRUE if called from "-C newline"
7768 Returns: nothing
7769 */
7770
7771 static void
print_newline_config(uint32_t optval,BOOL isc)7772 print_newline_config(uint32_t optval, BOOL isc)
7773 {
7774 if (!isc) printf(" Newline sequence is ");
7775 if (optval < sizeof(newlines)/sizeof(char *))
7776 printf("%s\n", newlines[optval]);
7777 else
7778 printf("a non-standard value: %d\n", optval);
7779 }
7780
7781
7782
7783 /*************************************************
7784 * Usage function *
7785 *************************************************/
7786
/* Write the command line summary to stdout. The text is emitted in groups of
adjacent string literals; a new group starts wherever a line depends on the
build configuration (readline support and the available code unit widths). */

static void
usage(void)
{
fputs(
  "Usage: pcre2test [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE2_8
fputs(" -8 use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs(" -16 use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs(" -32 use the 32-bit library\n", stdout);
#endif

/* Options that do not depend on the configuration, up to the -C values. */

fputs(
  " -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"
  " -AC as -ac, but also set subject 'callout_extra' modifier\n"
  " -b set default pattern modifier 'fullbincode'\n"
  " -C show PCRE2 compile-time options and exit\n"
  " -C arg show a specific compile-time option and exit with its\n"
  " value if numeric (else 0). The arg can be:\n"
  " backslash-C use of \\C is enabled [0, 1]\n"
  " bsr \\R type [ANYCRLF, ANY]\n"
  " ebcdic compiled for EBCDIC character code [0,1]\n"
  " ebcdic-nl NL code if compiled for EBCDIC\n"
  " jit just-in-time compiler supported [0, 1]\n"
  " linksize internal link size [2, 3, 4]\n"
  " newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"
  " pcre2-8 8 bit library support enabled [0, 1]\n"
  " pcre2-16 16 bit library support enabled [0, 1]\n"
  " pcre2-32 32 bit library support enabled [0, 1]\n"
  " unicode Unicode and UTF support enabled [0, 1]\n", stdout);

/* The remaining options. */

fputs(
  " -d set default pattern modifier 'debug'\n"
  " -dfa set default subject modifier 'dfa'\n"
  " -error <n,m,..> show messages for error numbers, then exit\n"
  " -help show usage information\n"
  " -i set default pattern modifier 'info'\n"
  " -jit set default pattern modifier 'jit'\n"
  " -jitverify set default pattern modifier 'jitverify'\n"
  " -LM list pattern and subject modifiers, then exit\n"
  " -q quiet: do not output PCRE2 version number at start\n"
  " -pattern <s> set default pattern modifier fields\n"
  " -subject <s> set default subject modifier fields\n"
  " -S <n> set stack size to <n> mebibytes\n"
  " -t [<n>] time compilation and execution, repeating <n> times\n"
  " -tm [<n>] time execution (matching) only, repeating <n> times\n"
  " -T same as -t, but show total times at the end\n"
  " -TM same as -tm, but show total time at the end\n"
  " -version show PCRE2 version and exit\n", stdout);
}
7842
7843
7844
7845 /*************************************************
7846 * Handle -C option *
7847 *************************************************/
7848
7849 /* This option outputs configuration options and sets an appropriate return
7850 code when asked for a single option. The code is abstracted into a separate
7851 function because of its size. Use whichever pcre2_config() function is
7852 available.
7853
7854 Argument: an option name or NULL
7855 Returns: the return code
7856 */
7857
7858 static int
c_option(const char * arg)7859 c_option(const char *arg)
7860 {
7861 uint32_t optval;
7862 unsigned int i = COPTLISTCOUNT;
7863 int yield = 0;
7864
7865 if (arg != NULL && arg[0] != CHAR_MINUS)
7866 {
7867 for (i = 0; i < COPTLISTCOUNT; i++)
7868 if (strcmp(arg, coptlist[i].name) == 0) break;
7869
7870 if (i >= COPTLISTCOUNT)
7871 {
7872 fprintf(stderr, "** Unknown -C option '%s'\n", arg);
7873 return 0;
7874 }
7875
7876 switch (coptlist[i].type)
7877 {
7878 case CONF_BSR:
7879 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
7880 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
7881 break;
7882
7883 case CONF_FIX:
7884 yield = coptlist[i].value;
7885 printf("%d\n", yield);
7886 break;
7887
7888 case CONF_FIZ:
7889 optval = coptlist[i].value;
7890 printf("%d\n", optval);
7891 break;
7892
7893 case CONF_INT:
7894 (void)PCRE2_CONFIG(coptlist[i].value, &yield);
7895 printf("%d\n", yield);
7896 break;
7897
7898 case CONF_NL:
7899 (void)PCRE2_CONFIG(coptlist[i].value, &optval);
7900 print_newline_config(optval, TRUE);
7901 break;
7902 }
7903
7904 /* For VMS, return the value by setting a symbol, for certain values only. */
7905
7906 #ifdef __VMS
7907 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
7908 {
7909 char ucname[16];
7910 strcpy(ucname, coptlist[i].name);
7911 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
7912 vms_setsymbol(ucname, 0, optval);
7913 }
7914 #endif
7915
7916 return yield;
7917 }
7918
7919 /* No argument for -C: output all configuration information. */
7920
7921 print_version(stdout);
7922 printf("Compiled with\n");
7923
7924 #ifdef EBCDIC
7925 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
7926 #if defined NATIVE_ZOS
7927 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
7928 #endif
7929 #endif
7930
7931 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
7932 if (optval & 1) printf(" 8-bit support\n");
7933 if (optval & 2) printf(" 16-bit support\n");
7934 if (optval & 4) printf(" 32-bit support\n");
7935
7936 #ifdef SUPPORT_VALGRIND
7937 printf(" Valgrind support\n");
7938 #endif
7939
7940 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
7941 if (optval != 0)
7942 {
7943 printf(" UTF and UCP support (");
7944 print_unicode_version(stdout);
7945 printf(")\n");
7946 }
7947 else printf(" No Unicode support\n");
7948
7949 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
7950 if (optval != 0)
7951 {
7952 printf(" Just-in-time compiler support: ");
7953 print_jit_target(stdout);
7954 printf("\n");
7955 }
7956 else
7957 {
7958 printf(" No just-in-time compiler support\n");
7959 }
7960
7961 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
7962 print_newline_config(optval, FALSE);
7963 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
7964 printf(" \\R matches %s\n",
7965 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
7966 "all Unicode newlines");
7967 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
7968 printf(" \\C is %ssupported\n", optval? "not ":"");
7969 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
7970 printf(" Internal link size = %d\n", optval);
7971 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
7972 printf(" Parentheses nest limit = %d\n", optval);
7973 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
7974 printf(" Default heap limit = %d\n", optval);
7975 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
7976 printf(" Default match limit = %d\n", optval);
7977 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
7978 printf(" Default depth limit = %d\n", optval);
7979 return 0;
7980 }
7981
7982
7983
7984 /*************************************************
7985 * Display one modifier *
7986 *************************************************/
7987
7988 static void
display_one_modifier(modstruct * m,BOOL for_pattern)7989 display_one_modifier(modstruct *m, BOOL for_pattern)
7990 {
7991 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
7992 '*' : ' ';
7993 printf("%c%s", c, m->name);
7994 }
7995
7996
7997
7998 /*************************************************
7999 * Display pattern or subject modifiers *
8000 *************************************************/
8001
8002 /* In order to print in two columns, first scan without printing to get a list
8003 of the modifiers that are required.
8004
8005 Arguments:
8006 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers
8007 title string to be used in title
8008
8009 Returns: nothing
8010 */
8011
8012 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8013 display_selected_modifiers(BOOL for_pattern, const char *title)
8014 {
8015 uint32_t i, j;
8016 uint32_t n = 0;
8017 uint32_t list[MODLISTCOUNT];
8018
8019 for (i = 0; i < MODLISTCOUNT; i++)
8020 {
8021 BOOL is_pattern = TRUE;
8022 modstruct *m = modlist + i;
8023
8024 switch (m->which)
8025 {
8026 case MOD_CTC: /* Compile context */
8027 case MOD_PAT: /* Pattern */
8028 case MOD_PATP: /* Pattern, OK for Perl-compatible test */
8029 break;
8030
8031 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8032 subjects, but can be given with a pattern. We list them as subject
8033 modifiers, but marked with an asterisk.*/
8034
8035 case MOD_CTM: /* Match context */
8036 case MOD_DAT: /* Subject line */
8037 case MOD_PND: /* As PD, but not default pattern */
8038 case MOD_PNDP: /* As PND, OK for Perl-compatible test */
8039 is_pattern = FALSE;
8040 break;
8041
8042 default: printf("** Unknown type for modifier '%s'\n", m->name);
8043 /* Fall through */
8044 case MOD_PD: /* Pattern or subject */
8045 case MOD_PDP: /* As PD, OK for Perl-compatible test */
8046 is_pattern = for_pattern;
8047 break;
8048 }
8049
8050 if (for_pattern == is_pattern) list[n++] = i;
8051 }
8052
8053 /* Now print from the list in two columns. */
8054
8055 printf("-------------- %s MODIFIERS --------------\n", title);
8056
8057 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8058 {
8059 modstruct *m = modlist + list[i];
8060 display_one_modifier(m, for_pattern);
8061 if (j < n)
8062 {
8063 uint32_t k = 27 - strlen(m->name);
8064 while (k-- > 0) printf(" ");
8065 display_one_modifier(modlist + list[j], for_pattern);
8066 }
8067 printf("\n");
8068 }
8069 }
8070
8071
8072
8073 /*************************************************
8074 * Display the list of modifiers *
8075 *************************************************/
8076
8077 static void
display_modifiers(void)8078 display_modifiers(void)
8079 {
8080 printf(
8081 "An asterisk on a subject modifier means that it may be given on a pattern\n"
8082 "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8083 "that are listed for both patterns and subjects have different effects in\n"
8084 "each case.\n\n");
8085 display_selected_modifiers(TRUE, "PATTERN");
8086 printf("\n");
8087 display_selected_modifiers(FALSE, "SUBJECT");
8088 }
8089
8090
8091
8092 /*************************************************
8093 * Main Program *
8094 *************************************************/
8095
8096 int
main(int argc,char ** argv)8097 main(int argc, char **argv)
8098 {
8099 uint32_t temp;
8100 uint32_t yield = 0;
8101 uint32_t op = 1;
8102 BOOL notdone = TRUE;
8103 BOOL quiet = FALSE;
8104 BOOL showtotaltimes = FALSE;
8105 BOOL skipping = FALSE;
8106 char *arg_subject = NULL;
8107 char *arg_pattern = NULL;
8108 char *arg_error = NULL;
8109
8110 /* The offsets to the options and control bits fields of the pattern and data
8111 control blocks must be the same so that common options and controls such as
8112 "anchored" or "memory" can work for either of them from a single table entry.
8113 We cannot test this till runtime because "offsetof" does not work in the
8114 preprocessor. */
8115
8116 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8117 PO(control2) != DO(control2))
8118 {
8119 fprintf(stderr, "** Coding error: "
8120 "options and control offsets for pattern and data must be the same.\n");
8121 return 1;
8122 }
8123
8124 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8125 same time checking that a request for the length gives the same answer. Also
8126 check lengths for non-string items. */
8127
8128 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8129 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8130
8131 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8132 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8133
8134 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8135 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8136
8137 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8138 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8139 {
8140 fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8141 return 1;
8142 }
8143
8144 /* Check that bad options are diagnosed. */
8145
8146 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8147 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8148 {
8149 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8150 return 1;
8151 }
8152
8153 /* This configuration option is now obsolete, but running a quick check ensures
8154 that its code is covered. */
8155
8156 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8157
8158 /* Get buffers from malloc() so that valgrind will check their misuse when
8159 debugging. They grow automatically when very long lines are read. The 16-
8160 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8161
8162 buffer = (uint8_t *)malloc(pbuffer8_size);
8163 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8164
8165 /* The following _setmode() stuff is some Windows magic that tells its runtime
8166 library to translate CRLF into a single LF character. At least, that's what
8167 I've been told: never having used Windows I take this all on trust. Originally
8168 it set 0x8000, but then I was advised that _O_BINARY was better. */
8169
8170 #if defined(_WIN32) || defined(WIN32)
8171 _setmode( _fileno( stdout ), _O_BINARY );
8172 #endif
8173
8174 /* Initialization that does not depend on the running mode. */
8175
8176 locale_name[0] = 0;
8177
8178 memset(&def_patctl, 0, sizeof(patctl));
8179 def_patctl.convert_type = CONVERT_UNSET;
8180
8181 memset(&def_datctl, 0, sizeof(datctl));
8182 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8183 def_datctl.copy_numbers[0] = -1;
8184 def_datctl.get_numbers[0] = -1;
8185 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8186 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8187 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8188
8189 /* Scan command line options. */
8190
8191 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8192 {
8193 char *endptr;
8194 char *arg = argv[op];
8195 unsigned long uli;
8196
8197 /* List modifiers and exit. */
8198
8199 if (strcmp(arg, "-LM") == 0)
8200 {
8201 display_modifiers();
8202 goto EXIT;
8203 }
8204
8205 /* Display and/or set return code for configuration options. */
8206
8207 if (strcmp(arg, "-C") == 0)
8208 {
8209 yield = c_option(argv[op + 1]);
8210 goto EXIT;
8211 }
8212
8213 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8214 and 32-bit modes because that won't happen naturally when 8-bit is also
8215 configured. Also call some other functions that are not otherwise used. This
8216 means that a coverage report won't claim there are uncalled functions. */
8217
8218 if (strcmp(arg, "-8") == 0)
8219 {
8220 #ifdef SUPPORT_PCRE2_8
8221 test_mode = PCRE8_MODE;
8222 (void)pcre2_set_bsr_8(pat_context8, 999);
8223 (void)pcre2_set_newline_8(pat_context8, 999);
8224 #else
8225 fprintf(stderr,
8226 "** This version of PCRE2 was built without 8-bit support\n");
8227 exit(1);
8228 #endif
8229 }
8230
8231 else if (strcmp(arg, "-16") == 0)
8232 {
8233 #ifdef SUPPORT_PCRE2_16
8234 test_mode = PCRE16_MODE;
8235 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8236 (void)pcre2_set_bsr_16(pat_context16, 999);
8237 (void)pcre2_set_newline_16(pat_context16, 999);
8238 #else
8239 fprintf(stderr,
8240 "** This version of PCRE2 was built without 16-bit support\n");
8241 exit(1);
8242 #endif
8243 }
8244
8245 else if (strcmp(arg, "-32") == 0)
8246 {
8247 #ifdef SUPPORT_PCRE2_32
8248 test_mode = PCRE32_MODE;
8249 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8250 (void)pcre2_set_bsr_32(pat_context32, 999);
8251 (void)pcre2_set_newline_32(pat_context32, 999);
8252 #else
8253 fprintf(stderr,
8254 "** This version of PCRE2 was built without 32-bit support\n");
8255 exit(1);
8256 #endif
8257 }
8258
8259 /* Set quiet (suppress the version heading line) */
8260
8261 else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8262
8263 /* Set system stack size */
8264
8265 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8266 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8267 {
8268 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8269 fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8270 exit(1);
8271 #else
8272 int rc;
8273 uint32_t stack_size;
8274 struct rlimit rlim;
8275 if (U32OVERFLOW(uli))
8276 {
8277 fprintf(stderr, "** Argument for -S is too big\n");
8278 exit(1);
8279 }
8280 stack_size = (uint32_t)uli;
8281 getrlimit(RLIMIT_STACK, &rlim);
8282 rlim.rlim_cur = stack_size * 1024 * 1024;
8283 if (rlim.rlim_cur > rlim.rlim_max)
8284 {
8285 fprintf(stderr,
8286 "pcre2test: requested stack size %luMiB is greater than hard limit "
8287 "%luMiB\n", (unsigned long int)stack_size,
8288 (unsigned long int)(rlim.rlim_max));
8289 exit(1);
8290 }
8291 rc = setrlimit(RLIMIT_STACK, &rlim);
8292 if (rc != 0)
8293 {
8294 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8295 (unsigned long int)stack_size, strerror(errno));
8296 exit(1);
8297 }
8298 op++;
8299 argc--;
8300 #endif
8301 }
8302
8303 /* Set some common pattern and subject controls */
8304
8305 else if (strcmp(arg, "-AC") == 0)
8306 {
8307 def_patctl.options |= PCRE2_AUTO_CALLOUT;
8308 def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8309 }
8310 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
8311 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
8312 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
8313 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8314 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
8315 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0)
8316 {
8317 if (arg[4] != 0) def_patctl.control |= CTL_JITVERIFY;
8318 def_patctl.jit = 7; /* full & partial */
8319 #ifndef SUPPORT_JIT
8320 fprintf(stderr, "** Warning: JIT support is not available: "
8321 "-jit[verify] calls functions that do nothing.\n");
8322 #endif
8323 }
8324
8325 /* Set timing parameters */
8326
8327 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8328 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8329 {
8330 int both = arg[2] == 0;
8331 showtotaltimes = arg[1] == 'T';
8332 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8333 {
8334 if (U32OVERFLOW(uli))
8335 {
8336 fprintf(stderr, "** Argument for %s is too big\n", arg);
8337 exit(1);
8338 }
8339 timeitm = (int)uli;
8340 op++;
8341 argc--;
8342 }
8343 else timeitm = LOOPREPEAT;
8344 if (both) timeit = timeitm;
8345 }
8346
8347 /* Give help */
8348
8349 else if (strcmp(arg, "-help") == 0 ||
8350 strcmp(arg, "--help") == 0)
8351 {
8352 usage();
8353 goto EXIT;
8354 }
8355
8356 /* Show version */
8357
8358 else if (strcmp(arg, "-version") == 0 ||
8359 strcmp(arg, "--version") == 0)
8360 {
8361 print_version(stdout);
8362 goto EXIT;
8363 }
8364
8365 /* The following options save their data for processing once we know what
8366 the running mode is. */
8367
8368 else if (strcmp(arg, "-error") == 0)
8369 {
8370 arg_error = argv[op+1];
8371 goto CHECK_VALUE_EXISTS;
8372 }
8373
8374 else if (strcmp(arg, "-subject") == 0)
8375 {
8376 arg_subject = argv[op+1];
8377 goto CHECK_VALUE_EXISTS;
8378 }
8379
8380 else if (strcmp(arg, "-pattern") == 0)
8381 {
8382 arg_pattern = argv[op+1];
8383 CHECK_VALUE_EXISTS:
8384 if (argc <= 2)
8385 {
8386 fprintf(stderr, "** Missing value for %s\n", arg);
8387 yield = 1;
8388 goto EXIT;
8389 }
8390 op++;
8391 argc--;
8392 }
8393
8394 /* Unrecognized option */
8395
8396 else
8397 {
8398 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8399 usage();
8400 yield = 1;
8401 goto EXIT;
8402 }
8403 op++;
8404 argc--;
8405 }
8406
8407 /* If -error was present, get the error numbers, show the messages, and exit.
8408 We wait to do this until we know which mode we are in. */
8409
8410 if (arg_error != NULL)
8411 {
8412 int len;
8413 int errcode;
8414 char *endptr;
8415
8416 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8417 least 128 code units, because it is used for retrieving error messages. */
8418
8419 #ifdef SUPPORT_PCRE2_16
8420 if (test_mode == PCRE16_MODE)
8421 {
8422 pbuffer16_size = 256;
8423 pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8424 if (pbuffer16 == NULL)
8425 {
8426 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8427 SIZ_CAST pbuffer16_size);
8428 yield = 1;
8429 goto EXIT;
8430 }
8431 }
8432 #endif
8433
8434 #ifdef SUPPORT_PCRE2_32
8435 if (test_mode == PCRE32_MODE)
8436 {
8437 pbuffer32_size = 512;
8438 pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8439 if (pbuffer32 == NULL)
8440 {
8441 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8442 SIZ_CAST pbuffer32_size);
8443 yield = 1;
8444 goto EXIT;
8445 }
8446 }
8447 #endif
8448
8449 /* Loop along a list of error numbers. */
8450
8451 for (;;)
8452 {
8453 errcode = strtol(arg_error, &endptr, 10);
8454 if (*endptr != 0 && *endptr != CHAR_COMMA)
8455 {
8456 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8457 yield = 1;
8458 goto EXIT;
8459 }
8460 printf("Error %d: ", errcode);
8461 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8462 if (len < 0)
8463 {
8464 switch (len)
8465 {
8466 case PCRE2_ERROR_BADDATA:
8467 printf("PCRE2_ERROR_BADDATA (unknown error number)");
8468 break;
8469
8470 case PCRE2_ERROR_NOMEMORY:
8471 printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8472 break;
8473
8474 default:
8475 printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8476 break;
8477 }
8478 }
8479 else
8480 {
8481 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8482 }
8483 printf("\n");
8484 if (*endptr == 0) goto EXIT;
8485 arg_error = endptr + 1;
8486 }
8487 /* Control never reaches here */
8488 } /* End of -error handling */
8489
8490 /* Initialize things that cannot be done until we know which test mode we are
8491 running in. Exercise the general context copying function, which is not
8492 otherwise used. */
8493
8494 code_unit_size = test_mode/8;
8495 max_oveccount = DEFAULT_OVECCOUNT;
8496
8497 /* Use macros to save a lot of duplication. */
8498
/* CREATECONTEXTS expands to a sequence of assignments for the code unit width
named by BITS. In order, it creates a general context that uses my_malloc and
my_free, makes a copy of that general context, and then creates a default
compile, match, and convert context from the general context, each followed by
a working copy (pat_context, dat_context, con_context). Finally it creates a
match data block from max_oveccount and the general context. The expansion has
no trailing semicolon, so the invocation supplies it. G() is defined outside
this part of the file; it presumably pastes the BITS value onto each name. */
8499 #define CREATECONTEXTS \
8500 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8501 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8502 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8503 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8504 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8505 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8506 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8507 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8508 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8509
/* CONTEXTTESTS expands to four calls for the code unit width named by BITS,
each with its return value discarded: on pat_context it sets the compile extra
options and the maximum pattern length to 0; on dat_context it sets the offset
limit to 0 and sets recursion memory management to my_malloc and my_free with a
NULL data pointer. The expansion has no trailing semicolon. */
8510 #define CONTEXTTESTS \
8511 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8512 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8513 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8514 (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
8515
8516 /* Call the appropriate functions for the current mode, and exercise some
8517 functions that are not otherwise called. */
8518
8519 #ifdef SUPPORT_PCRE2_8
8520 #undef BITS
8521 #define BITS 8
8522 if (test_mode == PCRE8_MODE)
8523 {
8524 CREATECONTEXTS;
8525 CONTEXTTESTS;
8526 }
8527 #endif
8528
8529 #ifdef SUPPORT_PCRE2_16
8530 #undef BITS
8531 #define BITS 16
8532 if (test_mode == PCRE16_MODE)
8533 {
8534 CREATECONTEXTS;
8535 CONTEXTTESTS;
8536 }
8537 #endif
8538
8539 #ifdef SUPPORT_PCRE2_32
8540 #undef BITS
8541 #define BITS 32
8542 if (test_mode == PCRE32_MODE)
8543 {
8544 CREATECONTEXTS;
8545 CONTEXTTESTS;
8546 }
8547 #endif
8548
8549 /* Set a default parentheses nest limit that is large enough to run the
8550 standard tests (this also exercises the function). */
8551
8552 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8553
8554 /* Handle command line modifier settings, sending any error messages to
8555 stderr. We need to know the mode before modifying the context, and it is tidier
8556 to do them all in the same way. */
8557
8558 outfile = stderr;
8559 if ((arg_pattern != NULL &&
8560 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8561 (arg_subject != NULL &&
8562 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8563 {
8564 yield = 1;
8565 goto EXIT;
8566 }
8567
8568 /* Sort out the input and output files, defaulting to stdin/stdout. */
8569
8570 infile = stdin;
8571 outfile = stdout;
8572
8573 if (argc > 1 && strcmp(argv[op], "-") != 0)
8574 {
8575 infile = fopen(argv[op], INPUT_MODE);
8576 if (infile == NULL)
8577 {
8578 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
8579 yield = 1;
8580 goto EXIT;
8581 }
8582 }
8583
8584 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
8585 if (INTERACTIVE(infile)) using_history();
8586 #endif
8587
8588 if (argc > 2)
8589 {
8590 outfile = fopen(argv[op+1], OUTPUT_MODE);
8591 if (outfile == NULL)
8592 {
8593 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
8594 yield = 1;
8595 goto EXIT;
8596 }
8597 }
8598
8599 /* Output a heading line unless quiet, then process input lines. */
8600
8601 if (!quiet) print_version(outfile);
8602
8603 SET(compiled_code, NULL);
8604
8605 #ifdef SUPPORT_PCRE2_8
8606 preg.re_pcre2_code = NULL;
8607 preg.re_match_data = NULL;
8608 #endif
8609
8610 while (notdone)
8611 {
8612 uint8_t *p;
8613 int rc = PR_OK;
8614 BOOL expectdata = TEST(compiled_code, !=, NULL);
8615 #ifdef SUPPORT_PCRE2_8
8616 expectdata |= preg.re_pcre2_code != NULL;
8617 #endif
8618
8619 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
8620 break;
8621 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
8622 fflush(outfile);
8623 p = buffer;
8624
8625 /* If we have a pattern set up for testing, or we are skipping after a
8626 compile failure, a blank line terminates this test. */
8627
8628 if (expectdata || skipping)
8629 {
8630 while (isspace(*p)) p++;
8631 if (*p == 0)
8632 {
8633 #ifdef SUPPORT_PCRE2_8
8634 if (preg.re_pcre2_code != NULL)
8635 {
8636 regfree(&preg);
8637 preg.re_pcre2_code = NULL;
8638 preg.re_match_data = NULL;
8639 }
8640 #endif /* SUPPORT_PCRE2_8 */
8641 if (TEST(compiled_code, !=, NULL))
8642 {
8643 SUB1(pcre2_code_free, compiled_code);
8644 SET(compiled_code, NULL);
8645 }
8646 skipping = FALSE;
8647 setlocale(LC_CTYPE, "C");
8648 }
8649
8650 /* Otherwise, if we are not skipping, and the line is not a data comment
8651 line starting with "\=", process a data line. */
8652
8653 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
8654 {
8655 rc = process_data();
8656 }
8657 }
8658
8659 /* We do not have a pattern set up for testing. Lines starting with # are
8660 either comments or special commands. Blank lines are ignored. Otherwise, the
8661 line must start with a valid delimiter. It is then processed as a pattern
8662 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
8663 valgrind, make the unused part of the buffer undefined, to catch overruns. */
8664
8665 else if (*p == '#')
8666 {
8667 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
8668 rc = process_command();
8669 }
8670
8671 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
8672 {
8673 rc = process_pattern();
8674 dfa_matched = 0;
8675 }
8676
8677 else
8678 {
8679 while (isspace(*p)) p++;
8680 if (*p != 0)
8681 {
8682 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
8683 *buffer);
8684 rc = PR_SKIP;
8685 }
8686 }
8687
8688 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
8689 else if (rc == PR_ABEND)
8690 {
8691 fprintf(outfile, "** pcre2test run abandoned\n");
8692 yield = 1;
8693 goto EXIT;
8694 }
8695 }
8696
8697 /* Finish off a normal run. */
8698
8699 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
8700
8701 if (showtotaltimes)
8702 {
8703 const char *pad = "";
8704 fprintf(outfile, "--------------------------------------\n");
8705 if (timeit > 0)
8706 {
8707 fprintf(outfile, "Total compile time %.4f milliseconds\n",
8708 (((double)total_compile_time * 1000.0) / (double)timeit) /
8709 (double)CLOCKS_PER_SEC);
8710 if (total_jit_compile_time > 0)
8711 fprintf(outfile, "Total JIT compile %.4f milliseconds\n",
8712 (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
8713 (double)CLOCKS_PER_SEC);
8714 pad = " ";
8715 }
8716 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
8717 (((double)total_match_time * 1000.0) / (double)timeitm) /
8718 (double)CLOCKS_PER_SEC);
8719 }
8720
8721
8722 EXIT:
8723
8724 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
8725 if (infile != NULL && INTERACTIVE(infile)) clear_history();
8726 #endif
8727
8728 if (infile != NULL && infile != stdin) fclose(infile);
8729 if (outfile != NULL && outfile != stdout) fclose(outfile);
8730
8731 free(buffer);
8732 free(dbuffer);
8733 free(pbuffer8);
8734 free(dfa_workspace);
8735 free((void *)locale_tables);
8736 PCRE2_MATCH_DATA_FREE(match_data);
8737 SUB1(pcre2_code_free, compiled_code);
8738
8739 while(patstacknext-- > 0)
8740 {
8741 SET(compiled_code, patstack[patstacknext]);
8742 SUB1(pcre2_code_free, compiled_code);
8743 }
8744
8745 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
8746 if (jit_stack != NULL)
8747 {
8748 PCRE2_JIT_STACK_FREE(jit_stack);
8749 }
8750
/* FREECONTEXTS expands, for the code unit width named by BITS, to calls that
free the general context and its copy, followed by the working and default
compile contexts, the working and default match contexts, and the default and
working convert contexts. Note that the expansion itself ends with a semicolon,
so an invocation written as "FREECONTEXTS;" yields an extra empty statement. */
8751 #define FREECONTEXTS \
8752 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
8753 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
8754 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
8755 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
8756 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
8757 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
8758 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
8759 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
8760
8761 #ifdef SUPPORT_PCRE2_8
8762 #undef BITS
8763 #define BITS 8
8764 if (preg.re_pcre2_code != NULL) regfree(&preg);
8765 FREECONTEXTS;
8766 #endif
8767
8768 #ifdef SUPPORT_PCRE2_16
8769 #undef BITS
8770 #define BITS 16
8771 free(pbuffer16);
8772 FREECONTEXTS;
8773 #endif
8774
8775 #ifdef SUPPORT_PCRE2_32
8776 #undef BITS
8777 #define BITS 32
8778 free(pbuffer32);
8779 FREECONTEXTS;
8780 #endif
8781
8782 #if defined(__VMS)
8783 yield = SS$_NORMAL; /* Return values via DCL symbols */
8784 #endif
8785
8786 return yield;
8787 }
8788
8789 /* End of pcre2test.c */
8790