1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44
45 #include <stdio.h>
46 #include <string.h>
47
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50
51 /*
52 Letter characters:
53 \xe6\x92\xad = 0x64ad = 25773 (kanji)
54 Non-letter characters:
55 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
56 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59 Newlines:
60 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
61 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62 Othercase pairs:
63 \xc3\xa9 = 0xe9 = 233 (e')
64 \xc3\x89 = 0xc9 = 201 (E')
65 \xc3\xa1 = 0xe1 = 225 (a')
66 \xc3\x81 = 0xc1 = 193 (A')
67 \x53 = 0x53 = S
68 \x73 = 0x73 = s
69 \xc5\xbf = 0x17f = 383 (long S)
70 \xc8\xba = 0x23a = 570
71 \xe2\xb1\xa5 = 0x2c65 = 11365
72 \xe1\xbd\xb8 = 0x1f78 = 8056
73 \xe1\xbf\xb8 = 0x1ff8 = 8184
74 \xf0\x90\x90\x80 = 0x10400 = 66560
75 \xf0\x90\x90\xa8 = 0x10428 = 66600
76 \xc7\x84 = 0x1c4 = 452
77 \xc7\x85 = 0x1c5 = 453
78 \xc7\x86 = 0x1c6 = 454
79 Caseless sets:
80 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82 ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83
84 Mark property:
85 \xcc\x8d = 0x30d = 781
86 Special:
87 \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Queries the library for JIT support through the first code unit width
 * that is enabled at build time (8-bit, then 16-bit, then 32-bit).
 *
 * Returns 1 after printing a message when JIT support is not reported;
 * otherwise returns whatever regression_tests() returns.
 */
int main(void)
{
	/* PCRE2 documents the PCRE2_CONFIG_JIT result as a uint32_t. It is
	   pre-set to 0 so that "no JIT" is assumed if nothing is stored. */
	uint32_t jit = 0;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif

	if (jit == 0) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}

	return regression_tests();
}
113
114 /* --------------------------------------------------------------------------------------- */
115
/* Refuse to build unless at least one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16
   or SUPPORT_PCRE2_32 is defined. */
116 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
117 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
118 #endif
119
/* Shorthand for the compile-option combinations used in the test table.
   Each letter stands for one option: C = PCRE2_CASELESS,
   M = PCRE2_MULTILINE, U = PCRE2_UTF, P = PCRE2_UCP. */
#define M	(PCRE2_MULTILINE)
#define U	(PCRE2_UTF)
#define MU	(M | U)
#define MP	(M | PCRE2_UCP)
#define MUP	(MU | PCRE2_UCP)
#define CM	(PCRE2_CASELESS | M)
#define CMU	(PCRE2_CASELESS | MU)
#define CMUP	(PCRE2_CASELESS | MUP)

/* The "newline" column of a test case packs two values into one int: the
   newline setting in the low 16 bits and the BSR setting above them. */
#define A		PCRE2_NEWLINE_ANYCRLF
#define BSR(x)		((x) << 16)
#define GET_NEWLINE(x)	((x) & 0xffff)
#define GET_BSR(x)	((x) >> 16)

/* The "start_offset" column is packed the same way: OFFSET_MASK selects the
   numeric offset and the F_* bits above it are per-test flags.
   NOTE(review): the flags are consumed outside this part of the file; the
   names suggest "skip for this code unit width", "no match expected", etc.
   Confirm against regression_tests(). */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NO32		F_NO16	/* same bit as F_NO16, as in the original values */
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
143
/* One entry of the regression test table. The table rows are written as
   positional initialisers, so the member order below must not change. */
144 struct regression_test_case {
/* Pattern compile options; rows use MU, CMU, ... or raw PCRE2_* bits. */
145 int compile_options;
/* Newline setting, optionally OR-ed with BSR(...); see GET_NEWLINE/GET_BSR. */
146 int newline;
/* Match-time options; rows use 0 or bits such as PCRE2_NOTBOL. */
147 int match_options;
/* Start offset OR-ed with F_* flags; OFFSET_MASK selects the offset part. */
148 int start_offset;
/* Regular expression source text. */
149 const char *pattern;
/* Subject string the pattern is applied to. */
150 const char *input;
151 };
152
153 static struct regression_test_case regression_test_cases[] = {
154 /* Constant strings. */
155 { MU, A, 0, 0, "AbC", "AbAbC" },
156 { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
157 { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
158 { M, A, 0, 0, "[^a]", "aAbB" },
159 { CM, A, 0, 0, "[^m]", "mMnN" },
160 { M, A, 0, 0, "a[^b][^#]", "abacd" },
161 { CM, A, 0, 0, "A[^B][^E]", "abacd" },
162 { CMU, A, 0, 0, "[^x][^#]", "XxBll" },
163 { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
164 { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
165 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
166 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
167 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
168 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
169 { MU, A, 0, 0, "[axd]", "sAXd" },
170 { CMU, A, 0, 0, "[axd]", "sAXd" },
171 { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
172 { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
173 { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
174 { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
175 { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
176 { MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
177 { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
178 { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
179 { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
180 { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
181 { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
182 #ifndef NEVER_BACKSLASH_C
183 { M, A, 0, 0, "\\Ca", "cda" },
184 { CM, A, 0, 0, "\\Ca", "CDA" },
185 { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
186 { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
187 #endif
188 { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
189 { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
190 { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
191 { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
192 { M, A, 0, 0, "[3-57-9]", "5" },
193
194 /* Assertions. */
195 { MU, A, 0, 0, "\\b[^A]", "A_B#" },
196 { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
197 { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
198 { MP, A, 0, 0, "\\B", "_\xa1" },
199 { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
200 { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
201 { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
202 { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
203 { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
204 { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
205 { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
206 { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
207 { M, A, 0, 1 | F_NOMATCH, "^", "\n" },
208 { 0, 0, 0, 0, "^ab", "ab" },
209 { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
210 { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
211 { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
212 { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
213 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
214 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
215 { 0, 0, 0, 0, "ab$", "ab" },
216 { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
217 { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
218 { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
219 { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
220 { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
221 { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
222 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
223 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
224 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
225 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
226 { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
227 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
228 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
229 { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
230 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
231 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
232 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
233 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
234 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
235 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
236 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
237 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
238 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
239 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
240 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
241 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
242 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
243 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
244 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
245 { M, A, 0, 0, "\\Aa", "aaa" },
246 { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
247 { M, A, 0, 1, "\\Ga", "aaa" },
248 { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
249 { M, A, 0, 0, "a\\z", "aaa" },
250 { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
251
252 /* Brackets and alternatives. */
253 { MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
254 { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
255 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
256 { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
257 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
258 { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
259 { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
260 { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
261 { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
262 { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
263 { U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
264 { U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
265
266 /* Greedy and non-greedy ? operators. */
267 { MU, A, 0, 0, "(?:a)?a", "laab" },
268 { CMU, A, 0, 0, "(A)?A", "llaab" },
269 { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
270 { MU, A, 0, 0, "(a)?a", "manm" },
271 { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
272 { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
273 { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
274
275 /* Greedy and non-greedy + operators */
276 { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
277 { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
278 { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
279 { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
280 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
281 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
282 { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
283
284 /* Greedy and non-greedy * operators */
285 { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
286 { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
287 { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
288 { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
289 { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
290 { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
291 { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
292 { M, A, 0, 0, "((?:a|)*){0}a", "a" },
293
294 /* Combining ? + * operators */
295 { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
296 { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
297 { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
298 { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
299 { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
300
301 /* Single character iterators. */
302 { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
303 { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
304 { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
305 { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
306 { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
307 { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
308 { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
309 { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
310 { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
311 { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
312 { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
313 { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
314 { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
315 { CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
316 { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
317 { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
318 { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
319 { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
320 { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
321 { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
322 { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
323 { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
324 { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
325 { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
326 { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
327 { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
328 { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
329 { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
330 { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
331 { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
332 { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
333 { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
334 { MU, A, 0, 0, "\\d+123", "987654321,01234" },
335 { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
336 { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
337 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
338 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
339 { MU, A, 0, 0, ".[ab]*.", "xx" },
340 { MU, A, 0, 0, ".[ab]*a", "xxa" },
341 { MU, A, 0, 0, ".[ab]?.", "xx" },
342
343 /* Bracket repeats with limit. */
344 { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
345 { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
346 { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
347 { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
348 { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
349 { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
350 { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
351 { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
352 { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
353
354 /* Basic character sets. */
355 { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
356 { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
357 { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
358 { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
359 { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
360 { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
361 { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
362 { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
363 { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
364 { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
365 { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
366 { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
367 { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
368 { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
369 { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
370 { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
371 { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
372 { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
373 { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
374 { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
375 { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
376 { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
377
378 /* Unicode properties. */
379 { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
380 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
381 { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
382 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
383 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
384 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
385 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
386 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
387 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
388 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
389 { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
390 { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
391 { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
392 { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
393 { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
394 { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
395 { CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
396 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
397 { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
398 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
399
400 /* Possible empty brackets. */
401 { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
402 { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
403 { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
404 { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
405 { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
406 { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
407 { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
408 { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
409 { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
410 { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
411
412 /* Start offset. */
413 { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
414 { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
415 { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
416 { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
417
418 /* Newline. */
419 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
420 { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
421 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
422 { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
423 { MU, A, 0, 1, "^", "\r\n" },
424 { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
425 { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
426
427 /* Any character except newline or any newline. */
428 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
429 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
430 { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
431 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
432 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
433 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
434 { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
435 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
436 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
437 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
438 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
439 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
440 { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
441 { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
442 { MU, A, 0, 0, "\\R+", "ab\r\n\r" },
443 { MU, A, 0, 0, "\\R*", "ab\r\n\r" },
444 { MU, A, 0, 0, "\\R*", "\r\n\r" },
445 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
446 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
447 { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
448 { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
449 { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
450 { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
451 { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
452 { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
453 { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
454
455 /* Atomic groups (no fallback from "next" direction). */
456 { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
457 { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
458 { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
459 "bababcdedefgheijijklmlmnop" },
460 { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
461 { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
462 { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
463 { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
464 { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
465 { MU, A, 0, 0, "(?>x|)*$", "aaa" },
466 { MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
467 { MU, A, 0, 0, "(?>x|())*$", "aaa" },
468 { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
469 { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
470 { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
471 { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
472 { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
473 { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
474 { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
475 { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
476 { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
477 { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
478 { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
479 { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
480 { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
481 { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
482 { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
483 { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
484 { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
485 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
486 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
487 { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
488 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
489 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
490 { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
491 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
492 { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
493 { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
494
495 /* Possessive quantifiers. */
496 { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
497 { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
498 { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
499 { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
500 { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
501 { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
502 { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
503 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
504 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
505 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
506 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
507 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
508 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
509 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
510 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
511 { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
512 { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
513 { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
514 { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
515 { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
516 { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
517 { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
518 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
519 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
520 { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
521 { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
522 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
523 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
524 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
525 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
526 { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
527 { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
528 { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
529 { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
530 { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
531
532 /* Back references. */
533 { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
534 { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
535 { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
536 { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
537 { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
538 { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
539 { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
540 { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
541 { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
542 { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
543 { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
544 { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
545 { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
546 { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
547 { MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
548 { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
549 { M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
550 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
551 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
552 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
553 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
554 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
555 { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
556 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
557 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
558 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
559 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
560 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
561 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
562 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
563 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
564 { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
565 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
566 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
567 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
568 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
569 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
570 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
571 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
572
573 /* Assertions. */
574 { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
575 { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
576 { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
577 { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
578 { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
579 { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
580 { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
581 { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
582 { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
583 { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
584 { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
585 { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
586 { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
587 { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
588 { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
589 { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
590 { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
591 { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
592 { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
593 { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
594 { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
595 { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
596 { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
597 { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
598 { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
599 { MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
600 { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
601 { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
602 { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
603 { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
604 { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
605 { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
606 { MU, A, 0, 0, "a(?=)b", "ab" },
607 { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
608
609 /* Not empty, ACCEPT, FAIL */
610 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
611 { MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
612 { MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
613 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
614 { MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
615 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
616 { MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
617 { MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
618 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
619 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
620 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
621 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
622 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
623 { MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
624 { MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
625 { MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
626 { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
627 { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
628 { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
629
630 /* Conditional blocks. */
631 { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
632 { MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
633 { MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
634 { MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
635 { MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
636 { MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
637 { MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
638 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
639 { MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
640 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
641 { MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
642 { MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
643 { MU, A, 0, 0, "(?(?=a)ab)", "a" },
644 { MU, A, 0, 0, "(?(?<!b)c)", "b" },
645 { MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
646 { MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
647 { MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
648 { MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
649 { MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
650 { MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
651 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
652 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
653 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
654 { MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
655 { MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
656 { MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
657 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
658 { MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
659 { MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
660 { MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
661 { MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
662 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
663 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
664 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
665 { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
666 { MU, A, 0, 0, "(?(?!)a|b)", "ab" },
667 { MU, A, 0, 0, "(?(?!)a)", "ab" },
668 { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
669
670 /* Set start of match. */
671 { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
672 { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
673 { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
674 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
675 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
676
677 /* First line. */
678 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
679 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
680 { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
681 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
682 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
683 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
684 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
685 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
686 { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
687 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
688 { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
689 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
690 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
691 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
692 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
693 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
694 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
695 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
696 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
697 { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
698 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
699 { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
700
701 /* Recurse. */
702 { MU, A, 0, 0, "(a)(?1)", "aa" },
703 { MU, A, 0, 0, "((a))(?1)", "aa" },
704 { MU, A, 0, 0, "(b|a)(?1)", "aa" },
705 { MU, A, 0, 0, "(b|(a))(?1)", "aa" },
706 { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
707 { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
708 { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
709 { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
710 { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
711 { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
712 { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
713 { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
714 { MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
715 { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
716 { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
717 { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
718 { MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
719 { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
720 { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
721 { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
722 { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
723 { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
724 { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
725 { MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
726 { MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
727 { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
728 { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
729 { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
730 { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
731 { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
732 { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
733
734 /* 16 bit specific tests. */
735 { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
736 { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
737 { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
738 { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
739 { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
740 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
741 { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
742 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
743 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
744 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
745 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
746 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
747 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
748 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
749 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
750 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
751 { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
752 { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
753 { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
754 { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
755 { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
756 { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
757 { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
758 { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
759 { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
760 { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
761 { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
762 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
763 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
764 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
765 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
766
767 /* Partial matching. */
768 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
769 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
770 { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
771 { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
772 { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
773 { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
774 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
775 { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
776
777 /* (*MARK) verb. */
778 { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
779 { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
780 { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
781 { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
782 { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
783 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
784 { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
785 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
786 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
787 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
788 { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
789 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
790 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
791 { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
792 { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
793
794 /* (*COMMIT) verb. */
795 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
796 { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
797 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
798 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
799 { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
800 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
801
802 /* (*PRUNE) verb. */
803 { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
804 { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
805 { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
806 { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
807 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
808 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
809 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
810 { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
811 { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
812 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
813 { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
814 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
815 { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
816 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
817 { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
818 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
819 { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
820 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
821 { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
822 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
823 { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
824 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
825 { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
826 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
827 { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
828 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
829 { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
830 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
831 { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
832 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
833
834 /* (*SKIP) verb. */
835 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
836 { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
837 { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
838 { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
839
840 /* (*THEN) verb. */
841 { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
842 { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
843 { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
844 { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
845 { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
846 { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
847 { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
848 { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
849 { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
850
851 /* Recurse and control verbs. */
852 { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
853 { MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
854 { MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
855 { MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
856 { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
857 { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
858 { MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
859 { MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
860
861 /* Deep recursion. */
862 { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
863 { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
864 { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
865
866 /* Deep recursion: Stack limit reached. */
867 { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
868 { M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
869 { M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
870 { M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
871 { M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
872
873 { 0, 0, 0, 0, NULL, NULL }
874 };
875
876 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)877 static pcre2_jit_stack_8* callback8(void *arg)
878 {
879 return (pcre2_jit_stack_8 *)arg;
880 }
881 #endif
882
883 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)884 static pcre2_jit_stack_16* callback16(void *arg)
885 {
886 return (pcre2_jit_stack_16 *)arg;
887 }
888 #endif
889
890 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)891 static pcre2_jit_stack_32* callback32(void *arg)
892 {
893 return (pcre2_jit_stack_32 *)arg;
894 }
895 #endif
896
897 #ifdef SUPPORT_PCRE2_8
898 static pcre2_jit_stack_8 *stack8;
899
getstack8(void)900 static pcre2_jit_stack_8 *getstack8(void)
901 {
902 if (!stack8)
903 stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
904 return stack8;
905 }
906
/* Attaches the shared 8-bit JIT stack to a match context, or releases it.

mcontext != NULL: callback8 is registered on the context with the stack from
                  getstack8() as its callback data.
mcontext == NULL: the cached stack (if any) is freed and the cache is cleared,
                  so the next getstack8() call creates a fresh one. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	/* Release request. */
	if (stack8 != NULL)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
918 #endif /* SUPPORT_PCRE2_8 */
919
920 #ifdef SUPPORT_PCRE2_16
921 static pcre2_jit_stack_16 *stack16;
922
getstack16(void)923 static pcre2_jit_stack_16 *getstack16(void)
924 {
925 if (!stack16)
926 stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
927 return stack16;
928 }
929
/* Attaches the shared 16-bit JIT stack to a match context, or releases it.

mcontext != NULL: callback16 is registered on the context with the stack from
                  getstack16() as its callback data.
mcontext == NULL: the cached stack (if any) is freed and the cache is cleared,
                  so the next getstack16() call creates a fresh one. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	/* Release request. */
	if (stack16 != NULL)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
941 #endif /* SUPPORT_PCRE2_16 */
942
943 #ifdef SUPPORT_PCRE2_32
944 static pcre2_jit_stack_32 *stack32;
945
getstack32(void)946 static pcre2_jit_stack_32 *getstack32(void)
947 {
948 if (!stack32)
949 stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
950 return stack32;
951 }
952
/* Attaches the shared 32-bit JIT stack to a match context, or releases it.

mcontext != NULL: callback32 is registered on the context with the stack from
                  getstack32() as its callback data.
mcontext == NULL: the cached stack (if any) is freed and the cache is cleared,
                  so the next getstack32() call creates a fresh one. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	/* Release request. */
	if (stack32 != NULL)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
964 #endif /* SUPPORT_PCRE2_32 */
965
966 #ifdef SUPPORT_PCRE2_16
967
/* Converts the NUL-terminated UTF-8 string "input" to UTF-16 code units.

input      - NUL-terminated source bytes.
output     - destination buffer with room for max_length code units.
offsetmap  - optional, may be NULL. When given, entry i receives the byte
             offset in "input" of the character that produced output[i], and
             the entry at index <return value> receives the byte offset at
             which conversion stopped. The entry that lines up with a low
             surrogate is skipped and left unwritten.
max_length - capacity of "output" in code units, terminator included.

Returns the number of code units stored, not counting the terminating zero.
Nothing is written when max_length is zero or negative. Conversion stops early
when a surrogate pair no longer fits together with the terminator.

Only the sequence length is interpreted; decoded values are stored as they
come (the test table relies on this to inject lone surrogates). Two malformed
cases are handled so the loop always makes progress and never reads beyond
the terminator:
 - a byte in 0xf8..0xff, which announces no valid sequence, and
 - a lead byte whose continuation bytes are cut short by the terminating NUL.
In both cases the byte is passed through as a single code unit. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int trail;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0)
			trail = 0;
		else if (!(*iptr & 0x20))
			trail = 1;
		else if (!(*iptr & 0x10))
			trail = 2;
		else if (!(*iptr & 0x08))
			trail = 3;
		else
			trail = 0;	/* 0xf8..0xff: pass the byte through. */

		/* A sequence truncated by the terminator is not decoded; checking
		byte by byte also guarantees nothing past the NUL is read. */
		for (i = 1; i <= trail; i++) {
			if (iptr[i] == 0) {
				trail = 0;
				break;
			}
		}

		switch (trail) {
		case 1:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += trail + 1;

		if (c < 65536) {
			*optr++ = (PCRE2_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for two surrogates plus the terminator. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE2_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE2_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* Keep offsetmap aligned with output: skip the low surrogate's slot. */
			if (offsetmap)
				offsetmap++;
		}
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
1015
/* Widens the NUL-terminated byte string "input" into 16-bit code units, one
unit per byte with the value unchanged.

At most max_length - 1 units are copied and a terminating zero is always
appended; nothing is written when max_length is 0. Returns the number of units
copied, terminator excluded. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* One slot is always kept free for the terminator. */
	for (; input[count] && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1031
/* Capacity, in 16-bit code units (terminator included), of the scratch buffer
used for 16-bit conversions. */
#define REGTEST_MAX_LENGTH16 4096
/* Receives the 16-bit form of the current pattern in regression_tests(). */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* One int per code unit of regtest_buf16. NOTE(review): presumably passed as
the offsetmap argument of convert_utf8_to_utf16() for subject strings; confirm
against the rest of regression_tests(). */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1035
1036 #endif /* SUPPORT_PCRE2_16 */
1037
1038 #ifdef SUPPORT_PCRE2_32
1039
/* Converts the NUL-terminated UTF-8 string "input" to 32-bit code units.

input      - NUL-terminated source bytes.
output     - destination buffer with room for max_length code units.
offsetmap  - optional, may be NULL. When given, entry i receives the byte
             offset in "input" of the character that produced output[i], and
             the entry at index <return value> receives the byte offset at
             which conversion stopped.
max_length - capacity of "output" in code units, terminator included.

Returns the number of code units stored, not counting the terminating zero.
Nothing is written when max_length is zero or negative.

Only the sequence length is interpreted; decoded values are stored as they
come. Two malformed cases are handled so the loop always makes progress and
never reads beyond the terminator:
 - a byte in 0xf8..0xff, which announces no valid sequence, and
 - a lead byte whose continuation bytes are cut short by the terminating NUL.
In both cases the byte is passed through as a single code unit. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int trail;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0)
			trail = 0;
		else if (!(*iptr & 0x20))
			trail = 1;
		else if (!(*iptr & 0x10))
			trail = 2;
		else if (!(*iptr & 0x08))
			trail = 3;
		else
			trail = 0;	/* 0xf8..0xff: pass the byte through. */

		/* A sequence truncated by the terminator is not decoded; checking
		byte by byte also guarantees nothing past the NUL is read. */
		for (i = 1; i <= trail; i++) {
			if (iptr[i] == 0) {
				trail = 0;
				break;
			}
		}

		switch (trail) {
		case 1:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += trail + 1;

		*optr++ = c;
		max_length--;
	}

	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1075
/* Widens the NUL-terminated byte string "input" into 32-bit code units, one
unit per byte with the value unchanged.

At most max_length - 1 units are copied and a terminating zero is always
appended; nothing is written when max_length is 0. Returns the number of units
copied, terminator excluded. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* One slot is always kept free for the terminator. */
	for (; input[count] && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1091
/* Capacity, in 32-bit code units (terminator included), of the scratch buffer
used for 32-bit conversions. */
#define REGTEST_MAX_LENGTH32 4096
/* Receives the 32-bit form of the current pattern in regression_tests(). */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* One int per code unit of regtest_buf32. NOTE(review): presumably passed as
the offsetmap argument of convert_utf8_to_utf32() for subject strings; confirm
against the rest of regression_tests(). */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1095
1096 #endif /* SUPPORT_PCRE2_32 */
1097
/* Reports whether the NUL-terminated string "input" is pure 7-bit ASCII.

Returns 1 when every byte before the terminator is <= 127 (an empty string
qualifies), 0 as soon as a byte above 127 is met. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 0x80)
			return 0;
	}
	return 1;
}
1108
/* Size handed to pcre2_match_data_create_*() for every match data block in
regression_tests(); the ovector pre-fill loops there cover OVECTOR_SIZE * 2
entries. */
#define OVECTOR_SIZE 15
1110
regression_tests(void)1111 static int regression_tests(void)
1112 {
1113 struct regression_test_case *current = regression_test_cases;
1114 int error;
1115 PCRE2_SIZE err_offs;
1116 int is_successful;
1117 int is_ascii;
1118 int total = 0;
1119 int successful = 0;
1120 int successful_row = 0;
1121 int counter = 0;
1122 int jit_compile_mode;
1123 int utf = 0;
1124 int disabled_options = 0;
1125 int i;
1126 #ifdef SUPPORT_PCRE2_8
1127 pcre2_code_8 *re8;
1128 pcre2_compile_context_8 *ccontext8;
1129 pcre2_match_data_8 *mdata8_1;
1130 pcre2_match_data_8 *mdata8_2;
1131 pcre2_match_context_8 *mcontext8;
1132 PCRE2_SIZE *ovector8_1 = NULL;
1133 PCRE2_SIZE *ovector8_2 = NULL;
1134 int return_value8[2];
1135 #endif
1136 #ifdef SUPPORT_PCRE2_16
1137 pcre2_code_16 *re16;
1138 pcre2_compile_context_16 *ccontext16;
1139 pcre2_match_data_16 *mdata16_1;
1140 pcre2_match_data_16 *mdata16_2;
1141 pcre2_match_context_16 *mcontext16;
1142 PCRE2_SIZE *ovector16_1 = NULL;
1143 PCRE2_SIZE *ovector16_2 = NULL;
1144 int return_value16[2];
1145 int length16;
1146 #endif
1147 #ifdef SUPPORT_PCRE2_32
1148 pcre2_code_32 *re32;
1149 pcre2_compile_context_32 *ccontext32;
1150 pcre2_match_data_32 *mdata32_1;
1151 pcre2_match_data_32 *mdata32_2;
1152 pcre2_match_context_32 *mcontext32;
1153 PCRE2_SIZE *ovector32_1 = NULL;
1154 PCRE2_SIZE *ovector32_2 = NULL;
1155 int return_value32[2];
1156 int length32;
1157 #endif
1158
1159 #if defined SUPPORT_PCRE2_8
1160 PCRE2_UCHAR8 cpu_info[128];
1161 #elif defined SUPPORT_PCRE2_16
1162 PCRE2_UCHAR16 cpu_info[128];
1163 #elif defined SUPPORT_PCRE2_32
1164 PCRE2_UCHAR32 cpu_info[128];
1165 #endif
1166 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1167 int return_value;
1168 #endif
1169
1170 /* This test compares the behaviour of interpreter and JIT. Although disabling
1171 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1172 still considered successful from pcre_jit_test point of view. */
1173
1174 #if defined SUPPORT_PCRE2_8
1175 pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1176 #elif defined SUPPORT_PCRE2_16
1177 pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1178 #elif defined SUPPORT_PCRE2_32
1179 pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1180 #endif
1181
1182 printf("Running JIT regression tests\n");
1183 printf(" target CPU of SLJIT compiler: ");
1184 for (i = 0; cpu_info[i]; i++)
1185 printf("%c", (char)(cpu_info[i]));
1186 printf("\n");
1187
1188 #if defined SUPPORT_PCRE2_8
1189 pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1190 #elif defined SUPPORT_PCRE2_16
1191 pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1192 #elif defined SUPPORT_PCRE2_32
1193 pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1194 #endif
1195
1196 if (!utf)
1197 disabled_options |= PCRE2_UTF;
1198 #ifdef SUPPORT_PCRE2_8
1199 printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
1200 #endif
1201 #ifdef SUPPORT_PCRE2_16
1202 printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1203 #endif
1204 #ifdef SUPPORT_PCRE2_32
1205 printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1206 #endif
1207
1208 while (current->pattern) {
1209 /* printf("\nPattern: %s :\n", current->pattern); */
1210 total++;
1211 is_ascii = 0;
1212 if (!(current->start_offset & F_PROPERTY))
1213 is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1214
1215 if (current->match_options & PCRE2_PARTIAL_SOFT)
1216 jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1217 else if (current->match_options & PCRE2_PARTIAL_HARD)
1218 jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1219 else
1220 jit_compile_mode = PCRE2_JIT_COMPLETE;
1221 error = 0;
1222 #ifdef SUPPORT_PCRE2_8
1223 re8 = NULL;
1224 ccontext8 = pcre2_compile_context_create_8(NULL);
1225 if (ccontext8) {
1226 if (GET_NEWLINE(current->newline))
1227 pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1228 if (GET_BSR(current->newline))
1229 pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1230
1231 if (!(current->start_offset & F_NO8)) {
1232 re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1233 current->compile_options & ~disabled_options,
1234 &error, &err_offs, ccontext8);
1235
1236 if (!re8 && (utf || is_ascii))
1237 printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1238 }
1239 pcre2_compile_context_free_8(ccontext8);
1240 }
1241 else
1242 printf("\n8 bit: Cannot allocate compile context\n");
1243 #endif
1244 #ifdef SUPPORT_PCRE2_16
1245 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1246 convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1247 else
1248 copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1249
1250 re16 = NULL;
1251 ccontext16 = pcre2_compile_context_create_16(NULL);
1252 if (ccontext16) {
1253 if (GET_NEWLINE(current->newline))
1254 pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1255 if (GET_BSR(current->newline))
1256 pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1257
1258 if (!(current->start_offset & F_NO16)) {
1259 re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1260 current->compile_options & ~disabled_options,
1261 &error, &err_offs, ccontext16);
1262
1263 if (!re16 && (utf || is_ascii))
1264 printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1265 }
1266 pcre2_compile_context_free_16(ccontext16);
1267 }
1268 else
1269 printf("\n16 bit: Cannot allocate compile context\n");
1270 #endif
1271 #ifdef SUPPORT_PCRE2_32
1272 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1273 convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1274 else
1275 copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1276
1277 re32 = NULL;
1278 ccontext32 = pcre2_compile_context_create_32(NULL);
1279 if (ccontext32) {
1280 if (GET_NEWLINE(current->newline))
1281 pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1282 if (GET_BSR(current->newline))
1283 pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1284
1285 if (!(current->start_offset & F_NO32)) {
1286 re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1287 current->compile_options & ~disabled_options,
1288 &error, &err_offs, ccontext32);
1289
1290 if (!re32 && (utf || is_ascii))
1291 printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1292 }
1293 pcre2_compile_context_free_32(ccontext32);
1294 }
1295 else
1296 printf("\n32 bit: Cannot allocate compile context\n");
1297 #endif
1298
1299 counter++;
1300 if ((counter & 0x3) != 0) {
1301 #ifdef SUPPORT_PCRE2_8
1302 setstack8(NULL);
1303 #endif
1304 #ifdef SUPPORT_PCRE2_16
1305 setstack16(NULL);
1306 #endif
1307 #ifdef SUPPORT_PCRE2_32
1308 setstack32(NULL);
1309 #endif
1310 }
1311
1312 #ifdef SUPPORT_PCRE2_8
1313 return_value8[0] = -1000;
1314 return_value8[1] = -1000;
1315 mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1316 mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1317 mcontext8 = pcre2_match_context_create_8(NULL);
1318 if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1319 printf("\n8 bit: Cannot allocate match data\n");
1320 pcre2_match_data_free_8(mdata8_1);
1321 pcre2_match_data_free_8(mdata8_2);
1322 pcre2_match_context_free_8(mcontext8);
1323 pcre2_code_free_8(re8);
1324 re8 = NULL;
1325 } else {
1326 ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1327 ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1328 for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1329 ovector8_1[i] = -2;
1330 for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1331 ovector8_2[i] = -2;
1332 }
1333 if (re8) {
1334 (void)pcre2_set_match_limit_8(mcontext8, 10000000);
1335 return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1336 current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1337
1338 if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1339 printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1340 } else if ((counter & 0x1) != 0) {
1341 setstack8(mcontext8);
1342 return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1343 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1344 } else {
1345 pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1346 return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1347 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1348 }
1349 }
1350 #endif
1351
1352 #ifdef SUPPORT_PCRE2_16
1353 return_value16[0] = -1000;
1354 return_value16[1] = -1000;
1355 mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1356 mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1357 mcontext16 = pcre2_match_context_create_16(NULL);
1358 if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1359 printf("\n16 bit: Cannot allocate match data\n");
1360 pcre2_match_data_free_16(mdata16_1);
1361 pcre2_match_data_free_16(mdata16_2);
1362 pcre2_match_context_free_16(mcontext16);
1363 pcre2_code_free_16(re16);
1364 re16 = NULL;
1365 } else {
1366 ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1367 ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1368 for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1369 ovector16_1[i] = -2;
1370 for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1371 ovector16_2[i] = -2;
1372 }
1373 if (re16) {
1374 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1375 length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1376 else
1377 length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1378
1379 (void)pcre2_set_match_limit_16(mcontext16, 10000000);
1380 return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1381 current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1382
1383 if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1384 printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1385 } else if ((counter & 0x1) != 0) {
1386 setstack16(mcontext16);
1387 return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1388 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1389 } else {
1390 pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1391 return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1392 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1393 }
1394 }
1395 #endif
1396
1397 #ifdef SUPPORT_PCRE2_32
1398 return_value32[0] = -1000;
1399 return_value32[1] = -1000;
1400 mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1401 mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1402 mcontext32 = pcre2_match_context_create_32(NULL);
1403 if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1404 printf("\n32 bit: Cannot allocate match data\n");
1405 pcre2_match_data_free_32(mdata32_1);
1406 pcre2_match_data_free_32(mdata32_2);
1407 pcre2_match_context_free_32(mcontext32);
1408 pcre2_code_free_32(re32);
1409 re32 = NULL;
1410 } else {
1411 ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1412 ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1413 for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1414 ovector32_1[i] = -2;
1415 for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1416 ovector32_2[i] = -2;
1417 }
1418 if (re32) {
1419 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1420 length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1421 else
1422 length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1423
1424 (void)pcre2_set_match_limit_32(mcontext32, 10000000);
1425 return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1426 current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1427
1428 if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1429 printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1430 } else if ((counter & 0x1) != 0) {
1431 setstack32(mcontext32);
1432 return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1433 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1434 } else {
1435 pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1436 return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1437 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1438 }
1439 }
1440 #endif
1441
1442 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1443 return_value8[0], return_value16[0], return_value32[0],
1444 (int)ovector8_1[0], (int)ovector8_1[1],
1445 (int)ovector16_1[0], (int)ovector16_1[1],
1446 (int)ovector32_1[0], (int)ovector32_1[1],
1447 (current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1448
1449 /* If F_DIFF is set, just run the test, but do not compare the results.
1450 Segfaults can still be captured. */
1451
1452 is_successful = 1;
1453 if (!(current->start_offset & F_DIFF)) {
1454 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1455 if (!(current->start_offset & F_FORCECONV)) {
1456
1457 /* All results must be the same. */
1458 #ifdef SUPPORT_PCRE2_8
1459 if ((return_value = return_value8[0]) != return_value8[1]) {
1460 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1461 return_value8[0], return_value8[1], total, current->pattern, current->input);
1462 is_successful = 0;
1463 } else
1464 #endif
1465 #ifdef SUPPORT_PCRE2_16
1466 if ((return_value = return_value16[0]) != return_value16[1]) {
1467 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1468 return_value16[0], return_value16[1], total, current->pattern, current->input);
1469 is_successful = 0;
1470 } else
1471 #endif
1472 #ifdef SUPPORT_PCRE2_32
1473 if ((return_value = return_value32[0]) != return_value32[1]) {
1474 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1475 return_value32[0], return_value32[1], total, current->pattern, current->input);
1476 is_successful = 0;
1477 } else
1478 #endif
1479 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1480 if (return_value8[0] != return_value16[0]) {
1481 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1482 return_value8[0], return_value16[0],
1483 total, current->pattern, current->input);
1484 is_successful = 0;
1485 } else
1486 #endif
1487 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1488 if (return_value8[0] != return_value32[0]) {
1489 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1490 return_value8[0], return_value32[0],
1491 total, current->pattern, current->input);
1492 is_successful = 0;
1493 } else
1494 #endif
1495 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1496 if (return_value16[0] != return_value32[0]) {
1497 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1498 return_value16[0], return_value32[0],
1499 total, current->pattern, current->input);
1500 is_successful = 0;
1501 } else
1502 #endif
1503 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1504 if (return_value == PCRE_ERROR_PARTIAL) {
1505 return_value = 2;
1506 } else {
1507 return_value *= 2;
1508 }
1509 #ifdef SUPPORT_PCRE2_8
1510 return_value8[0] = return_value;
1511 #endif
1512 #ifdef SUPPORT_PCRE2_16
1513 return_value16[0] = return_value;
1514 #endif
1515 #ifdef SUPPORT_PCRE2_32
1516 return_value32[0] = return_value;
1517 #endif
1518 /* Transform back the results. */
1519 if (current->flags & PCRE_UTF8) {
1520 #ifdef SUPPORT_PCRE2_16
1521 for (i = 0; i < return_value; ++i) {
1522 if (ovector16_1[i] >= 0)
1523 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1524 if (ovector16_2[i] >= 0)
1525 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1526 }
1527 #endif
1528 #ifdef SUPPORT_PCRE2_32
1529 for (i = 0; i < return_value; ++i) {
1530 if (ovector32_1[i] >= 0)
1531 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1532 if (ovector32_2[i] >= 0)
1533 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1534 }
1535 #endif
1536 }
1537
1538 for (i = 0; i < return_value; ++i) {
1539 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1540 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1541 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1542 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1543 total, current->pattern, current->input);
1544 is_successful = 0;
1545 }
1546 #endif
1547 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1548 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1549 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1550 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1551 total, current->pattern, current->input);
1552 is_successful = 0;
1553 }
1554 #endif
1555 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1556 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1557 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1558 i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
1559 total, current->pattern, current->input);
1560 is_successful = 0;
1561 }
1562 #endif
1563 }
1564 }
1565 } else
1566 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1567 {
1568 #ifdef SUPPORT_PCRE2_8
1569 if (return_value8[0] != return_value8[1]) {
1570 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1571 return_value8[0], return_value8[1], total, current->pattern, current->input);
1572 is_successful = 0;
1573 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1574 if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1575 return_value8[0] = 2;
1576 else
1577 return_value8[0] *= 2;
1578
1579 for (i = 0; i < return_value8[0]; ++i)
1580 if (ovector8_1[i] != ovector8_2[i]) {
1581 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1582 i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1583 is_successful = 0;
1584 }
1585 }
1586 #endif
1587
1588 #ifdef SUPPORT_PCRE2_16
1589 if (return_value16[0] != return_value16[1]) {
1590 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1591 return_value16[0], return_value16[1], total, current->pattern, current->input);
1592 is_successful = 0;
1593 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1594 if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1595 return_value16[0] = 2;
1596 else
1597 return_value16[0] *= 2;
1598
1599 for (i = 0; i < return_value16[0]; ++i)
1600 if (ovector16_1[i] != ovector16_2[i]) {
1601 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1602 i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1603 is_successful = 0;
1604 }
1605 }
1606 #endif
1607
1608 #ifdef SUPPORT_PCRE2_32
1609 if (return_value32[0] != return_value32[1]) {
1610 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1611 return_value32[0], return_value32[1], total, current->pattern, current->input);
1612 is_successful = 0;
1613 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1614 if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1615 return_value32[0] = 2;
1616 else
1617 return_value32[0] *= 2;
1618
1619 for (i = 0; i < return_value32[0]; ++i)
1620 if (ovector32_1[i] != ovector32_2[i]) {
1621 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1622 i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1623 is_successful = 0;
1624 }
1625 }
1626 #endif
1627 }
1628 }
1629
1630 if (is_successful) {
1631 #ifdef SUPPORT_PCRE2_8
1632 if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1633 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1634 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1635 total, current->pattern, current->input);
1636 is_successful = 0;
1637 }
1638
1639 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1640 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1641 total, current->pattern, current->input);
1642 is_successful = 0;
1643 }
1644 }
1645 #endif
1646 #ifdef SUPPORT_PCRE2_16
1647 if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1648 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1649 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1650 total, current->pattern, current->input);
1651 is_successful = 0;
1652 }
1653
1654 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1655 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1656 total, current->pattern, current->input);
1657 is_successful = 0;
1658 }
1659 }
1660 #endif
1661 #ifdef SUPPORT_PCRE2_32
1662 if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1663 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1664 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1665 total, current->pattern, current->input);
1666 is_successful = 0;
1667 }
1668
1669 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1670 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1671 total, current->pattern, current->input);
1672 is_successful = 0;
1673 }
1674 }
1675 #endif
1676 }
1677
1678 if (is_successful) {
1679 #ifdef SUPPORT_PCRE2_8
1680 if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1681 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1682 total, current->pattern, current->input);
1683 is_successful = 0;
1684 }
1685 #endif
1686 #ifdef SUPPORT_PCRE2_16
1687 if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1688 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1689 total, current->pattern, current->input);
1690 is_successful = 0;
1691 }
1692 #endif
1693 #ifdef SUPPORT_PCRE2_32
1694 if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1695 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1696 total, current->pattern, current->input);
1697 is_successful = 0;
1698 }
1699 #endif
1700 }
1701
1702 #ifdef SUPPORT_PCRE2_8
1703 pcre2_code_free_8(re8);
1704 pcre2_match_data_free_8(mdata8_1);
1705 pcre2_match_data_free_8(mdata8_2);
1706 pcre2_match_context_free_8(mcontext8);
1707 #endif
1708 #ifdef SUPPORT_PCRE2_16
1709 pcre2_code_free_16(re16);
1710 pcre2_match_data_free_16(mdata16_1);
1711 pcre2_match_data_free_16(mdata16_2);
1712 pcre2_match_context_free_16(mcontext16);
1713 #endif
1714 #ifdef SUPPORT_PCRE2_32
1715 pcre2_code_free_32(re32);
1716 pcre2_match_data_free_32(mdata32_1);
1717 pcre2_match_data_free_32(mdata32_2);
1718 pcre2_match_context_free_32(mcontext32);
1719 #endif
1720
1721 if (is_successful) {
1722 successful++;
1723 successful_row++;
1724 printf(".");
1725 if (successful_row >= 60) {
1726 successful_row = 0;
1727 printf("\n");
1728 }
1729 } else
1730 successful_row = 0;
1731
1732 fflush(stdout);
1733 current++;
1734 }
1735 #ifdef SUPPORT_PCRE2_8
1736 setstack8(NULL);
1737 #endif
1738 #ifdef SUPPORT_PCRE2_16
1739 setstack16(NULL);
1740 #endif
1741 #ifdef SUPPORT_PCRE2_32
1742 setstack32(NULL);
1743 #endif
1744
1745 if (total == successful) {
1746 printf("\nAll JIT regression tests are successfully passed.\n");
1747 return 0;
1748 } else {
1749 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1750 return 1;
1751 }
1752 }
1753
1754 /* End of pcre2_jit_test.c */
1755