1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44
45 #include <stdio.h>
46 #include <string.h>
47
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50
51 /*
52 Letter characters:
53 \xe6\x92\xad = 0x64ad = 25773 (kanji)
54 Non-letter characters:
55 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
56 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59 Newlines:
\xc2\x85 = 0x85 = 133 (Next Line = NEL)
61 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62 Othercase pairs:
63 \xc3\xa9 = 0xe9 = 233 (e')
64 \xc3\x89 = 0xc9 = 201 (E')
65 \xc3\xa1 = 0xe1 = 225 (a')
66 \xc3\x81 = 0xc1 = 193 (A')
67 \x53 = 0x53 = S
68 \x73 = 0x73 = s
69 \xc5\xbf = 0x17f = 383 (long S)
70 \xc8\xba = 0x23a = 570
71 \xe2\xb1\xa5 = 0x2c65 = 11365
72 \xe1\xbd\xb8 = 0x1f78 = 8056
73 \xe1\xbf\xb8 = 0x1ff8 = 8184
74 \xf0\x90\x90\x80 = 0x10400 = 66560
75 \xf0\x90\x90\xa8 = 0x10428 = 66600
76 \xc7\x84 = 0x1c4 = 452
77 \xc7\x85 = 0x1c5 = 453
78 \xc7\x86 = 0x1c6 = 454
79 Caseless sets:
80 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83
84 Mark property:
85 \xcc\x8d = 0x30d = 781
86 Special:
87 \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
\xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94
/* Runs the regression tests; defined outside this part of the file. */
static int regression_tests(void);

/*
 * Program entry point.
 *
 * Queries PCRE2_CONFIG_JIT through the pcre2_config_* function of the first
 * code unit width selected at build time (SUPPORT_PCRE2_8, then _16, then
 * _32). If the reported value is zero, a message is printed and 1 is
 * returned; otherwise the result of regression_tests() is returned.
 */
int main(void)
{
	/* Remains 0 when the query does not store a non-zero value. */
	int jit = 0;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif

	if (!jit) {
		/* Name the PCRE2 test program in the diagnostic. */
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	return regression_tests();
}
113
114 /* --------------------------------------------------------------------------------------- */
115
116 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
117 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
118 #endif
119
/*
 * Shorthand for the compile option combinations used by the test table.
 * Each letter stands for one PCRE2 option:
 *   C = PCRE2_CASELESS, M = PCRE2_MULTILINE, U = PCRE2_UTF, P = PCRE2_UCP.
 */
#define M	(PCRE2_MULTILINE)
#define U	(PCRE2_UTF)
#define MU	(PCRE2_MULTILINE | PCRE2_UTF)
#define MP	(PCRE2_MULTILINE | PCRE2_UCP)
#define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)
#define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)

/*
 * The "newline" member of a test case carries two values: the newline
 * setting in its low 16 bits and a BSR setting in the bits above them.
 * BSR() moves a value into the upper part; GET_NEWLINE() and GET_BSR()
 * split the packed value again.
 */
#define A		PCRE2_NEWLINE_ANYCRLF
#define BSR(x)		((x) << 16)
#define GET_BSR(x)	((x) >> 16)
#define GET_NEWLINE(x)	((x) & 0xffff)

/*
 * Flag bits that table entries OR into the "start_offset" member. They all
 * lie above the low 16 bits selected by OFFSET_MASK (presumably the offset
 * itself; the code that consumes them is outside this excerpt).
 *
 * NOTE(review): F_NO16 and F_NO32 have the same value, exactly as before.
 * Confirm whether the shared bit is intended before changing either one.
 */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NO32		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
143
/*
 * One row of the regression test table.
 *
 * Rows are written with positional initializers, so the member order below
 * must not change.
 */
struct regression_test_case {
	int compile_options,	/* PCRE2 compile option bits (e.g. MU, CMU) */
	    newline,		/* PCRE2_NEWLINE_* value, optionally OR-ed with BSR(...) */
	    match_options,	/* PCRE2 match option bits (e.g. PCRE2_NOTBOL) */
	    start_offset;	/* small offset OR-ed with F_* flag bits */
	const char *pattern,	/* regular expression text */
		   *input;	/* subject string paired with the pattern */
};
152
153 static struct regression_test_case regression_test_cases[] = {
154 /* Constant strings. */
155 { MU, A, 0, 0, "AbC", "AbAbC" },
156 { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
157 { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
158 { M, A, 0, 0, "[^a]", "aAbB" },
159 { CM, A, 0, 0, "[^m]", "mMnN" },
160 { M, A, 0, 0, "a[^b][^#]", "abacd" },
161 { CM, A, 0, 0, "A[^B][^E]", "abacd" },
162 { CMU, A, 0, 0, "[^x][^#]", "XxBll" },
163 { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
164 { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
165 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
166 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
167 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
168 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
169 { MU, A, 0, 0, "[axd]", "sAXd" },
170 { CMU, A, 0, 0, "[axd]", "sAXd" },
171 { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
172 { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
173 { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
174 { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
175 { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
176 { MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
177 { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
178 { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
179 { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
180 { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
181 { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
182 { M, A, 0, 0, "\\Ca", "cda" },
183 { CM, A, 0, 0, "\\Ca", "CDA" },
184 { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
185 { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
186 { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
187 { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
188 { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
189 { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
190 { M, A, 0, 0, "[3-57-9]", "5" },
191
192 /* Assertions. */
193 { MU, A, 0, 0, "\\b[^A]", "A_B#" },
194 { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
195 { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
196 { MP, A, 0, 0, "\\B", "_\xa1" },
197 { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
198 { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
199 { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
200 { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
201 { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
202 { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
203 { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
204 { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
205 { M, A, 0, 1 | F_NOMATCH, "^", "\n" },
206 { 0, 0, 0, 0, "^ab", "ab" },
207 { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
208 { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
209 { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
210 { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
211 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
212 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
213 { 0, 0, 0, 0, "ab$", "ab" },
214 { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
215 { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
216 { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
217 { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
218 { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
219 { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
220 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
221 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
222 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
223 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
224 { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
225 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
226 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
227 { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
228 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
229 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
230 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
231 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
232 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
233 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
234 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
235 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
236 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
237 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
238 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
239 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
240 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
241 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
242 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
243 { M, A, 0, 0, "\\Aa", "aaa" },
244 { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
245 { M, A, 0, 1, "\\Ga", "aaa" },
246 { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
247 { M, A, 0, 0, "a\\z", "aaa" },
248 { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
249
250 /* Brackets and alternatives. */
251 { MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
252 { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
253 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
254 { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
255 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
256 { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
257 { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
258 { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
259 { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
260 { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
261
262 /* Greedy and non-greedy ? operators. */
263 { MU, A, 0, 0, "(?:a)?a", "laab" },
264 { CMU, A, 0, 0, "(A)?A", "llaab" },
265 { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
266 { MU, A, 0, 0, "(a)?a", "manm" },
267 { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
268 { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
269 { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
270
271 /* Greedy and non-greedy + operators */
272 { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
273 { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
274 { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
275 { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
276 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
277 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
278 { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
279
280 /* Greedy and non-greedy * operators */
281 { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
282 { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
283 { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
284 { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
285 { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
286 { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
287 { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
288 { M, A, 0, 0, "((?:a|)*){0}a", "a" },
289
290 /* Combining ? + * operators */
291 { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
292 { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
293 { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
294 { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
295 { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
296
297 /* Single character iterators. */
298 { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
299 { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
300 { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
301 { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
302 { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
303 { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
304 { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
305 { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
306 { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
307 { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
308 { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
309 { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
310 { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
311 { CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
312 { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
313 { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
314 { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
315 { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
316 { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
317 { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
318 { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
319 { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
320 { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
321 { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
322 { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
323 { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
324 { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
325 { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
326 { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
327 { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
328 { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
329 { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
330 { MU, A, 0, 0, "\\d+123", "987654321,01234" },
331 { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
332 { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
333 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
334 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
335 { MU, A, 0, 0, ".[ab]*.", "xx" },
336 { MU, A, 0, 0, ".[ab]*a", "xxa" },
337 { MU, A, 0, 0, ".[ab]?.", "xx" },
338
339 /* Bracket repeats with limit. */
340 { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
341 { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
342 { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
343 { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
344 { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
345 { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
346 { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
347 { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
348 { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
349
350 /* Basic character sets. */
351 { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
352 { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
353 { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
354 { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
355 { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
356 { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
357 { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
358 { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
359 { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
360 { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
361 { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
362 { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
363 { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
364 { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
365 { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
366 { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
367 { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
368 { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
369 { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
370 { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
371 { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
372 { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
373
374 /* Unicode properties. */
375 { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
376 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
377 { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
378 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
379 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
380 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
381 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
382 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
383 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
384 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
385 { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
386 { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
387 { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
388 { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
389 { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
390 { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
391 { CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
392 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
393 { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
394 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
395
396 /* Possible empty brackets. */
397 { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
398 { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
399 { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
400 { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
401 { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
402 { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
403 { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
404 { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
405 { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
406 { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
407
408 /* Start offset. */
409 { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
410 { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
411 { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
412 { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
413
414 /* Newline. */
415 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
416 { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
417 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
418 { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
419 { MU, A, 0, 1, "^", "\r\n" },
420 { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
421 { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
422
423 /* Any character except newline or any newline. */
424 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
425 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
426 { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
427 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
428 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
429 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
430 { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
431 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
432 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
433 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
434 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
435 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
436 { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
437 { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
438 { MU, A, 0, 0, "\\R+", "ab\r\n\r" },
439 { MU, A, 0, 0, "\\R*", "ab\r\n\r" },
440 { MU, A, 0, 0, "\\R*", "\r\n\r" },
441 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
442 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
443 { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
444 { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
445 { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
446 { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
447 { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
448 { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
449 { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
450
451 /* Atomic groups (no fallback from "next" direction). */
452 { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
453 { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
454 { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
455 "bababcdedefgheijijklmlmnop" },
456 { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
457 { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
458 { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
459 { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
460 { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
461 { MU, A, 0, 0, "(?>x|)*$", "aaa" },
462 { MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
463 { MU, A, 0, 0, "(?>x|())*$", "aaa" },
464 { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
465 { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
466 { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
467 { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
468 { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
469 { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
470 { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
471 { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
472 { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
473 { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
474 { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
475 { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
476 { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
477 { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
478 { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
479 { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
480 { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
481 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
482 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
483 { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
484 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
485 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
486 { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
487 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
488 { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
489 { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
490
491 /* Possessive quantifiers. */
492 { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
493 { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
494 { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
495 { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
496 { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
497 { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
498 { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
499 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
500 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
501 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
502 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
503 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
504 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
505 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
506 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
507 { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
508 { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
509 { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
510 { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
511 { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
512 { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
513 { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
514 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
515 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
516 { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
517 { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
518 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
519 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
520 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
521 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
522 { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
523 { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
524 { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
525 { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
526 { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
527
528 /* Back references. */
529 { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
530 { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
531 { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
532 { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
533 { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
534 { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
535 { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
536 { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
537 { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
538 { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
539 { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
540 { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
541 { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
542 { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
543 { MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
544 { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
545 { M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
546 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
547 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
548 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
549 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
550 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
551 { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
552 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
553 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
554 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
555 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
556 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
557 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
558 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
559 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
560 { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
561 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
562 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
563 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
564 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
565 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
566 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
567 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
568
569 /* Assertions. */
570 { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
571 { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
572 { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
573 { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
574 { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
575 { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
576 { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
577 { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
578 { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
579 { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
580 { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
581 { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
582 { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
583 { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
584 { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
585 { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
586 { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
587 { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
588 { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
589 { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
590 { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
591 { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
592 { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
593 { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
594 { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
595 { MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
596 { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
597 { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
598 { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
599 { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
600 { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
601 { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
602 { MU, A, 0, 0, "a(?=)b", "ab" },
603 { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
604
605 /* Not empty, ACCEPT, FAIL */
606 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
607 { MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
608 { MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
609 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
610 { MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
611 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
612 { MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
613 { MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
614 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
615 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
616 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
617 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
618 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
619 { MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
620 { MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
621 { MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
622 { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
623 { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
624 { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
625
626 /* Conditional blocks. */
627 { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
628 { MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
629 { MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
630 { MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
631 { MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
632 { MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
633 { MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
634 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
635 { MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
636 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
637 { MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
638 { MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
639 { MU, A, 0, 0, "(?(?=a)ab)", "a" },
640 { MU, A, 0, 0, "(?(?<!b)c)", "b" },
641 { MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
642 { MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
643 { MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
644 { MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
645 { MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
646 { MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
647 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
648 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
649 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
650 { MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
651 { MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
652 { MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
653 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
654 { MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
655 { MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
656 { MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
657 { MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
658 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
659 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
660 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
661 { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
662 { MU, A, 0, 0, "(?(?!)a|b)", "ab" },
663 { MU, A, 0, 0, "(?(?!)a)", "ab" },
664 { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
665
666 /* Set start of match. */
667 { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
668 { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
669 { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
670 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
671 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
672
673 /* First line. */
674 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
675 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
676 { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
677 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
678 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
679 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
680 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
681 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
682 { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
683 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
684 { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
685 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
686 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
687 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
688 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
689 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
690 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
691 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
692 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
693 { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
694 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
695 { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
696
697 /* Recurse. */
698 { MU, A, 0, 0, "(a)(?1)", "aa" },
699 { MU, A, 0, 0, "((a))(?1)", "aa" },
700 { MU, A, 0, 0, "(b|a)(?1)", "aa" },
701 { MU, A, 0, 0, "(b|(a))(?1)", "aa" },
702 { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
703 { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
704 { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
705 { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
706 { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
707 { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
708 { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
709 { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
710 { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
711 { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
712 { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
713 { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
714 { MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
715 { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
716 { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
717 { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
718 { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
719 { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
720 { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
721 { MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
722 { MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
723 { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
724 { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
725 { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
726 { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
727
728 /* 16 bit specific tests. */
729 { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
730 { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
731 { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
732 { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
733 { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
734 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
735 { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
736 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
737 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
738 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
739 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
740 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
741 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
742 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
743 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
744 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
745 { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
746 { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
747 { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
748 { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
749 { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
750 { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
751 { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
752 { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
753 { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
754 { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
755 { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
756 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
757 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
758 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
759 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
760
761 /* Partial matching. */
762 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
763 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
764 { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
765 { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
766 { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
767 { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
768 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
769 { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
770
771 /* (*MARK) verb. */
772 { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
773 { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
774 { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
775 { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
776 { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
777 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
778 { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
779 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
780 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
781 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
782 { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
783 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
784 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
785 { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
786 { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
787
788 /* (*COMMIT) verb. */
789 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
790 { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
791 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
792 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
793 { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
794 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
795
796 /* (*PRUNE) verb. */
797 { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
798 { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
799 { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
800 { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
801 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
802 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
803 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
804 { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
805 { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
806 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
807 { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
808 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
809 { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
810 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
811 { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
812 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
813 { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
814 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
815 { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
816 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
817 { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
818 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
819 { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
820 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
821 { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
822 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
823 { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
824 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
825 { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
826 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
827
828 /* (*SKIP) verb. */
829 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
830 { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
831 { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
832 { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
833
834 /* (*THEN) verb. */
835 { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
836 { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
837 { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
838 { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
839 { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
840 { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
841 { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
842 { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
843 { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
844
845 /* Deep recursion. */
846 { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
847 { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
848 { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
849
850 /* Deep recursion: Stack limit reached. */
851 { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
852 { M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
853 { M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
854 { M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
855 { M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
856
857 { 0, 0, 0, 0, NULL, NULL }
858 };
859
860 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)861 static pcre2_jit_stack_8* callback8(void *arg)
862 {
863 return (pcre2_jit_stack_8 *)arg;
864 }
865 #endif
866
867 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)868 static pcre2_jit_stack_16* callback16(void *arg)
869 {
870 return (pcre2_jit_stack_16 *)arg;
871 }
872 #endif
873
874 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)875 static pcre2_jit_stack_32* callback32(void *arg)
876 {
877 return (pcre2_jit_stack_32 *)arg;
878 }
879 #endif
880
881 #ifdef SUPPORT_PCRE2_8
882 static pcre2_jit_stack_8 *stack8;
883
getstack8(void)884 static pcre2_jit_stack_8 *getstack8(void)
885 {
886 if (!stack8)
887 stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
888 return stack8;
889 }
890
/* Either attaches the shared 8 bit JIT stack to a match context or tears the
   stack down.

   mcontext != NULL  registers callback8 on the context, passing the stack
                     obtained from getstack8() as the callback argument.
   mcontext == NULL  frees the shared stack if one exists and clears stack8,
                     so the next getstack8() call creates a fresh one. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	if (stack8 != NULL) {
		pcre2_jit_stack_free_8(stack8);
		stack8 = NULL;
	}
}
902 #endif /* SUPPORT_PCRE2_8 */
903
904 #ifdef SUPPORT_PCRE2_16
905 static pcre2_jit_stack_16 *stack16;
906
getstack16(void)907 static pcre2_jit_stack_16 *getstack16(void)
908 {
909 if (!stack16)
910 stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
911 return stack16;
912 }
913
/* Either attaches the shared 16 bit JIT stack to a match context or tears the
   stack down.

   mcontext != NULL  registers callback16 on the context, passing the stack
                     obtained from getstack16() as the callback argument.
   mcontext == NULL  frees the shared stack if one exists and clears stack16,
                     so the next getstack16() call creates a fresh one. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	if (stack16 != NULL) {
		pcre2_jit_stack_free_16(stack16);
		stack16 = NULL;
	}
}
925 #endif /* SUPPORT_PCRE2_16 */
926
927 #ifdef SUPPORT_PCRE2_32
928 static pcre2_jit_stack_32 *stack32;
929
getstack32(void)930 static pcre2_jit_stack_32 *getstack32(void)
931 {
932 if (!stack32)
933 stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
934 return stack32;
935 }
936
/* Either attaches the shared 32 bit JIT stack to a match context or tears the
   stack down.

   mcontext != NULL  registers callback32 on the context, passing the stack
                     obtained from getstack32() as the callback argument.
   mcontext == NULL  frees the shared stack if one exists and clears stack32,
                     so the next getstack32() call creates a fresh one. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	if (stack32 != NULL) {
		pcre2_jit_stack_free_32(stack32);
		stack32 = NULL;
	}
}
948 #endif /* SUPPORT_PCRE2_32 */
949
950 #ifdef SUPPORT_PCRE2_16
951
/* Converts a zero-terminated UTF-8 string into UTF-16 code units.

   input       zero-terminated source bytes
   output      destination buffer; receives at most max_length code units,
               terminating zero included
   offsetmap   optional, may be NULL. For every converted character the byte
               offset of its first input byte is stored at the index of its
               first output code unit; the slot belonging to the second half
               of a surrogate pair is skipped and left untouched. One more
               entry, holding the offset at which conversion stopped, follows
               the last character, so the array needs max_length entries.
   max_length  capacity of output in code units

   Returns the number of code units written, not counting the terminating
   zero. When max_length is 0 nothing at all is written and 0 is returned.
   Conversion stops early when the next character would not fit.

   Decoding is lenient: bytes below 0xc0 (stray continuation bytes included)
   are copied through as single code units and no overlong or range checks are
   made. Two malformed cases are handled explicitly so that the loop always
   advances and never reads beyond the terminating zero: a lead byte in the
   range 0xf8..0xff, and a multi-byte sequence cut short by the end of the
   string. In both cases the lead byte alone is passed through as one code
   unit. Well-formed input is converted exactly as before. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. Bytes
		   0xf8..0xff do not start any sequence and count as single bytes. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0;

		/* Checked one byte at a time so the terminator is never stepped
		   over: a truncated sequence degrades to a single byte. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		switch (extra) {
		case 1:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += extra + 1;

		if (c < 65536) {
			*optr++ = (PCRE2_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE2_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE2_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* Skip the map slot of the low surrogate. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
999
/* Widens a zero-terminated 8 bit string to 16 bit code units, one input byte
   per output unit, without any decoding.

   At most max_length units are stored in output, terminating zero included.
   Returns the number of units copied, not counting the terminator. When
   max_length is 0 nothing is written and 0 is returned. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one unit in reserve for the terminator. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1015
/* Capacity, in elements, of the two 16 bit scratch arrays below. */
#define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer holding the 16 bit form of an 8 bit test string; the
   regression loop fills it with the converted pattern before compiling. */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Presumably the offsetmap target for convert_utf8_to_utf16(); its use lies
   outside the visible part of the file -- TODO confirm. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1019
1020 #endif /* SUPPORT_PCRE2_16 */
1021
1022 #ifdef SUPPORT_PCRE2_32
1023
/* Converts a zero-terminated UTF-8 string into UTF-32 code units.

   input       zero-terminated source bytes
   output      destination buffer; receives at most max_length code units,
               terminating zero included
   offsetmap   optional, may be NULL. For every converted character the byte
               offset of its first input byte is stored at the index of its
               output code unit. One more entry, holding the offset at which
               conversion stopped, follows the last character, so the array
               needs max_length entries.
   max_length  capacity of output in code units

   Returns the number of code units written, not counting the terminating
   zero. When max_length is 0 nothing at all is written and 0 is returned.

   Decoding is lenient: bytes below 0xc0 (stray continuation bytes included)
   are copied through as single code units and no overlong or range checks are
   made. Two malformed cases are handled explicitly so that the loop always
   advances and never reads beyond the terminating zero: a lead byte in the
   range 0xf8..0xff, and a multi-byte sequence cut short by the end of the
   string. In both cases the lead byte alone is passed through as one code
   unit. Well-formed input is converted exactly as before. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. Bytes
		   0xf8..0xff do not start any sequence and count as single bytes. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0;

		/* Checked one byte at a time so the terminator is never stepped
		   over: a truncated sequence degrades to a single byte. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		switch (extra) {
		case 1:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += extra + 1;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1059
/* Widens a zero-terminated 8 bit string to 32 bit code units, one input byte
   per output unit, without any decoding.

   At most max_length units are stored in output, terminating zero included.
   Returns the number of units copied, not counting the terminator. When
   max_length is 0 nothing is written and 0 is returned. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one unit in reserve for the terminator. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1075
/* Capacity, in elements, of the two 32 bit scratch arrays below. */
#define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer holding the 32 bit form of an 8 bit test string; the
   regression loop fills it with the converted pattern before compiling. */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Presumably the offsetmap target for convert_utf8_to_utf32(); its use lies
   outside the visible part of the file -- TODO confirm. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1079
1080 #endif /* SUPPORT_PCRE2_32 */
1081
/* Reports whether a zero-terminated string consists solely of 7 bit
   characters. Returns 1 when every byte is in the range 0..127 (the empty
   string included) and 0 as soon as a byte above 127 is found. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p >= 0x80)
			return 0;
	}
	return 1;
}
1092
/* Ovector size that regression_tests() passes to pcre2_match_data_create_N()
   for every match data block it allocates.
   NOTE(review): the loops that preset the ovectors to -2 run up to
   OVECTOR_SIZE * 3 entries -- confirm that this stays within the ovector the
   match data block really provides. */
#define OVECTOR_SIZE 15
1094
regression_tests(void)1095 static int regression_tests(void)
1096 {
1097 struct regression_test_case *current = regression_test_cases;
1098 int error;
1099 PCRE2_SIZE err_offs;
1100 int is_successful;
1101 int is_ascii;
1102 int total = 0;
1103 int successful = 0;
1104 int successful_row = 0;
1105 int counter = 0;
1106 int jit_compile_mode;
1107 int utf = 0;
1108 int disabled_options = 0;
1109 int i;
1110 #ifdef SUPPORT_PCRE2_8
1111 pcre2_code_8 *re8;
1112 pcre2_compile_context_8 *ccontext8;
1113 pcre2_match_data_8 *mdata8_1;
1114 pcre2_match_data_8 *mdata8_2;
1115 pcre2_match_context_8 *mcontext8;
1116 PCRE2_SIZE *ovector8_1 = NULL;
1117 PCRE2_SIZE *ovector8_2 = NULL;
1118 int return_value8[2];
1119 #endif
1120 #ifdef SUPPORT_PCRE2_16
1121 pcre2_code_16 *re16;
1122 pcre2_compile_context_16 *ccontext16;
1123 pcre2_match_data_16 *mdata16_1;
1124 pcre2_match_data_16 *mdata16_2;
1125 pcre2_match_context_16 *mcontext16;
1126 PCRE2_SIZE *ovector16_1 = NULL;
1127 PCRE2_SIZE *ovector16_2 = NULL;
1128 int return_value16[2];
1129 int length16;
1130 #endif
1131 #ifdef SUPPORT_PCRE2_32
1132 pcre2_code_32 *re32;
1133 pcre2_compile_context_32 *ccontext32;
1134 pcre2_match_data_32 *mdata32_1;
1135 pcre2_match_data_32 *mdata32_2;
1136 pcre2_match_context_32 *mcontext32;
1137 PCRE2_SIZE *ovector32_1 = NULL;
1138 PCRE2_SIZE *ovector32_2 = NULL;
1139 int return_value32[2];
1140 int length32;
1141 #endif
1142
1143 #if defined SUPPORT_PCRE2_8
1144 PCRE2_UCHAR8 cpu_info[128];
1145 #elif defined SUPPORT_PCRE2_16
1146 PCRE2_UCHAR16 cpu_info[128];
1147 #elif defined SUPPORT_PCRE2_32
1148 PCRE2_UCHAR32 cpu_info[128];
1149 #endif
1150 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1151 int return_value;
1152 #endif
1153
1154 /* This test compares the behaviour of interpreter and JIT. Although disabling
1155 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1156 still considered successful from pcre_jit_test point of view. */
1157
1158 #if defined SUPPORT_PCRE2_8
1159 pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1160 #elif defined SUPPORT_PCRE2_16
1161 pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1162 #elif defined SUPPORT_PCRE2_32
1163 pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1164 #endif
1165
1166 printf("Running JIT regression tests\n");
1167 printf(" target CPU of SLJIT compiler: ");
1168 for (i = 0; cpu_info[i]; i++)
1169 printf("%c", (char)(cpu_info[i]));
1170 printf("\n");
1171
1172 #if defined SUPPORT_PCRE2_8
1173 pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1174 #elif defined SUPPORT_PCRE2_16
1175 pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1176 #elif defined SUPPORT_PCRE2_32
1177 pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1178 #endif
1179
1180 if (!utf)
1181 disabled_options |= PCRE2_UTF;
1182 #ifdef SUPPORT_PCRE2_8
1183 printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
1184 #endif
1185 #ifdef SUPPORT_PCRE2_16
1186 printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1187 #endif
1188 #ifdef SUPPORT_PCRE2_32
1189 printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1190 #endif
1191
1192 while (current->pattern) {
1193 /* printf("\nPattern: %s :\n", current->pattern); */
1194 total++;
1195 is_ascii = 0;
1196 if (!(current->start_offset & F_PROPERTY))
1197 is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1198
1199 if (current->match_options & PCRE2_PARTIAL_SOFT)
1200 jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1201 else if (current->match_options & PCRE2_PARTIAL_HARD)
1202 jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1203 else
1204 jit_compile_mode = PCRE2_JIT_COMPLETE;
1205 error = 0;
1206 #ifdef SUPPORT_PCRE2_8
1207 re8 = NULL;
1208 ccontext8 = pcre2_compile_context_create_8(NULL);
1209 if (ccontext8) {
1210 if (GET_NEWLINE(current->newline))
1211 pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1212 if (GET_BSR(current->newline))
1213 pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1214
1215 if (!(current->start_offset & F_NO8)) {
1216 re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1217 current->compile_options & ~disabled_options,
1218 &error, &err_offs, ccontext8);
1219
1220 if (!re8 && (utf || is_ascii))
1221 printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1222 }
1223 pcre2_compile_context_free_8(ccontext8);
1224 }
1225 else
1226 printf("\n8 bit: Cannot allocate compile context\n");
1227 #endif
1228 #ifdef SUPPORT_PCRE2_16
1229 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1230 convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1231 else
1232 copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1233
1234 re16 = NULL;
1235 ccontext16 = pcre2_compile_context_create_16(NULL);
1236 if (ccontext16) {
1237 if (GET_NEWLINE(current->newline))
1238 pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1239 if (GET_BSR(current->newline))
1240 pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1241
1242 if (!(current->start_offset & F_NO16)) {
1243 re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1244 current->compile_options & ~disabled_options,
1245 &error, &err_offs, ccontext16);
1246
1247 if (!re16 && (utf || is_ascii))
1248 printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1249 }
1250 pcre2_compile_context_free_16(ccontext16);
1251 }
1252 else
1253 printf("\n16 bit: Cannot allocate compile context\n");
1254 #endif
1255 #ifdef SUPPORT_PCRE2_32
1256 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1257 convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1258 else
1259 copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1260
1261 re32 = NULL;
1262 ccontext32 = pcre2_compile_context_create_32(NULL);
1263 if (ccontext32) {
1264 if (GET_NEWLINE(current->newline))
1265 pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1266 if (GET_BSR(current->newline))
1267 pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1268
1269 if (!(current->start_offset & F_NO32)) {
1270 re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1271 current->compile_options & ~disabled_options,
1272 &error, &err_offs, ccontext32);
1273
1274 if (!re32 && (utf || is_ascii))
1275 printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1276 }
1277 pcre2_compile_context_free_32(ccontext32);
1278 }
1279 else
1280 printf("\n32 bit: Cannot allocate compile context\n");
1281 #endif
1282
1283 counter++;
1284 if ((counter & 0x3) != 0) {
1285 #ifdef SUPPORT_PCRE2_8
1286 setstack8(NULL);
1287 #endif
1288 #ifdef SUPPORT_PCRE2_16
1289 setstack16(NULL);
1290 #endif
1291 #ifdef SUPPORT_PCRE2_32
1292 setstack32(NULL);
1293 #endif
1294 }
1295
1296 #ifdef SUPPORT_PCRE2_8
1297 return_value8[0] = -1000;
1298 return_value8[1] = -1000;
1299 mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1300 mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1301 mcontext8 = pcre2_match_context_create_8(NULL);
1302 if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1303 printf("\n8 bit: Cannot allocate match data\n");
1304 pcre2_match_data_free_8(mdata8_1);
1305 pcre2_match_data_free_8(mdata8_2);
1306 pcre2_match_context_free_8(mcontext8);
1307 pcre2_code_free_8(re8);
1308 re8 = NULL;
1309 } else {
1310 ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1311 ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1312 for (i = 0; i < OVECTOR_SIZE * 3; ++i)
1313 ovector8_1[i] = -2;
1314 for (i = 0; i < OVECTOR_SIZE * 3; ++i)
1315 ovector8_2[i] = -2;
1316 }
1317 if (re8) {
1318 return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1319 current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
1320
1321 if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1322 printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1323 } else if ((counter & 0x1) != 0) {
1324 setstack8(mcontext8);
1325 return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1326 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1327 } else {
1328 pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1329 return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1330 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1331 }
1332 }
1333 #endif
1334
1335 #ifdef SUPPORT_PCRE2_16
1336 return_value16[0] = -1000;
1337 return_value16[1] = -1000;
1338 mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1339 mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1340 mcontext16 = pcre2_match_context_create_16(NULL);
1341 if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1342 printf("\n16 bit: Cannot allocate match data\n");
1343 pcre2_match_data_free_16(mdata16_1);
1344 pcre2_match_data_free_16(mdata16_2);
1345 pcre2_match_context_free_16(mcontext16);
1346 pcre2_code_free_16(re16);
1347 re16 = NULL;
1348 } else {
1349 ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1350 ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1351 for (i = 0; i < OVECTOR_SIZE * 3; ++i)
1352 ovector16_1[i] = -2;
1353 for (i = 0; i < OVECTOR_SIZE * 3; ++i)
1354 ovector16_2[i] = -2;
1355 }
1356 if (re16) {
1357 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1358 length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1359 else
1360 length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1361
1362 return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1363 current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
1364
1365 if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1366 printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1367 } else if ((counter & 0x1) != 0) {
1368 setstack16(mcontext16);
1369 return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1370 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1371 } else {
1372 pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1373 return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1374 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1375 }
1376 }
1377 #endif
1378
1379 #ifdef SUPPORT_PCRE2_32
1380 return_value32[0] = -1000;
1381 return_value32[1] = -1000;
1382 mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1383 mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1384 mcontext32 = pcre2_match_context_create_32(NULL);
1385 if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1386 printf("\n32 bit: Cannot allocate match data\n");
1387 pcre2_match_data_free_32(mdata32_1);
1388 pcre2_match_data_free_32(mdata32_2);
1389 pcre2_match_context_free_32(mcontext32);
1390 pcre2_code_free_32(re32);
1391 re32 = NULL;
1392 } else {
1393 ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1394 ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1395 for (i = 0; i < OVECTOR_SIZE * 3; ++i)
1396 ovector32_1[i] = -2;
1397 for (i = 0; i < OVECTOR_SIZE * 3; ++i)
1398 ovector32_2[i] = -2;
1399 }
1400 if (re32) {
1401 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1402 length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1403 else
1404 length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1405
1406 return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1407 current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
1408
1409 if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1410 printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1411 } else if ((counter & 0x1) != 0) {
1412 setstack32(mcontext32);
1413 return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1414 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1415 } else {
1416 pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1417 return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1418 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1419 }
1420 }
1421 #endif
1422
1423 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1424 return_value8[0], return_value16[0], return_value32[0],
1425 (int)ovector8_1[0], (int)ovector8_1[1],
1426 (int)ovector16_1[0], (int)ovector16_1[1],
1427 (int)ovector32_1[0], (int)ovector32_1[1],
1428 (current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1429
1430 /* If F_DIFF is set, just run the test, but do not compare the results.
1431 Segfaults can still be captured. */
1432
1433 is_successful = 1;
1434 if (!(current->start_offset & F_DIFF)) {
1435 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1436 if (!(current->start_offset & F_FORCECONV)) {
1437
1438 /* All results must be the same. */
1439 #ifdef SUPPORT_PCRE2_8
1440 if ((return_value = return_value8[0]) != return_value8[1]) {
1441 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1442 return_value8[0], return_value8[1], total, current->pattern, current->input);
1443 is_successful = 0;
1444 } else
1445 #endif
1446 #ifdef SUPPORT_PCRE2_16
1447 if ((return_value = return_value16[0]) != return_value16[1]) {
1448 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1449 return_value16[0], return_value16[1], total, current->pattern, current->input);
1450 is_successful = 0;
1451 } else
1452 #endif
1453 #ifdef SUPPORT_PCRE2_32
1454 if ((return_value = return_value32[0]) != return_value32[1]) {
1455 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1456 return_value32[0], return_value32[1], total, current->pattern, current->input);
1457 is_successful = 0;
1458 } else
1459 #endif
1460 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1461 if (return_value8[0] != return_value16[0]) {
1462 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1463 return_value8[0], return_value16[0],
1464 total, current->pattern, current->input);
1465 is_successful = 0;
1466 } else
1467 #endif
1468 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1469 if (return_value8[0] != return_value32[0]) {
1470 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1471 return_value8[0], return_value32[0],
1472 total, current->pattern, current->input);
1473 is_successful = 0;
1474 } else
1475 #endif
1476 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1477 if (return_value16[0] != return_value32[0]) {
1478 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1479 return_value16[0], return_value32[0],
1480 total, current->pattern, current->input);
1481 is_successful = 0;
1482 } else
1483 #endif
1484 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
1485 if (return_value == PCRE_ERROR_PARTIAL) {
1486 return_value = 2;
1487 } else {
1488 return_value *= 2;
1489 }
1490 #ifdef SUPPORT_PCRE2_8
1491 return_value8[0] = return_value;
1492 #endif
1493 #ifdef SUPPORT_PCRE2_16
1494 return_value16[0] = return_value;
1495 #endif
1496 #ifdef SUPPORT_PCRE2_32
1497 return_value32[0] = return_value;
1498 #endif
1499 /* Transform back the results. */
1500 if (current->flags & PCRE_UTF8) {
1501 #ifdef SUPPORT_PCRE2_16
1502 for (i = 0; i < return_value; ++i) {
1503 if (ovector16_1[i] >= 0)
1504 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1505 if (ovector16_2[i] >= 0)
1506 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1507 }
1508 #endif
1509 #ifdef SUPPORT_PCRE2_32
1510 for (i = 0; i < return_value; ++i) {
1511 if (ovector32_1[i] >= 0)
1512 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1513 if (ovector32_2[i] >= 0)
1514 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1515 }
1516 #endif
1517 }
1518
1519 for (i = 0; i < return_value; ++i) {
1520 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1521 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1522 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1523 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1524 total, current->pattern, current->input);
1525 is_successful = 0;
1526 }
1527 #endif
1528 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1529 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1530 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1531 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
1532 total, current->pattern, current->input);
1533 is_successful = 0;
1534 }
1535 #endif
1536 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1537 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1538 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1539 i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
1540 total, current->pattern, current->input);
1541 is_successful = 0;
1542 }
1543 #endif
1544 }
1545 }
1546 } else
1547 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1548 {
1549 #ifdef SUPPORT_PCRE2_8
1550 if (return_value8[0] != return_value8[1]) {
1551 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1552 return_value8[0], return_value8[1], total, current->pattern, current->input);
1553 is_successful = 0;
1554 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1555 if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1556 return_value8[0] = 2;
1557 else
1558 return_value8[0] *= 2;
1559
1560 for (i = 0; i < return_value8[0]; ++i)
1561 if (ovector8_1[i] != ovector8_2[i]) {
1562 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1563 i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1564 is_successful = 0;
1565 }
1566 }
1567 #endif
1568
1569 #ifdef SUPPORT_PCRE2_16
1570 if (return_value16[0] != return_value16[1]) {
1571 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1572 return_value16[0], return_value16[1], total, current->pattern, current->input);
1573 is_successful = 0;
1574 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1575 if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1576 return_value16[0] = 2;
1577 else
1578 return_value16[0] *= 2;
1579
1580 for (i = 0; i < return_value16[0]; ++i)
1581 if (ovector16_1[i] != ovector16_2[i]) {
1582 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1583 i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1584 is_successful = 0;
1585 }
1586 }
1587 #endif
1588
1589 #ifdef SUPPORT_PCRE2_32
1590 if (return_value32[0] != return_value32[1]) {
1591 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1592 return_value32[0], return_value32[1], total, current->pattern, current->input);
1593 is_successful = 0;
1594 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1595 if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1596 return_value32[0] = 2;
1597 else
1598 return_value32[0] *= 2;
1599
1600 for (i = 0; i < return_value32[0]; ++i)
1601 if (ovector32_1[i] != ovector32_2[i]) {
1602 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1603 i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1604 is_successful = 0;
1605 }
1606 }
1607 #endif
1608 }
1609 }
1610
1611 if (is_successful) {
1612 #ifdef SUPPORT_PCRE2_8
1613 if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1614 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1615 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1616 total, current->pattern, current->input);
1617 is_successful = 0;
1618 }
1619
1620 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1621 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1622 total, current->pattern, current->input);
1623 is_successful = 0;
1624 }
1625 }
1626 #endif
1627 #ifdef SUPPORT_PCRE2_16
1628 if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1629 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1630 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1631 total, current->pattern, current->input);
1632 is_successful = 0;
1633 }
1634
1635 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1636 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1637 total, current->pattern, current->input);
1638 is_successful = 0;
1639 }
1640 }
1641 #endif
1642 #ifdef SUPPORT_PCRE2_32
1643 if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1644 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1645 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1646 total, current->pattern, current->input);
1647 is_successful = 0;
1648 }
1649
1650 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1651 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1652 total, current->pattern, current->input);
1653 is_successful = 0;
1654 }
1655 }
1656 #endif
1657 }
1658
1659 if (is_successful) {
1660 #ifdef SUPPORT_PCRE2_8
1661 if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1662 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1663 total, current->pattern, current->input);
1664 is_successful = 0;
1665 }
1666 #endif
1667 #ifdef SUPPORT_PCRE2_16
1668 if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1669 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1670 total, current->pattern, current->input);
1671 is_successful = 0;
1672 }
1673 #endif
1674 #ifdef SUPPORT_PCRE2_32
1675 if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1676 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1677 total, current->pattern, current->input);
1678 is_successful = 0;
1679 }
1680 #endif
1681 }
1682
1683 #ifdef SUPPORT_PCRE2_8
1684 pcre2_code_free_8(re8);
1685 pcre2_match_data_free_8(mdata8_1);
1686 pcre2_match_data_free_8(mdata8_2);
1687 pcre2_match_context_free_8(mcontext8);
1688 #endif
1689 #ifdef SUPPORT_PCRE2_16
1690 pcre2_code_free_16(re16);
1691 pcre2_match_data_free_16(mdata16_1);
1692 pcre2_match_data_free_16(mdata16_2);
1693 pcre2_match_context_free_16(mcontext16);
1694 #endif
1695 #ifdef SUPPORT_PCRE2_32
1696 pcre2_code_free_32(re32);
1697 pcre2_match_data_free_32(mdata32_1);
1698 pcre2_match_data_free_32(mdata32_2);
1699 pcre2_match_context_free_32(mcontext32);
1700 #endif
1701
1702 if (is_successful) {
1703 successful++;
1704 successful_row++;
1705 printf(".");
1706 if (successful_row >= 60) {
1707 successful_row = 0;
1708 printf("\n");
1709 }
1710 } else
1711 successful_row = 0;
1712
1713 fflush(stdout);
1714 current++;
1715 }
1716 #ifdef SUPPORT_PCRE2_8
1717 setstack8(NULL);
1718 #endif
1719 #ifdef SUPPORT_PCRE2_16
1720 setstack16(NULL);
1721 #endif
1722 #ifdef SUPPORT_PCRE2_32
1723 setstack32(NULL);
1724 #endif
1725
1726 if (total == successful) {
1727 printf("\nAll JIT regression tests are successfully passed.\n");
1728 return 0;
1729 } else {
1730 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1731 return 1;
1732 }
1733 }
1734
1735 /* End of pcre2_jit_test.c */
1736