• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53    \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
55    \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
56    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57    \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58    \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60    \xc2\x85 = 0x85 = 133 (Next Line = NEL)
61    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63    \xc3\xa9 = 0xe9 = 233 (e')
64       \xc3\x89 = 0xc9 = 201 (E')
65    \xc3\xa1 = 0xe1 = 225 (a')
66       \xc3\x81 = 0xc1 = 193 (A')
67    \x53 = 0x53 = S
68      \x73 = 0x73 = s
69      \xc5\xbf = 0x17f = 383 (long S)
70    \xc8\xba = 0x23a = 570
71       \xe2\xb1\xa5 = 0x2c65 = 11365
72    \xe1\xbd\xb8 = 0x1f78 = 8056
73       \xe1\xbf\xb8 = 0x1ff8 = 8184
74    \xf0\x90\x90\x80 = 0x10400 = 66560
75       \xf0\x90\x90\xa8 = 0x10428 = 66600
76    \xc7\x84 = 0x1c4 = 452
77      \xc7\x85 = 0x1c5 = 453
78      \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80    ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81    ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82    ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85    \xcc\x8d = 0x30d = 781
86  Special:
87    \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88    \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89    \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90    \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91    \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92    \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Forward declarations of the four test drivers called from main(). Each one
   takes no arguments and returns an int; main() combines the four results
   with bitwise OR and returns that value as the process exit status. */
95 static int regression_tests(void);
96 static int invalid_utf8_regression_tests(void);
97 static int invalid_utf16_regression_tests(void);
98 static int invalid_utf32_regression_tests(void);
99 
/*
 * Entry point of the JIT regression tester.
 *
 * Queries the library for JIT support through whichever code unit width is
 * compiled in (8-bit is tried first, then 16-bit, then 32-bit). When JIT is
 * not available a message is printed and 1 is returned. Otherwise all four
 * test drivers are executed and their results are ORed together, so the
 * returned status is non-zero if any driver returned non-zero.
 */
int main(void)
{
	/* PCRE2_CONFIG_JIT writes its answer as a uint32_t, so the receiving
	   variable uses exactly that type (uint32_t comes in via pcre2.h). */
	uint32_t jit = 0;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif

	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}

	/* Bitwise OR (not ||) on purpose: every driver runs even when an
	   earlier one has already reported a failure. */
	return regression_tests()
		| invalid_utf8_regression_tests()
		| invalid_utf16_regression_tests()
		| invalid_utf32_regression_tests();
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
/* Refuse to build unless at least one code unit width is enabled. */
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Shorthand combinations of PCRE2 compile option bits. They are used as the
   first (compile_options) value of the test case initializers. */
126 #define MU	(PCRE2_MULTILINE | PCRE2_UTF)
127 #define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
128 #define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
129 #define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
130 #define M	(PCRE2_MULTILINE)
131 #define MP	(PCRE2_MULTILINE | PCRE2_UCP)
132 #define U	(PCRE2_UTF)
133 #define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)
134 
/* The newline value of a test case packs two settings into one int: a
   PCRE2_NEWLINE_* value in the low 16 bits and, optionally, a PCRE2_BSR_*
   value moved above them with BSR(). A abbreviates the ANYCRLF newline
   convention. */
135 #define BSR(x)	((x) << 16)
136 #define A	PCRE2_NEWLINE_ANYCRLF
137 
/* Split a packed newline value back into its low 16 bits and the part that
   BSR() shifted above them. */
138 #define GET_NEWLINE(x)	((x) & 0xffff)
139 #define GET_BSR(x)	((x) >> 16)
140 
/* Flag bits that the test table ORs into the start_offset value (for example
   "0 | F_NOMATCH"). OFFSET_MASK covers the low 16 bits; presumably it is used
   to recover the plain offset -- the consuming code is outside this view.
   NOTE(review): F_NO16 and F_NO32 have the same value (0x020000), so the two
   flags cannot be distinguished -- confirm this is intentional.
   NOTE(review): the meaning of each flag (F_NOMATCH presumably marks cases
   that are expected not to match) is defined by code not visible here. */
141 #define OFFSET_MASK	0x00ffff
142 #define F_NO8		0x010000
143 #define F_NO16		0x020000
144 #define F_NO32		0x020000
145 #define F_NOMATCH	0x040000
146 #define F_DIFF		0x080000
147 #define F_FORCECONV	0x100000
148 #define F_PROPERTY	0x200000
149 
/* One row of the regression test table. The table initializers are
   positional, so the order of the members below must not change. */
150 struct regression_test_case {
151 	int compile_options;	/* PCRE2 compile option bits, e.g. MU or CMU */
152 	int newline;		/* PCRE2_NEWLINE_* value, optionally ORed with BSR(...) */
153 	int match_options;	/* match-time option bits, e.g. PCRE2_NOTBOL */
154 	int start_offset;	/* start offset ORed with F_* flag bits */
155 	const char *pattern;	/* pattern text (presumably the regex under test) */
156 	const char *input;	/* input text (presumably the subject to match) */
157 };
158 
159 static struct regression_test_case regression_test_cases[] = {
160 	/* Constant strings. */
161 	{ MU, A, 0, 0, "AbC", "AbAbC" },
162 	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163 	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164 	{ M, A, 0, 0, "[^a]", "aAbB" },
165 	{ CM, A, 0, 0, "[^m]", "mMnN" },
166 	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167 	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168 	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169 	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170 	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175 	{ MU, A, 0, 0, "[axd]", "sAXd" },
176 	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177 	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178 	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179 	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180 	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181 	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182 	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183 	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184 	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185 	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186 	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187 	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189 	{ M, A, 0, 0, "\\Ca", "cda" },
190 	{ CM, A, 0, 0, "\\Ca", "CDA" },
191 	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192 	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198 	{ M, A, 0, 0, "[3-57-9]", "5" },
199 
200 	/* Assertions. */
201 	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
202 	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
203 	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
204 	{ MP, A, 0, 0, "\\B", "_\xa1" },
205 	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
206 	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
207 	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
208 	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
209 	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
210 	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
211 	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
212 	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
213 	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
214 	{ 0, 0, 0, 0, "^ab", "ab" },
215 	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
216 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
217 	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
218 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
219 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
220 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
221 	{ 0, 0, 0, 0, "ab$", "ab" },
222 	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
223 	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
224 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
225 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
226 	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
227 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
228 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
229 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
230 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
231 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
232 	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
233 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
234 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
235 	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
236 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
237 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
238 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
239 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
240 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
241 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
242 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
243 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
244 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
245 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
246 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
247 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
248 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
249 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
250 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
251 	{ M, A, 0, 0, "\\Aa", "aaa" },
252 	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
253 	{ M, A, 0, 1, "\\Ga", "aaa" },
254 	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
255 	{ M, A, 0, 0, "a\\z", "aaa" },
256 	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
257 
258 	/* Brackets and alternatives. */
259 	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
260 	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
261 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
262 	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
263 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
264 	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
265 	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
266 	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
267 	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
268 	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
269 	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
270 	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
271 	{ CM, A, 0, 0, "ab|cd", "CD" },
272 	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
273 	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
274 
275 	/* Greedy and non-greedy ? operators. */
276 	{ MU, A, 0, 0, "(?:a)?a", "laab" },
277 	{ CMU, A, 0, 0, "(A)?A", "llaab" },
278 	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
279 	{ MU, A, 0, 0, "(a)?a", "manm" },
280 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
281 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
282 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
283 
284 	/* Greedy and non-greedy + operators */
285 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
286 	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
287 	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
288 	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
289 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
290 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
291 	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
292 
293 	/* Greedy and non-greedy * operators */
294 	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
295 	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
296 	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
297 	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
298 	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
299 	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
300 	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
301 	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
302 
303 	/* Combining ? + * operators */
304 	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
305 	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
306 	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
307 	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
308 	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
309 
310 	/* Single character iterators. */
311 	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
312 	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
313 	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
314 	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
315 	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
316 	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
317 	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
318 	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
319 	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
320 	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
321 	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
322 	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
323 	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
324 	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
325 	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
326 	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
327 	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
328 	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
329 	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
330 	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
331 	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
332 	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
333 	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
334 	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
335 	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
336 	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
337 	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
338 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
339 	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
340 	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
341 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
342 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
343 	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
344 	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
345 	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
346 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
347 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
348 	{ MU, A, 0, 0, ".[ab]*.", "xx" },
349 	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
350 	{ MU, A, 0, 0, ".[ab]?.", "xx" },
351 
352 	/* Bracket repeats with limit. */
353 	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
354 	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
355 	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
356 	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
357 	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
358 	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
359 	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
360 	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
361 	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
362 
363 	/* Basic character sets. */
364 	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
365 	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
366 	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
367 	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
368 	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
369 	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
370 	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
371 	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
372 	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
373 	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
374 	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
375 	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
376 	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
377 	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
378 	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
379 	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
380 	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
381 	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
382 	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
383 	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
384 	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
385 	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
386 	{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
387 
388 	/* Unicode properties. */
389 	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
390 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
391 	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
392 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
393 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
394 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
395 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
396 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
397 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
398 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
399 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
400 	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
401 	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
402 	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
403 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
404 	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
405 	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
406 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
407 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
408 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
409 
410 	/* Possible empty brackets. */
411 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
412 	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
413 	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
414 	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
415 	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
416 	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
417 	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
418 	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
419 	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
420 	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
421 
422 	/* Start offset. */
423 	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
424 	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
425 	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
426 	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
427 
428 	/* Newline. */
429 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
430 	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
431 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
432 	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
433 	{ MU, A, 0, 1, "^", "\r\n" },
434 	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
435 	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
436 
437 	/* Any character except newline or any newline. */
438 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
439 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
440 	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
441 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
442 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
443 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
444 	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
445 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
446 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
447 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
448 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
449 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
450 	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
451 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
452 	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
453 	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
454 	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
455 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
456 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
457 	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
458 	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
459 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
460 	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
461 	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
462 	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
463 	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
464 
465 	/* Atomic groups (no fallback from "next" direction). */
466 	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
467 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
468 	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
469 			"bababcdedefgheijijklmlmnop" },
470 	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
471 	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
472 	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
473 	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
474 	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
475 	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
476 	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
477 	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
478 	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
479 	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
480 	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
481 	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
482 	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
483 	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
484 	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
485 	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
486 	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
487 	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
488 	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
489 	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
490 	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
491 	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
492 	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
493 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
494 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
495 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
496 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
497 	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
498 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
499 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
500 	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
501 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
502 	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
503 	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
504 
505 	/* Possessive quantifiers. */
506 	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
507 	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
508 	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
509 	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
510 	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
511 	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
512 	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
513 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
514 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
515 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
516 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
517 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
518 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
519 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
520 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
521 	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
522 	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
523 	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
524 	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
525 	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
526 	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
527 	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
528 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
529 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
530 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
531 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
532 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
533 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
534 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
535 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
536 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
537 	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
538 	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
539 	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
540 	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
541 
542 	/* Back references. */
543 	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
544 	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
545 	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
546 	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
547 	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
548 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
549 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
550 	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
551 	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
552 	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
553 	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
554 	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
555 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
556 	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
557 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
558 	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
559 	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
560 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
561 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
562 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
563 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
564 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
565 	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
566 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
567 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
568 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
569 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
570 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
571 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
572 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
573 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
574 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
575 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
576 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
577 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
578 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
579 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
580 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
581 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
582 
583 	/* Assertions. */
584 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
585 	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
586 	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
587 	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
588 	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
589 	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
590 	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
591 	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
592 	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
593 	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
594 	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
595 	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
596 	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
597 	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
598 	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
599 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
600 	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
601 	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
602 	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
603 	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
604 	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
605 	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
606 	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
607 	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
608 	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
609 	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
610 	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
611 	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
612 	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
613 	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
614 	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
615 	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
616 	{ MU, A, 0, 0, "a(?=)b", "ab" },
617 	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
618 
619 	/* Not empty, ACCEPT, FAIL */
620 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
621 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
622 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
623 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
624 	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
625 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
626 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
627 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
628 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
629 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
630 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
631 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
632 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
633 	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
634 	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
635 	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
636 	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
637 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
638 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
639 
640 	/* Conditional blocks. */
641 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
642 	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
643 	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
644 	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
645 	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
646 	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
647 	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
648 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
649 	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
650 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
651 	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
652 	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
653 	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
654 	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
655 	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
656 	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
657 	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
658 	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
659 	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
660 	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
661 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
662 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
663 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
664 	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
665 	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
666 	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
667 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
668 	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
669 	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
670 	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
671 	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
672 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
673 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
674 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
675 	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
676 	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
677 	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
678 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
679 
680 	/* Set start of match. */
681 	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
682 	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
683 	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
684 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
685 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
686 
687 	/* First line. */
688 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
689 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
690 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
691 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
692 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
693 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
694 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
695 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
696 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
697 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
698 	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
699 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
700 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
701 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
702 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
703 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
704 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
705 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
706 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
707 	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
708 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
709 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
710 
711 	/* Recurse. */
712 	{ MU, A, 0, 0, "(a)(?1)", "aa" },
713 	{ MU, A, 0, 0, "((a))(?1)", "aa" },
714 	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
715 	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
716 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
717 	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
718 	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
719 	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
720 	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
721 	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
722 	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
723 	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
724 	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
725 	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
726 	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
727 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
728 	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
729 	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
730 	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
731 	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
732 	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
733 	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
734 	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
735 	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
736 	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
737 	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
738 	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
739 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
740 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
741 	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
742 	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
743 
744 	/* 16 bit specific tests. */
745 	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
746 	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
747 	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
748 	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
749 	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
750 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
751 	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
752 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
753 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
754 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
755 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
756 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
757 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
758 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
759 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
760 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
761 	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
762 	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
763 	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
764 	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
765 	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
766 	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
767 	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
768 	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
769 	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
770 	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
771 	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
772 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
773 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
774 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
775 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
776 
777 	/* Partial matching. */
778 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
779 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
780 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
781 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
782 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
783 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
784 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
785 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
786 
787 	/* (*MARK) verb. */
788 	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
789 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
790 	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
791 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
792 	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
793 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
794 	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
795 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
796 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
797 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
798 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
799 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
800 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
801 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
802 	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
803 
804 	/* (*COMMIT) verb. */
805 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
806 	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
807 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
808 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
809 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
810 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
811 
812 	/* (*PRUNE) verb. */
813 	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
814 	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
815 	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
816 	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
817 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
818 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
819 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
820 	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
821 	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
822 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
823 	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
824 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
825 	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
826 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
827 	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
828 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
829 	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
830 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
831 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
832 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
833 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
834 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
835 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
836 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
837 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
838 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
839 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
840 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
841 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
842 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
843 
844 	/* (*SKIP) verb. */
845 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
846 	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
847 	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
848 	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
849 
850 	/* (*THEN) verb. */
851 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
852 	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
853 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
854 	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
855 	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
856 	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
857 	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
858 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
859 	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
860 
861 	/* Recurse and control verbs. */
862 	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
863 	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
864 	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
865 	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
866 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
867 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
868 	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
869 	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
870 
871 #ifdef SUPPORT_UNICODE
872 	/* Script runs and iterations. */
873 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
874 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
875 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
876 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
877 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
878 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
879 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
880 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
881 #endif
882 
883 	/* Deep recursion. */
884 	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
885 	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
886 	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
887 
888 	/* Deep recursion: Stack limit reached. */
889 	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
890 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
891 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
892 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
893 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
894 
895 	{ 0, 0, 0, 0, NULL, NULL }
896 };
897 
898 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)899 static pcre2_jit_stack_8* callback8(void *arg)
900 {
901 	return (pcre2_jit_stack_8 *)arg;
902 }
903 #endif
904 
905 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)906 static pcre2_jit_stack_16* callback16(void *arg)
907 {
908 	return (pcre2_jit_stack_16 *)arg;
909 }
910 #endif
911 
912 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)913 static pcre2_jit_stack_32* callback32(void *arg)
914 {
915 	return (pcre2_jit_stack_32 *)arg;
916 }
917 #endif
918 
919 #ifdef SUPPORT_PCRE2_8
920 static pcre2_jit_stack_8 *stack8;
921 
getstack8(void)922 static pcre2_jit_stack_8 *getstack8(void)
923 {
924 	if (!stack8)
925 		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
926 	return stack8;
927 }
928 
/* Attaches the cached 8 bit JIT stack to a match context, or tears it down.

mcontext != NULL: callback8 is registered on the context with the cached
                  stack (created on demand by getstack8()) as its argument.
mcontext == NULL: the cached stack, if there is one, is freed and forgotten,
                  so a later getstack8() call creates a new one. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	if (stack8)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
940 #endif /* SUPPORT_PCRE2_8 */
941 
942 #ifdef SUPPORT_PCRE2_16
943 static pcre2_jit_stack_16 *stack16;
944 
getstack16(void)945 static pcre2_jit_stack_16 *getstack16(void)
946 {
947 	if (!stack16)
948 		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
949 	return stack16;
950 }
951 
/* Attaches the cached 16 bit JIT stack to a match context, or tears it down.

mcontext != NULL: callback16 is registered on the context with the cached
                  stack (created on demand by getstack16()) as its argument.
mcontext == NULL: the cached stack, if there is one, is freed and forgotten,
                  so a later getstack16() call creates a new one. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	if (stack16)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
963 #endif /* SUPPORT_PCRE2_16 */
964 
965 #ifdef SUPPORT_PCRE2_32
966 static pcre2_jit_stack_32 *stack32;
967 
getstack32(void)968 static pcre2_jit_stack_32 *getstack32(void)
969 {
970 	if (!stack32)
971 		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
972 	return stack32;
973 }
974 
/* Attaches the cached 32 bit JIT stack to a match context, or tears it down.

mcontext != NULL: callback32 is registered on the context with the cached
                  stack (created on demand by getstack32()) as its argument.
mcontext == NULL: the cached stack, if there is one, is freed and forgotten,
                  so a later getstack32() call creates a new one. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	if (stack32)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
986 #endif /* SUPPORT_PCRE2_32 */
987 
988 #ifdef SUPPORT_PCRE2_16
989 
/* Converts a NUL-terminated UTF-8 string into NUL-terminated UTF-16.

input       Source bytes. Decoding is lenient: continuation bytes are only
            masked, never validated, so for instance a surrogate written as a
            three-byte sequence is passed through as that single unit.
output      Destination with room for max_length 16 bit units, terminator
            included.
offsetmap   Optional, may be NULL. Entry n receives the byte offset in input
            of the character that starts at output unit n. The slot that
            belongs to a low surrogate is skipped and left untouched. When the
            loop runs to its end, one further entry receives the byte offset
            at which conversion stopped.
max_length  Capacity of output in units; with 0 nothing at all is written.

Returns the number of units stored, not counting the terminator. Conversion
stops early when the next character no longer fits.

Malformed input never makes the reader run past the terminator: a lead byte
in the range 0xf8-0xff, or a multi-byte sequence cut short by the NUL, is
copied as one single unit, exactly like a stray byte below 0xc0. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int length;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Sequence length announced by the lead byte. */
		if (*iptr < 0xc0 || *iptr >= 0xf8)
			length = 1;
		else if (*iptr < 0xe0)
			length = 2;
		else if (*iptr < 0xf0)
			length = 3;
		else
			length = 4;

		/* A NUL inside the announced sequence means the input is truncated;
		consuming the full length would step over the terminator. */
		for (i = 1; i < length; i++) {
			if (iptr[i] == 0) {
				length = 1;
				break;
			}
		}

		switch (length) {
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += length;

		if (c < 65536) {
			*optr++ = (PCRE2_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE2_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE2_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* The low surrogate has no source offset of its own. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
1037 
/* Widens a NUL-terminated 8 bit string to 16 bit units, one unit per byte and
without any decoding.

input       Source bytes.
output      Destination with room for max_length units, terminator included.
max_length  Capacity of output in units; with 0 nothing at all is written.

Returns the number of units copied, not counting the terminator. At most
max_length - 1 units are copied; the output is always terminated. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int copied;

	if (max_length == 0)
		return 0;

	for (copied = 0; input[copied] && max_length > 1; copied++, max_length--)
		output[copied] = input[copied];

	output[copied] = '\0';
	return copied;
}

/* Capacity, in 16 bit units, of the scratch buffers below. */
#define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer that receives the 16 bit form of a test string. */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* One int per unit of regtest_buf16; presumably the offsetmap argument of
convert_utf8_to_utf16() - confirm against regression_tests(). */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1057 
1058 #endif /* SUPPORT_PCRE2_16 */
1059 
1060 #ifdef SUPPORT_PCRE2_32
1061 
/* Converts a NUL-terminated UTF-8 string into NUL-terminated UTF-32.

input       Source bytes. Decoding is lenient: continuation bytes are only
            masked, never validated.
output      Destination with room for max_length 32 bit units, terminator
            included.
offsetmap   Optional, may be NULL. Entry n receives the byte offset in input
            of the character stored at output unit n; one further entry
            receives the byte offset at which conversion stopped.
max_length  Capacity of output in units; with 0 nothing at all is written.

Returns the number of units stored, not counting the terminator. At most
max_length - 1 characters are converted.

Malformed input never makes the reader run past the terminator: a lead byte
in the range 0xf8-0xff, or a multi-byte sequence cut short by the NUL, is
copied as one single unit, exactly like a stray byte below 0xc0. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int length;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Sequence length announced by the lead byte. */
		if (*iptr < 0xc0 || *iptr >= 0xf8)
			length = 1;
		else if (*iptr < 0xe0)
			length = 2;
		else if (*iptr < 0xf0)
			length = 3;
		else
			length = 4;

		/* A NUL inside the announced sequence means the input is truncated;
		consuming the full length would step over the terminator. */
		for (i = 1; i < length; i++) {
			if (iptr[i] == 0) {
				length = 1;
				break;
			}
		}

		switch (length) {
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += length;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1097 
/* Widens a NUL-terminated 8 bit string to 32 bit units, one unit per byte and
without any decoding.

input       Source bytes.
output      Destination with room for max_length units, terminator included.
max_length  Capacity of output in units; with 0 nothing at all is written.

Returns the number of units copied, not counting the terminator. At most
max_length - 1 units are copied; the output is always terminated. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int copied;

	if (max_length == 0)
		return 0;

	for (copied = 0; input[copied] && max_length > 1; copied++, max_length--)
		output[copied] = input[copied];

	output[copied] = '\0';
	return copied;
}

/* Capacity, in 32 bit units, of the scratch buffers below. */
#define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer that receives the 32 bit form of a test string. */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* One int per unit of regtest_buf32; presumably the offsetmap argument of
convert_utf8_to_utf32() - confirm against regression_tests(). */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1117 
1118 #endif /* SUPPORT_PCRE2_32 */
1119 
/* Tells whether a NUL-terminated string consists of 7 bit ASCII only.

input  String to scan; must not be NULL.

Returns 1 when every byte before the terminator is at most 127 (which also
holds for the empty string) and 0 as soon as a larger byte is found. The bytes
are inspected as unsigned char so the result does not depend on whether plain
char is signed. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr = (const unsigned char *)input;

	for (; *ptr; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
1130 
1131 #define OVECTOR_SIZE 15
1132 
regression_tests(void)1133 static int regression_tests(void)
1134 {
1135 	struct regression_test_case *current = regression_test_cases;
1136 	int error;
1137 	PCRE2_SIZE err_offs;
1138 	int is_successful;
1139 	int is_ascii;
1140 	int total = 0;
1141 	int successful = 0;
1142 	int successful_row = 0;
1143 	int counter = 0;
1144 	int jit_compile_mode;
1145 	int utf = 0;
1146 	int disabled_options = 0;
1147 	int i;
1148 #ifdef SUPPORT_PCRE2_8
1149 	pcre2_code_8 *re8;
1150 	pcre2_compile_context_8 *ccontext8;
1151 	pcre2_match_data_8 *mdata8_1;
1152 	pcre2_match_data_8 *mdata8_2;
1153 	pcre2_match_context_8 *mcontext8;
1154 	PCRE2_SIZE *ovector8_1 = NULL;
1155 	PCRE2_SIZE *ovector8_2 = NULL;
1156 	int return_value8[2];
1157 #endif
1158 #ifdef SUPPORT_PCRE2_16
1159 	pcre2_code_16 *re16;
1160 	pcre2_compile_context_16 *ccontext16;
1161 	pcre2_match_data_16 *mdata16_1;
1162 	pcre2_match_data_16 *mdata16_2;
1163 	pcre2_match_context_16 *mcontext16;
1164 	PCRE2_SIZE *ovector16_1 = NULL;
1165 	PCRE2_SIZE *ovector16_2 = NULL;
1166 	int return_value16[2];
1167 	int length16;
1168 #endif
1169 #ifdef SUPPORT_PCRE2_32
1170 	pcre2_code_32 *re32;
1171 	pcre2_compile_context_32 *ccontext32;
1172 	pcre2_match_data_32 *mdata32_1;
1173 	pcre2_match_data_32 *mdata32_2;
1174 	pcre2_match_context_32 *mcontext32;
1175 	PCRE2_SIZE *ovector32_1 = NULL;
1176 	PCRE2_SIZE *ovector32_2 = NULL;
1177 	int return_value32[2];
1178 	int length32;
1179 #endif
1180 
1181 #if defined SUPPORT_PCRE2_8
1182 	PCRE2_UCHAR8 cpu_info[128];
1183 #elif defined SUPPORT_PCRE2_16
1184 	PCRE2_UCHAR16 cpu_info[128];
1185 #elif defined SUPPORT_PCRE2_32
1186 	PCRE2_UCHAR32 cpu_info[128];
1187 #endif
1188 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1189 	int return_value;
1190 #endif
1191 
1192 	/* This test compares the behaviour of interpreter and JIT. Although disabling
1193 	utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1194 	still considered successful from pcre_jit_test point of view. */
1195 
1196 #if defined SUPPORT_PCRE2_8
1197 	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1198 #elif defined SUPPORT_PCRE2_16
1199 	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1200 #elif defined SUPPORT_PCRE2_32
1201 	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1202 #endif
1203 
1204 	printf("Running JIT regression tests\n");
1205 	printf("  target CPU of SLJIT compiler: ");
1206 	for (i = 0; cpu_info[i]; i++)
1207 		printf("%c", (char)(cpu_info[i]));
1208 	printf("\n");
1209 
1210 #if defined SUPPORT_PCRE2_8
1211 	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1212 #elif defined SUPPORT_PCRE2_16
1213 	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1214 #elif defined SUPPORT_PCRE2_32
1215 	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1216 #endif
1217 
1218 	if (!utf)
1219 		disabled_options |= PCRE2_UTF;
1220 #ifdef SUPPORT_PCRE2_8
1221 	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1222 #endif
1223 #ifdef SUPPORT_PCRE2_16
1224 	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1225 #endif
1226 #ifdef SUPPORT_PCRE2_32
1227 	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1228 #endif
1229 
1230 	while (current->pattern) {
1231 		/* printf("\nPattern: %s :\n", current->pattern); */
1232 		total++;
1233 		is_ascii = 0;
1234 		if (!(current->start_offset & F_PROPERTY))
1235 			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1236 
1237 		if (current->match_options & PCRE2_PARTIAL_SOFT)
1238 			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1239 		else if (current->match_options & PCRE2_PARTIAL_HARD)
1240 			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1241 		else
1242 			jit_compile_mode = PCRE2_JIT_COMPLETE;
1243 		error = 0;
1244 #ifdef SUPPORT_PCRE2_8
1245 		re8 = NULL;
1246 		ccontext8 = pcre2_compile_context_create_8(NULL);
1247 		if (ccontext8) {
1248 			if (GET_NEWLINE(current->newline))
1249 				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1250 			if (GET_BSR(current->newline))
1251 				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1252 
1253 			if (!(current->start_offset & F_NO8)) {
1254 				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1255 					current->compile_options & ~disabled_options,
1256 					&error, &err_offs, ccontext8);
1257 
1258 				if (!re8 && (utf || is_ascii))
1259 					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1260 			}
1261 			pcre2_compile_context_free_8(ccontext8);
1262 		}
1263 		else
1264 			printf("\n8 bit: Cannot allocate compile context\n");
1265 #endif
1266 #ifdef SUPPORT_PCRE2_16
1267 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1268 			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1269 		else
1270 			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1271 
1272 		re16 = NULL;
1273 		ccontext16 = pcre2_compile_context_create_16(NULL);
1274 		if (ccontext16) {
1275 			if (GET_NEWLINE(current->newline))
1276 				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1277 			if (GET_BSR(current->newline))
1278 				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1279 
1280 			if (!(current->start_offset & F_NO16)) {
1281 				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1282 					current->compile_options & ~disabled_options,
1283 					&error, &err_offs, ccontext16);
1284 
1285 				if (!re16 && (utf || is_ascii))
1286 					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1287 			}
1288 			pcre2_compile_context_free_16(ccontext16);
1289 		}
1290 		else
1291 			printf("\n16 bit: Cannot allocate compile context\n");
1292 #endif
1293 #ifdef SUPPORT_PCRE2_32
1294 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1295 			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1296 		else
1297 			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1298 
1299 		re32 = NULL;
1300 		ccontext32 = pcre2_compile_context_create_32(NULL);
1301 		if (ccontext32) {
1302 			if (GET_NEWLINE(current->newline))
1303 				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1304 			if (GET_BSR(current->newline))
1305 				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1306 
1307 			if (!(current->start_offset & F_NO32)) {
1308 				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1309 					current->compile_options & ~disabled_options,
1310 					&error, &err_offs, ccontext32);
1311 
1312 				if (!re32 && (utf || is_ascii))
1313 					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1314 			}
1315 			pcre2_compile_context_free_32(ccontext32);
1316 		}
1317 		else
1318 			printf("\n32 bit: Cannot allocate compile context\n");
1319 #endif
1320 
1321 		counter++;
1322 		if ((counter & 0x3) != 0) {
1323 #ifdef SUPPORT_PCRE2_8
1324 			setstack8(NULL);
1325 #endif
1326 #ifdef SUPPORT_PCRE2_16
1327 			setstack16(NULL);
1328 #endif
1329 #ifdef SUPPORT_PCRE2_32
1330 			setstack32(NULL);
1331 #endif
1332 		}
1333 
1334 #ifdef SUPPORT_PCRE2_8
1335 		return_value8[0] = -1000;
1336 		return_value8[1] = -1000;
1337 		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1338 		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1339 		mcontext8 = pcre2_match_context_create_8(NULL);
1340 		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1341 			printf("\n8 bit: Cannot allocate match data\n");
1342 			pcre2_match_data_free_8(mdata8_1);
1343 			pcre2_match_data_free_8(mdata8_2);
1344 			pcre2_match_context_free_8(mcontext8);
1345 			pcre2_code_free_8(re8);
1346 			re8 = NULL;
1347 		} else {
1348 			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1349 			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1350 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1351 				ovector8_1[i] = -2;
1352 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1353 				ovector8_2[i] = -2;
1354 		}
1355 		if (re8) {
1356 			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1357 				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
1358 
1359 			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1360 				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1361 			} else if ((counter & 0x1) != 0) {
1362 				setstack8(mcontext8);
1363 				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1364 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1365 			} else {
1366 				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1367 				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1368 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1369 			}
1370 		}
1371 #endif
1372 
1373 #ifdef SUPPORT_PCRE2_16
1374 		return_value16[0] = -1000;
1375 		return_value16[1] = -1000;
1376 		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1377 		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1378 		mcontext16 = pcre2_match_context_create_16(NULL);
1379 		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1380 			printf("\n16 bit: Cannot allocate match data\n");
1381 			pcre2_match_data_free_16(mdata16_1);
1382 			pcre2_match_data_free_16(mdata16_2);
1383 			pcre2_match_context_free_16(mcontext16);
1384 			pcre2_code_free_16(re16);
1385 			re16 = NULL;
1386 		} else {
1387 			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1388 			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1389 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1390 				ovector16_1[i] = -2;
1391 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1392 				ovector16_2[i] = -2;
1393 		}
1394 		if (re16) {
1395 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1396 				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1397 			else
1398 				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1399 
1400 			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1401 				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
1402 
1403 			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1404 				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1405 			} else if ((counter & 0x1) != 0) {
1406 				setstack16(mcontext16);
1407 				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1408 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1409 			} else {
1410 				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1411 				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1412 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1413 			}
1414 		}
1415 #endif
1416 
1417 #ifdef SUPPORT_PCRE2_32
1418 		return_value32[0] = -1000;
1419 		return_value32[1] = -1000;
1420 		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1421 		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1422 		mcontext32 = pcre2_match_context_create_32(NULL);
1423 		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1424 			printf("\n32 bit: Cannot allocate match data\n");
1425 			pcre2_match_data_free_32(mdata32_1);
1426 			pcre2_match_data_free_32(mdata32_2);
1427 			pcre2_match_context_free_32(mcontext32);
1428 			pcre2_code_free_32(re32);
1429 			re32 = NULL;
1430 		} else {
1431 			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1432 			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1433 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1434 				ovector32_1[i] = -2;
1435 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1436 				ovector32_2[i] = -2;
1437 		}
1438 		if (re32) {
1439 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1440 				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1441 			else
1442 				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1443 
1444 			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1445 				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
1446 
1447 			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1448 				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1449 			} else if ((counter & 0x1) != 0) {
1450 				setstack32(mcontext32);
1451 				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1452 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1453 			} else {
1454 				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1455 				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1456 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1457 			}
1458 		}
1459 #endif
1460 
1461 		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1462 			return_value8[0], return_value16[0], return_value32[0],
1463 			(int)ovector8_1[0], (int)ovector8_1[1],
1464 			(int)ovector16_1[0], (int)ovector16_1[1],
1465 			(int)ovector32_1[0], (int)ovector32_1[1],
1466 			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1467 
1468 		/* If F_DIFF is set, just run the test, but do not compare the results.
1469 		Segfaults can still be captured. */
1470 
1471 		is_successful = 1;
1472 		if (!(current->start_offset & F_DIFF)) {
1473 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1474 			if (!(current->start_offset & F_FORCECONV)) {
1475 
1476 				/* All results must be the same. */
1477 #ifdef SUPPORT_PCRE2_8
1478 				if ((return_value = return_value8[0]) != return_value8[1]) {
1479 					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1480 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1481 					is_successful = 0;
1482 				} else
1483 #endif
1484 #ifdef SUPPORT_PCRE2_16
1485 				if ((return_value = return_value16[0]) != return_value16[1]) {
1486 					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1487 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1488 					is_successful = 0;
1489 				} else
1490 #endif
1491 #ifdef SUPPORT_PCRE2_32
1492 				if ((return_value = return_value32[0]) != return_value32[1]) {
1493 					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1494 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1495 					is_successful = 0;
1496 				} else
1497 #endif
1498 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1499 				if (return_value8[0] != return_value16[0]) {
1500 					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1501 						return_value8[0], return_value16[0],
1502 						total, current->pattern, current->input);
1503 					is_successful = 0;
1504 				} else
1505 #endif
1506 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1507 				if (return_value8[0] != return_value32[0]) {
1508 					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1509 						return_value8[0], return_value32[0],
1510 						total, current->pattern, current->input);
1511 					is_successful = 0;
1512 				} else
1513 #endif
1514 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1515 				if (return_value16[0] != return_value32[0]) {
1516 					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1517 						return_value16[0], return_value32[0],
1518 						total, current->pattern, current->input);
1519 					is_successful = 0;
1520 				} else
1521 #endif
1522 				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1523 					if (return_value == PCRE2_ERROR_PARTIAL) {
1524 						return_value = 2;
1525 					} else {
1526 						return_value *= 2;
1527 					}
1528 #ifdef SUPPORT_PCRE2_8
1529 					return_value8[0] = return_value;
1530 #endif
1531 #ifdef SUPPORT_PCRE2_16
1532 					return_value16[0] = return_value;
1533 #endif
1534 #ifdef SUPPORT_PCRE2_32
1535 					return_value32[0] = return_value;
1536 #endif
1537 					/* Transform back the results. */
1538 					if (current->compile_options & PCRE2_UTF) {
1539 #ifdef SUPPORT_PCRE2_16
1540 						for (i = 0; i < return_value; ++i) {
1541 							if (ovector16_1[i] != PCRE2_UNSET)
1542 								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1543 							if (ovector16_2[i] != PCRE2_UNSET)
1544 								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1545 						}
1546 #endif
1547 #ifdef SUPPORT_PCRE2_32
1548 						for (i = 0; i < return_value; ++i) {
1549 							if (ovector32_1[i] != PCRE2_UNSET)
1550 								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1551 							if (ovector32_2[i] != PCRE2_UNSET)
1552 								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1553 						}
1554 #endif
1555 					}
1556 
1557 					for (i = 0; i < return_value; ++i) {
1558 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1559 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1560 							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1561 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1562 								total, current->pattern, current->input);
1563 							is_successful = 0;
1564 						}
1565 #endif
1566 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1567 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1568 							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1569 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1570 								total, current->pattern, current->input);
1571 							is_successful = 0;
1572 						}
1573 #endif
1574 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1575 						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1576 							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1577 								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1578 								total, current->pattern, current->input);
1579 							is_successful = 0;
1580 						}
1581 #endif
1582 					}
1583 				}
1584 			} else
1585 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1586 			{
1587 #ifdef SUPPORT_PCRE2_8
1588 				if (return_value8[0] != return_value8[1]) {
1589 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1590 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1591 					is_successful = 0;
1592 				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1593 					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1594 						return_value8[0] = 2;
1595 					else
1596 						return_value8[0] *= 2;
1597 
1598 					for (i = 0; i < return_value8[0]; ++i)
1599 						if (ovector8_1[i] != ovector8_2[i]) {
1600 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1601 								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1602 							is_successful = 0;
1603 						}
1604 				}
1605 #endif
1606 
1607 #ifdef SUPPORT_PCRE2_16
1608 				if (return_value16[0] != return_value16[1]) {
1609 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1610 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1611 					is_successful = 0;
1612 				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1613 					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1614 						return_value16[0] = 2;
1615 					else
1616 						return_value16[0] *= 2;
1617 
1618 					for (i = 0; i < return_value16[0]; ++i)
1619 						if (ovector16_1[i] != ovector16_2[i]) {
1620 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1621 								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1622 							is_successful = 0;
1623 						}
1624 				}
1625 #endif
1626 
1627 #ifdef SUPPORT_PCRE2_32
1628 				if (return_value32[0] != return_value32[1]) {
1629 					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1630 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1631 					is_successful = 0;
1632 				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1633 					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1634 						return_value32[0] = 2;
1635 					else
1636 						return_value32[0] *= 2;
1637 
1638 					for (i = 0; i < return_value32[0]; ++i)
1639 						if (ovector32_1[i] != ovector32_2[i]) {
1640 							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1641 								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1642 							is_successful = 0;
1643 						}
1644 				}
1645 #endif
1646 			}
1647 		}
1648 
1649 		if (is_successful) {
1650 #ifdef SUPPORT_PCRE2_8
1651 			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1652 				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1653 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1654 						total, current->pattern, current->input);
1655 					is_successful = 0;
1656 				}
1657 
1658 				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1659 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1660 						total, current->pattern, current->input);
1661 					is_successful = 0;
1662 				}
1663 			}
1664 #endif
1665 #ifdef SUPPORT_PCRE2_16
1666 			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1667 				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1668 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1669 						total, current->pattern, current->input);
1670 					is_successful = 0;
1671 				}
1672 
1673 				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1674 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1675 						total, current->pattern, current->input);
1676 					is_successful = 0;
1677 				}
1678 			}
1679 #endif
1680 #ifdef SUPPORT_PCRE2_32
1681 			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1682 				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1683 					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1684 						total, current->pattern, current->input);
1685 					is_successful = 0;
1686 				}
1687 
1688 				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1689 					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1690 						total, current->pattern, current->input);
1691 					is_successful = 0;
1692 				}
1693 			}
1694 #endif
1695 		}
1696 
1697 		if (is_successful) {
1698 #ifdef SUPPORT_PCRE2_8
1699 			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1700 				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1701 					total, current->pattern, current->input);
1702 				is_successful = 0;
1703 			}
1704 #endif
1705 #ifdef SUPPORT_PCRE2_16
1706 			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1707 				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1708 					total, current->pattern, current->input);
1709 				is_successful = 0;
1710 			}
1711 #endif
1712 #ifdef SUPPORT_PCRE2_32
1713 			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1714 				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1715 					total, current->pattern, current->input);
1716 				is_successful = 0;
1717 			}
1718 #endif
1719 		}
1720 
1721 #ifdef SUPPORT_PCRE2_8
1722 		pcre2_code_free_8(re8);
1723 		pcre2_match_data_free_8(mdata8_1);
1724 		pcre2_match_data_free_8(mdata8_2);
1725 		pcre2_match_context_free_8(mcontext8);
1726 #endif
1727 #ifdef SUPPORT_PCRE2_16
1728 		pcre2_code_free_16(re16);
1729 		pcre2_match_data_free_16(mdata16_1);
1730 		pcre2_match_data_free_16(mdata16_2);
1731 		pcre2_match_context_free_16(mcontext16);
1732 #endif
1733 #ifdef SUPPORT_PCRE2_32
1734 		pcre2_code_free_32(re32);
1735 		pcre2_match_data_free_32(mdata32_1);
1736 		pcre2_match_data_free_32(mdata32_2);
1737 		pcre2_match_context_free_32(mcontext32);
1738 #endif
1739 
1740 		if (is_successful) {
1741 			successful++;
1742 			successful_row++;
1743 			printf(".");
1744 			if (successful_row >= 60) {
1745 				successful_row = 0;
1746 				printf("\n");
1747 			}
1748 		} else
1749 			successful_row = 0;
1750 
1751 		fflush(stdout);
1752 		current++;
1753 	}
1754 #ifdef SUPPORT_PCRE2_8
1755 	setstack8(NULL);
1756 #endif
1757 #ifdef SUPPORT_PCRE2_16
1758 	setstack16(NULL);
1759 #endif
1760 #ifdef SUPPORT_PCRE2_32
1761 	setstack32(NULL);
1762 #endif
1763 
1764 	if (total == successful) {
1765 		printf("\nAll JIT regression tests are successfully passed.\n");
1766 		return 0;
1767 	} else {
1768 		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1769 		return 1;
1770 	}
1771 }
1772 
1773 #if defined SUPPORT_UNICODE && (defined SUPPORT_PCRE2_8 || defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32)
1774 
/* Validates the outcome of a single match attempt against its expectation.

   pattern_index and type are only used to label the diagnostic output.
   A negative match_start means that result is required to be -1; the
   ovector is not inspected in that case. Otherwise result has to be
   positive and ovector[0] / ovector[1] have to equal match_start /
   match_end.

   Returns 0 when the expectation is met. At the first deviation one
   diagnostic line is printed and 1 is returned. */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	int no_match_expected = (match_start < 0);

	if (no_match_expected) {
		if (result == -1)
			return 0;

		printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
		return 1;
	}

	/* From here on a match is expected. */
	if (!(result > 0)) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		return 1;
	}

	if ((PCRE2_SIZE)match_start != ovector[0]) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		return 1;
	}

	if ((PCRE2_SIZE)match_end == ovector[1])
		return 0;

	printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
		pattern_index, type, (int)ovector[1], match_end);
	return 1;
}
1805 
1806 #endif /* SUPPORT_UNICODE && (SUPPORT_PCRE2_8 || SUPPORT_PCRE2_16 || SUPPORT_PCRE2_32) */
1807 
1808 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1809 
/* Short aliases for the option combinations that recur in the invalid UTF-8
   test table below. UDA appears in the compile_options column, CI and CPI in
   the jit_compile_options column. All three are #undef'd again right after
   the table. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1813 
/* One row of the invalid UTF-8 regression table. The table rows are written
   as positional initializers, so the member order must stay as it is. */
struct invalid_utf8_regression_test_case {
	/* PCRE2 compile option bits (the table uses PCRE2_UTF, PCRE2_DOTALL,
	   PCRE2_ANCHORED, PCRE2_CASELESS, ...). */
	int compile_options;
	/* PCRE2_JIT_* option bits. */
	int jit_compile_options;
	/* NOTE(review): presumably the offset in input where matching starts;
	   the consumer is not visible here - confirm. */
	int start_offset;
	/* NOTE(review): skip_left / skip_right presumably shrink the subject at
	   its start / end before matching - confirm against the test runner. */
	int skip_left;
	int skip_right;
	/* Expected match boundaries. The table stores -1 in both members for
	   the rows that (presumably) must not match - confirm. */
	int match_start;
	int match_end;
	/* Up to two pattern strings; the second slot is NULL in most rows. */
	const char *pattern[2];
	/* Subject string as a C literal; NULL only in the last row of the table. */
	const char *input;
};
1825 
/* Table of invalid UTF-8 regression cases. Each row is a positional
   initializer of struct invalid_utf8_regression_test_case, i.e. the columns
   are: compile_options, jit_compile_options, start_offset, skip_left,
   skip_right, match_start, match_end, { pattern[0], pattern[1] }, input.
   The last row has every member set to 0 / NULL. */
static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
	/* Pattern "." with UTF | DOTALL | ANCHORED. */
	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },

	/* Pattern \B (with \b as second pattern in some rows), start_offset 4. */
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
	{ UDA, CPI, 4, 1, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
	{ UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
	{ UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },

	/* Same patterns, start_offset 3. */
	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
	{ UDA, CPI, 3, 1, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },

	/* Same patterns, start_offset 2. */
	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
	{ UDA, CPI, 2, 1, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80#" },
	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff#" },
	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf#" },

	/* Same patterns, start_offset 1. */
	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },

	/* Caseless back reference. */
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },

	/* Pattern \X. */
	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },

	/* Negated character class. */
	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },

	/* PCRE2_MULTILINE with pattern ^\W, start_offset 1. */
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},

	/* PCRE2_FIRSTLINE with pattern #. */
	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},

	/* Literal multi-byte pattern, with and without PCRE2_NO_START_OPTIMIZE. */
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },

	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },

	/* All-zero / NULL row. NOTE(review): presumably the end-of-table
	   sentinel - confirm against the loop that walks this table. */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};

/* The shorthands are only meant for the table above. */
#undef UDA
#undef CI
#undef CPI
1974 
/* Runs one pattern of an invalid-UTF-8 regression test case through the JIT.

The selected pattern is compiled with the case's compile options, JIT-compiled
with its JIT options, and matched against the input after removing skip_left
leading and skip_right trailing code units. A complete match and/or a soft
partial match is attempted, depending on which PCRE2_JIT_* bits the case
requests. Each outcome is handed to check_invalid_utf_result(); a non-zero
return from it is treated as a failure.

Arguments:
  current        test case to execute
  pattern_index  index of the test case; only used in diagnostics
  i              index into current->pattern selecting the pattern to run
  ccontext       compile context passed to pcre2_compile_8()
  mdata          match data block passed to pcre2_jit_match_8()

Returns:         1 when current->pattern[i] is NULL (nothing to run) or every
                 requested match gave the expected result; 0 otherwise
*/
static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	const PCRE2_UCHAR8 *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
	PCRE2_SIZE length, start, erroroffset;
	int result, errorcode;
	int passed = 0;

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((const PCRE2_UCHAR8 *)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (code == NULL) {
		/* Report i too: a test case may carry two patterns. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* The subject is the input with skip_left / skip_right code units trimmed;
	start_offset is given relative to the untrimmed input. */
	subject = (const PCRE2_UCHAR8 *)(current->input + current->skip_left);
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, subject, length, start, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	pcre2_code_free_8(code);
	return passed;
}
2025 
invalid_utf8_regression_tests(void)2026 static int invalid_utf8_regression_tests(void)
2027 {
2028 	struct invalid_utf8_regression_test_case *current;
2029 	pcre2_compile_context_8 *ccontext;
2030 	pcre2_match_data_8 *mdata;
2031 	int total = 0, successful = 0;
2032 	int result;
2033 
2034 	printf("\nRunning invalid-utf8 JIT regression tests\n");
2035 
2036 	ccontext = pcre2_compile_context_create_8(NULL);
2037 	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2038 	mdata = pcre2_match_data_create_8(4, NULL);
2039 
2040 	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2041 		/* printf("\nPattern: %s :\n", current->pattern); */
2042 		total++;
2043 
2044 		result = 1;
2045 		if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2046 			result = 0;
2047 		if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2048 			result = 0;
2049 
2050 		if (result) {
2051 			successful++;
2052 		}
2053 
2054 		printf(".");
2055 		if ((total % 60) == 0)
2056 			printf("\n");
2057 	}
2058 
2059 	if ((total % 60) != 0)
2060 		printf("\n");
2061 
2062 	pcre2_match_data_free_8(mdata);
2063 	pcre2_compile_context_free_8(ccontext);
2064 
2065 	if (total == successful) {
2066 		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2067 		return 0;
2068 	} else {
2069 		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2070 		return 1;
2071 	}
2072 }
2073 
2074 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2075 
/* Stand-in used when Unicode or 8-bit support is not compiled in: there are no
invalid UTF-8 tests to run, so report 0, the value the real implementation
returns when nothing failed. */
static int invalid_utf8_regression_tests(void)
{
	return 0;
}
2080 
2081 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2082 
2083 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2084 
2085 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2086 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2087 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2088 
2089 struct invalid_utf16_regression_test_case {
2090 	int compile_options;
2091 	int jit_compile_options;
2092 	int start_offset;
2093 	int skip_left;
2094 	int skip_right;
2095 	int match_start;
2096 	int match_end;
2097 	const PCRE2_UCHAR16 *pattern[2];
2098 	const PCRE2_UCHAR16 *input;
2099 };
2100 
2101 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2102 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2103 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2104 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2105 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2106 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2107 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2108 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2109 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2110 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, '#', 0 };
2111 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, '#', 0 };
2112 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, '#', 0 };
2113 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, '#', 0 };
2114 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2115 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2116 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2117 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2118 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2119 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2120 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2121 
2122 static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
2123 	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
2124 	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
2125 	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
2126 	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
2127 	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
2128 	{ UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_2 },
2129 	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
2130 	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
2131 	{ UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_3 },
2132 	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
2133 
2134 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
2135 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
2136 	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
2137 	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
2138 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
2139 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
2140 	{ UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
2141 	{ UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2142 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2143 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2144 
2145 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
2146 	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
2147 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
2148 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
2149 
2150 	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
2151 	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
2152 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
2153 	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
2154 	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
2155 	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
2156 
2157 	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2158 	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
2159 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2160 
2161 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
2162 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
2163 
2164 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2165 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2166 	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2167 	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2168 
2169 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2170 };
2171 
2172 #undef UDA
2173 #undef CI
2174 #undef CPI
2175 
/* Runs one pattern of an invalid-UTF-16 regression test case through the JIT.

The selected pattern is compiled with the case's compile options, JIT-compiled
with its JIT options, and matched against the input after removing skip_left
leading and skip_right trailing code units. A complete match and/or a soft
partial match is attempted, depending on which PCRE2_JIT_* bits the case
requests. Each outcome is handed to check_invalid_utf_result(); a non-zero
return from it is treated as a failure.

Arguments:
  current        test case to execute
  pattern_index  index of the test case; only used in diagnostics
  i              index into current->pattern selecting the pattern to run
  ccontext       compile context passed to pcre2_compile_16()
  mdata          match data block passed to pcre2_jit_match_16()

Returns:         1 when current->pattern[i] is NULL (nothing to run) or every
                 requested match gave the expected result; 0 otherwise
*/
static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	const PCRE2_UCHAR16 *input;
	const PCRE2_UCHAR16 *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);
	PCRE2_SIZE length, start, erroroffset;
	int result, errorcode;
	int passed = 0;

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (code == NULL) {
		/* Report i too: a test case may carry two patterns. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units of the zero-terminated input. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* The subject is the input with skip_left / skip_right code units trimmed;
	start_offset is given relative to the untrimmed input. */
	length -= current->skip_left + current->skip_right;
	subject = current->input + current->skip_left;
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_16(code, subject, length, start, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	pcre2_code_free_16(code);
	return passed;
}
2233 
invalid_utf16_regression_tests(void)2234 static int invalid_utf16_regression_tests(void)
2235 {
2236 	struct invalid_utf16_regression_test_case *current;
2237 	pcre2_compile_context_16 *ccontext;
2238 	pcre2_match_data_16 *mdata;
2239 	int total = 0, successful = 0;
2240 	int result;
2241 
2242 	printf("\nRunning invalid-utf16 JIT regression tests\n");
2243 
2244 	ccontext = pcre2_compile_context_create_16(NULL);
2245 	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2246 	mdata = pcre2_match_data_create_16(4, NULL);
2247 
2248 	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2249 		/* printf("\nPattern: %s :\n", current->pattern); */
2250 		total++;
2251 
2252 		result = 1;
2253 		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2254 			result = 0;
2255 		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2256 			result = 0;
2257 
2258 		if (result) {
2259 			successful++;
2260 		}
2261 
2262 		printf(".");
2263 		if ((total % 60) == 0)
2264 			printf("\n");
2265 	}
2266 
2267 	if ((total % 60) != 0)
2268 		printf("\n");
2269 
2270 	pcre2_match_data_free_16(mdata);
2271 	pcre2_compile_context_free_16(ccontext);
2272 
2273 	if (total == successful) {
2274 		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2275 		return 0;
2276 	} else {
2277 		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2278 		return 1;
2279 	}
2280 }
2281 
2282 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2283 
/* Stand-in used when Unicode or 16-bit support is not compiled in: there are
no invalid UTF-16 tests to run, so report 0, the value the real implementation
returns when nothing failed. */
static int invalid_utf16_regression_tests(void)
{
	return 0;
}
2288 
2289 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2290 
2291 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2292 
2293 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2294 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2295 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2296 
2297 struct invalid_utf32_regression_test_case {
2298 	int compile_options;
2299 	int jit_compile_options;
2300 	int start_offset;
2301 	int skip_left;
2302 	int skip_right;
2303 	int match_start;
2304 	int match_end;
2305 	const PCRE2_UCHAR32 *pattern[2];
2306 	const PCRE2_UCHAR32 *input;
2307 };
2308 
2309 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2310 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2311 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2312 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2313 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2314 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2315 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2316 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x10ffff, 0 };
2317 static PCRE2_UCHAR32 test32_2[] = { 'a', 'A', 0x110000, 0 };
2318 static PCRE2_UCHAR32 test32_3[] = { '#', 0x10ffff, 0x110000, 0 };
2319 static PCRE2_UCHAR32 test32_4[] = { ' ', 0x2028, '#', 0 };
2320 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x110000, 0x2028, '#', 0 };
2321 
2322 static struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
2323 	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
2324 	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
2325 
2326 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
2327 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2328 	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2329 
2330 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_2 },
2331 	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_2 },
2332 
2333 	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
2334 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
2335 
2336 	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_3 },
2337 	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_3 },
2338 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_3 },
2339 
2340 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_4 },
2341 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_5 },
2342 
2343 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2344 };
2345 
2346 #undef UDA
2347 #undef CI
2348 #undef CPI
2349 
/* Runs one pattern of an invalid-UTF-32 regression test case through the JIT.

The selected pattern is compiled with the case's compile options, JIT-compiled
with its JIT options, and matched against the input after removing skip_left
leading and skip_right trailing code units. A complete match and/or a soft
partial match is attempted, depending on which PCRE2_JIT_* bits the case
requests. Each outcome is handed to check_invalid_utf_result(); a non-zero
return from it is treated as a failure.

Arguments:
  current        test case to execute
  pattern_index  index of the test case; only used in diagnostics
  i              index into current->pattern selecting the pattern to run
  ccontext       compile context passed to pcre2_compile_32()
  mdata          match data block passed to pcre2_jit_match_32()

Returns:         1 when current->pattern[i] is NULL (nothing to run) or every
                 requested match gave the expected result; 0 otherwise
*/
static int run_invalid_utf32_test(struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	const PCRE2_UCHAR32 *input;
	const PCRE2_UCHAR32 *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);
	PCRE2_SIZE length, start, erroroffset;
	int result, errorcode;
	int passed = 0;

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (code == NULL) {
		/* Report i too: a test case may carry two patterns. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units of the zero-terminated input. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* The subject is the input with skip_left / skip_right code units trimmed;
	start_offset is given relative to the untrimmed input. */
	length -= current->skip_left + current->skip_right;
	subject = current->input + current->skip_left;
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, subject, length, start, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	pcre2_code_free_32(code);
	return passed;
}
2407 
invalid_utf32_regression_tests(void)2408 static int invalid_utf32_regression_tests(void)
2409 {
2410 	struct invalid_utf32_regression_test_case *current;
2411 	pcre2_compile_context_32 *ccontext;
2412 	pcre2_match_data_32 *mdata;
2413 	int total = 0, successful = 0;
2414 	int result;
2415 
2416 	printf("\nRunning invalid-utf32 JIT regression tests\n");
2417 
2418 	ccontext = pcre2_compile_context_create_32(NULL);
2419 	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2420 	mdata = pcre2_match_data_create_32(4, NULL);
2421 
2422 	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2423 		/* printf("\nPattern: %s :\n", current->pattern); */
2424 		total++;
2425 
2426 		result = 1;
2427 		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2428 			result = 0;
2429 		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2430 			result = 0;
2431 
2432 		if (result) {
2433 			successful++;
2434 		}
2435 
2436 		printf(".");
2437 		if ((total % 60) == 0)
2438 			printf("\n");
2439 	}
2440 
2441 	if ((total % 60) != 0)
2442 		printf("\n");
2443 
2444 	pcre2_match_data_free_32(mdata);
2445 	pcre2_compile_context_free_32(ccontext);
2446 
2447 	if (total == successful) {
2448 		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2449 		return 0;
2450 	} else {
2451 		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2452 		return 1;
2453 	}
2454 }
2455 
2456 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2457 
/* Stand-in used when Unicode or 32-bit support is not compiled in: there are
no invalid UTF-32 tests to run, so report 0, the value the real implementation
returns when nothing failed. */
static int invalid_utf32_regression_tests(void)
{
	return 0;
}
2462 
2463 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2464 
2465 /* End of pcre2_jit_test.c */
2466