• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53    \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
   \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
56    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57    \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58    \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60    \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
61    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63    \xc3\xa9 = 0xe9 = 233 (e')
64       \xc3\x89 = 0xc9 = 201 (E')
65    \xc3\xa1 = 0xe1 = 225 (a')
66       \xc3\x81 = 0xc1 = 193 (A')
67    \x53 = 0x53 = S
68      \x73 = 0x73 = s
69      \xc5\xbf = 0x17f = 383 (long S)
70    \xc8\xba = 0x23a = 570
71       \xe2\xb1\xa5 = 0x2c65 = 11365
72    \xe1\xbd\xb8 = 0x1f78 = 8056
73       \xe1\xbf\xb8 = 0x1ff8 = 8184
74    \xf0\x90\x90\x80 = 0x10400 = 66560
75       \xf0\x90\x90\xa8 = 0x10428 = 66600
76    \xc7\x84 = 0x1c4 = 452
77      \xc7\x85 = 0x1c5 = 453
78      \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80    ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81    ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
   ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85    \xcc\x8d = 0x30d = 781
86  Special:
87    \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88    \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
   \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90    \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91    \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92    \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Test drivers, defined elsewhere in this translation unit.  Each returns an
   int status that main() folds into the process exit code. */
static int regression_tests(void);
static int invalid_utf8_regression_tests(void);
static int invalid_utf16_regression_tests(void);
static int invalid_utf32_regression_tests(void);

/*
 * Program entry point.
 *
 * Queries PCRE2_CONFIG_JIT through the first code-unit width that is enabled
 * at build time (8, then 16, then 32).  If the query leaves the flag at zero
 * the program prints a message and returns 1.  Otherwise every test driver is
 * run and the bitwise OR of their return values is returned.
 */
int main(void)
{
	int jit = 0;
	int status;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Run the drivers as separate statements: the operands of a single
	   'a() | b() | ...' expression may be evaluated in any order, which
	   would make the order of the tests (and of their output) depend on
	   the compiler.  The combined result is the same OR as before. */
	status = regression_tests();
	status |= invalid_utf8_regression_tests();
	status |= invalid_utf16_regression_tests();
	status |= invalid_utf32_regression_tests();
	return status;
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Short names for the compile-option sets used in the test table.
   Letters: C = caseless, M = multiline, U = UTF, P = UCP. */
#define M	(PCRE2_MULTILINE)
#define U	(PCRE2_UTF)
#define MU	(M | U)
#define MP	(M | PCRE2_UCP)
#define MUP	(MU | PCRE2_UCP)
#define CM	(PCRE2_CASELESS | M)
#define CMU	(PCRE2_CASELESS | MU)
#define CMUP	(PCRE2_CASELESS | MUP)

/* The "newline" field of a test case packs two settings into one int:
   the newline convention in the low 16 bits and the \R (BSR) convention
   in the bits above.  BSR() places a BSR value in the upper part. */
#define BSR(bsr)	((bsr) << 16)
/* Default newline convention used by most test cases. */
#define A	PCRE2_NEWLINE_ANYCRLF

/* Unpack the two halves of a packed "newline" value. */
#define GET_NEWLINE(packed)	((packed) & 0xffff)
#define GET_BSR(packed)	((packed) >> 16)
140 
/* Flag bits that test cases OR into their start_offset field (for example
   "0 | F_NOMATCH").  OFFSET_MASK covers the low 16 bits, which are left for
   the offset value itself, so every flag must be its own single bit above
   that mask.
   NOTE(review): the code that consumes these flags is outside this view;
   confirm that nothing relies on F_NO32's previous numeric value. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
/* Was 0x020000, the same value as F_NO16, so the two flags could not be
   told apart; it now has the next unused bit. */
#define F_NO32		0x400000
149 
/*
 * One entry of the regression test table.  Entries are written with
 * positional initializers, so the order of the members must not change.
 */
struct regression_test_case {
	int compile_options;	/* PCRE2 compile option bits (e.g. MU, CMUP) */
	int newline;		/* newline convention, optionally OR-ed with BSR(x) */
	int match_options;	/* match-time option bits (e.g. PCRE2_NOTBOL) */
	int start_offset;	/* start offset, OR-ed with F_* flag bits */
	const char *pattern;	/* pattern text */
	const char *input;	/* subject text */
};
158 
159 static struct regression_test_case regression_test_cases[] = {
160 	/* Constant strings. */
161 	{ MU, A, 0, 0, "AbC", "AbAbC" },
162 	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163 	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164 	{ M, A, 0, 0, "[^a]", "aAbB" },
165 	{ CM, A, 0, 0, "[^m]", "mMnN" },
166 	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167 	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168 	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169 	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170 	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175 	{ MU, A, 0, 0, "[axd]", "sAXd" },
176 	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177 	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178 	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179 	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180 	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181 	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182 	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183 	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184 	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185 	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186 	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187 	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189 	{ M, A, 0, 0, "\\Ca", "cda" },
190 	{ CM, A, 0, 0, "\\Ca", "CDA" },
191 	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192 	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198 	{ M, A, 0, 0, "[3-57-9]", "5" },
199 	{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200 		"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201 
202 	/* Assertions. */
203 	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
204 	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
205 	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
206 	{ MP, A, 0, 0, "\\B", "_\xa1" },
207 	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
208 	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
209 	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
210 	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
211 	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
212 	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
213 	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
214 	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
215 	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
216 	{ 0, 0, 0, 0, "^ab", "ab" },
217 	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
218 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
219 	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
220 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
221 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
222 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
223 	{ 0, 0, 0, 0, "ab$", "ab" },
224 	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
225 	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
226 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
227 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
228 	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
229 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
230 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
231 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
232 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
233 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
234 	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
235 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
236 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
237 	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
238 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
239 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
240 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
241 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
242 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
243 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
244 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
245 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
246 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
247 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
248 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
249 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
250 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
251 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
252 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
253 	{ M, A, 0, 0, "\\Aa", "aaa" },
254 	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
255 	{ M, A, 0, 1, "\\Ga", "aaa" },
256 	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
257 	{ M, A, 0, 0, "a\\z", "aaa" },
258 	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
259 
260 	/* Brackets and alternatives. */
261 	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
262 	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
263 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
264 	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
265 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
266 	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
267 	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
268 	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
269 	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270 	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
271 	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
272 	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
273 	{ CM, A, 0, 0, "ab|cd", "CD" },
274 	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
275 	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
276 
277 	/* Greedy and non-greedy ? operators. */
278 	{ MU, A, 0, 0, "(?:a)?a", "laab" },
279 	{ CMU, A, 0, 0, "(A)?A", "llaab" },
280 	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
281 	{ MU, A, 0, 0, "(a)?a", "manm" },
282 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
283 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
284 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
285 
286 	/* Greedy and non-greedy + operators */
287 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
288 	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
289 	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
290 	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
291 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
292 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
293 	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
294 	{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
295 
296 	/* Greedy and non-greedy * operators */
297 	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
298 	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
299 	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
300 	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
301 	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
302 	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
303 	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
304 	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
305 
306 	/* Combining ? + * operators */
307 	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
308 	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
309 	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
310 	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
311 	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
312 
313 	/* Single character iterators. */
314 	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
315 	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
316 	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
317 	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
318 	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
319 	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
320 	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
321 	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
322 	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
323 	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
324 	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
325 	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
326 	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
327 	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
328 	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
329 	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
330 	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
331 	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
332 	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
333 	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
334 	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
335 	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
336 	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
337 	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
338 	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
339 	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
340 	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
341 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
342 	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
343 	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
344 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
345 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
346 	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
347 	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
348 	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
349 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
350 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
351 	{ MU, A, 0, 0, ".[ab]*.", "xx" },
352 	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
353 	{ MU, A, 0, 0, ".[ab]?.", "xx" },
354 	{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
355 	{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
356 	{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
357 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
358 
359 	/* Bracket repeats with limit. */
360 	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
361 	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
362 	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
363 	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
364 	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
365 	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
366 	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
367 	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
368 	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
369 
370 	/* Basic character sets. */
371 	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
372 	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
373 	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
374 	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
375 	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
376 	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
377 	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
378 	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
379 	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
380 	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
381 	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
382 	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
383 	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
384 	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
385 	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
386 	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
387 	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
388 	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
389 	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
390 	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
391 	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
392 	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
393 	{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
394 
395 	/* Unicode properties. */
396 	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
397 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
398 	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
399 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
400 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
401 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
402 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
403 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
404 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
405 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
406 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
407 	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
408 	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
409 	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
410 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
411 	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
412 	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
413 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
414 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
415 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
416 	{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
417 	{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
418 	{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
419 	{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
420 
421 	/* Possible empty brackets. */
422 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
423 	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
424 	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
425 	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
426 	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
427 	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
428 	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
429 	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
430 	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
431 	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
432 
433 	/* Start offset. */
434 	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
435 	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
436 	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
437 	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
438 
439 	/* Newline. */
440 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
441 	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
442 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
443 	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
444 	{ MU, A, 0, 1, "^", "\r\n" },
445 	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
446 	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
447 
448 	/* Any character except newline or any newline. */
449 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
450 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
451 	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
452 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
453 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
454 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
455 	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
456 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
457 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
458 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
459 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
460 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
461 	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
462 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
463 	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
464 	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
465 	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
466 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
467 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
468 	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
469 	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
470 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
471 	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
472 	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
473 	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
474 	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
475 
476 	/* Atomic groups (no fallback from "next" direction). */
477 	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
478 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
479 	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
480 			"bababcdedefgheijijklmlmnop" },
481 	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
482 	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
483 	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
484 	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
485 	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
486 	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
487 	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
488 	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
489 	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
490 	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
491 	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
492 	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
493 	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
494 	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
495 	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
496 	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
497 	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
498 	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
499 	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
500 	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
501 	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
502 	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
503 	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
504 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
505 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
506 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
507 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
508 	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
509 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
510 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
511 	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
512 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
513 	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
514 	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
515 
516 	/* Possessive quantifiers. */
517 	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
518 	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
519 	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
520 	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
521 	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
522 	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
523 	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
524 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
525 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
526 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
527 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
528 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
529 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
530 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
531 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
532 	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
533 	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
534 	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
535 	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
536 	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
537 	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
538 	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
539 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
540 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
541 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
542 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
543 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
544 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
545 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
546 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
547 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
548 	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
549 	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
550 	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
551 	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
552 
553 	/* Back references. */
554 	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
555 	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
556 	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
557 	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
558 	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
559 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
560 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
561 	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
562 	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
563 	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
564 	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
565 	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
566 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
567 	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
568 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
569 	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
570 	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
571 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
572 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
573 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
574 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
575 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
576 	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
577 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
578 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
579 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
580 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
581 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
582 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
583 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
584 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
585 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
586 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
587 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
588 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
589 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
590 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
591 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
592 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
593 
594 	/* Assertions. */
595 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
596 	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
597 	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
598 	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
599 	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
600 	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
601 	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
602 	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
603 	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
604 	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
605 	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
606 	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
607 	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
608 	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
609 	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
610 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
611 	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
612 	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
613 	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
614 	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
615 	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
616 	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
617 	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
618 	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
619 	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
620 	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
621 	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
622 	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
623 	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
624 	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
625 	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
626 	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
627 	{ MU, A, 0, 0, "a(?=)b", "ab" },
628 	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
629 
630 	/* Not empty, ACCEPT, FAIL */
631 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
632 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
633 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
634 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
635 	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
636 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
637 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
638 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
639 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
640 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
641 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
642 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
643 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
644 	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
645 	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
646 	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
647 	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
648 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
649 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
650 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
651 
652 	/* Conditional blocks. */
653 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
654 	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
655 	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
656 	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
657 	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
658 	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
659 	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
660 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
661 	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
662 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
663 	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
664 	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
665 	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
666 	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
667 	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
668 	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
669 	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
670 	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
671 	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
672 	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
673 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
674 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
675 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
676 	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
677 	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
678 	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
679 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
680 	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
681 	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
682 	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
683 	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
684 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
685 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
686 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
687 	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
688 	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
689 	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
690 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
691 
692 	/* Set start of match. */
693 	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
694 	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
695 	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
696 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
697 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
698 
699 	/* First line. */
700 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
701 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
702 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
703 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
704 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
705 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
706 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
707 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
708 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
709 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
710 	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
711 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
712 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
713 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
714 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
715 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
716 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
717 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
718 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
719 	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
720 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
721 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
722 
723 	/* Recurse. */
724 	{ MU, A, 0, 0, "(a)(?1)", "aa" },
725 	{ MU, A, 0, 0, "((a))(?1)", "aa" },
726 	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
727 	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
728 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
729 	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
730 	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
731 	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
732 	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
733 	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
734 	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
735 	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
736 	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
737 	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
738 	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
739 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
740 	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
741 	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
742 	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
743 	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
744 	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
745 	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
746 	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
747 	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
748 	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
749 	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
750 	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
751 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
752 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
753 	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
754 	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
755 	{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
756 
757 	/* 16 bit specific tests. */
758 	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
759 	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
760 	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
761 	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
762 	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
763 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
764 	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
765 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
766 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
767 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
768 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
769 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
770 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
771 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
772 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
773 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
774 	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
775 	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
776 	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
777 	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
778 	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
779 	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
780 	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
781 	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
782 	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
783 	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
784 	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
785 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
786 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
787 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
788 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
789 
790 	/* Partial matching. */
791 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
792 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
793 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
794 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
795 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
796 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
797 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
798 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
799 
800 	/* (*MARK) verb. */
801 	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
802 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
803 	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
804 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
805 	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
806 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
807 	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
808 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
809 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
810 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
811 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
812 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
813 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
814 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
815 	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
816 
817 	/* (*COMMIT) verb. */
818 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
819 	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
820 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
821 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
822 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
823 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
824 
825 	/* (*PRUNE) verb. */
826 	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
827 	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
828 	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
829 	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
830 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
831 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
832 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
833 	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
834 	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
835 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
836 	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
837 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
838 	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
839 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
840 	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
841 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
842 	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
843 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
844 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
845 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
846 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
847 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
848 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
849 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
850 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
851 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
852 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
853 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
854 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
855 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
856 
857 	/* (*SKIP) verb. */
858 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
859 	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
860 	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
861 	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
862 
863 	/* (*THEN) verb. */
864 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
865 	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
866 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
867 	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
868 	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
869 	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
870 	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
871 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
872 	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
873 	{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
874 	{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
875 
876 	/* Recurse and control verbs. */
877 	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
878 	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
879 	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
880 	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
881 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
882 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
883 	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
884 	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
885 
886 #ifdef SUPPORT_UNICODE
887 	/* Script runs and iterations. */
888 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
889 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
890 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
891 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
892 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
893 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
894 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
895 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
896 #endif
897 
898 	/* Deep recursion. */
899 	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
900 	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
901 	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
902 
903 	/* Deep recursion: Stack limit reached. */
904 	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
905 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
906 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
907 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
908 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
909 
910 	{ 0, 0, 0, 0, NULL, NULL }
911 };
912 
913 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)914 static pcre2_jit_stack_8* callback8(void *arg)
915 {
916 	return (pcre2_jit_stack_8 *)arg;
917 }
918 #endif
919 
920 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)921 static pcre2_jit_stack_16* callback16(void *arg)
922 {
923 	return (pcre2_jit_stack_16 *)arg;
924 }
925 #endif
926 
927 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)928 static pcre2_jit_stack_32* callback32(void *arg)
929 {
930 	return (pcre2_jit_stack_32 *)arg;
931 }
932 #endif
933 
934 #ifdef SUPPORT_PCRE2_8
935 static pcre2_jit_stack_8 *stack8;
936 
getstack8(void)937 static pcre2_jit_stack_8 *getstack8(void)
938 {
939 	if (!stack8)
940 		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
941 	return stack8;
942 }
943 
/* Attaches the shared 8 bit JIT stack to a match context.

With a non-NULL mcontext the shared stack (created on demand) is assigned to
it through callback8. With a NULL mcontext the shared stack is freed instead
and the cached pointer is cleared. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	/* No context given: release the cached stack, if there is one. */
	if (stack8 != NULL) {
		pcre2_jit_stack_free_8(stack8);
		stack8 = NULL;
	}
}
955 #endif /* SUPPORT_PCRE2_8 */
956 
957 #ifdef SUPPORT_PCRE2_16
958 static pcre2_jit_stack_16 *stack16;
959 
getstack16(void)960 static pcre2_jit_stack_16 *getstack16(void)
961 {
962 	if (!stack16)
963 		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
964 	return stack16;
965 }
966 
/* Attaches the shared 16 bit JIT stack to a match context.

With a non-NULL mcontext the shared stack (created on demand) is assigned to
it through callback16. With a NULL mcontext the shared stack is freed instead
and the cached pointer is cleared. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	/* No context given: release the cached stack, if there is one. */
	if (stack16 != NULL) {
		pcre2_jit_stack_free_16(stack16);
		stack16 = NULL;
	}
}
978 #endif /* SUPPORT_PCRE2_16 */
979 
980 #ifdef SUPPORT_PCRE2_32
981 static pcre2_jit_stack_32 *stack32;
982 
getstack32(void)983 static pcre2_jit_stack_32 *getstack32(void)
984 {
985 	if (!stack32)
986 		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
987 	return stack32;
988 }
989 
/* Attaches the shared 32 bit JIT stack to a match context.

With a non-NULL mcontext the shared stack (created on demand) is assigned to
it through callback32. With a NULL mcontext the shared stack is freed instead
and the cached pointer is cleared. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	/* No context given: release the cached stack, if there is one. */
	if (stack32 != NULL) {
		pcre2_jit_stack_free_32(stack32);
		stack32 = NULL;
	}
}
1001 #endif /* SUPPORT_PCRE2_32 */
1002 
1003 #ifdef SUPPORT_PCRE2_16
1004 
/* Converts the zero terminated UTF-8 string "input" into UTF-16 code units.

At most max_length code units are stored in "output", including the
terminating zero; nothing is written when max_length is zero or negative.
Code points of 0x10000 and above are stored as a surrogate pair. If such a
pair no longer fits, the conversion stops in front of it.

When "offsetmap" is not NULL, offsetmap[k] receives the byte offset in "input"
of the character whose first code unit is output[k]. The slot that belongs to
a low surrogate is skipped and left untouched. One closing entry holds the
byte offset of the first input byte that was not converted.

The decoder is lenient: continuation bytes are not validated, so encoded
surrogates and other irregular sequences pass through as they are. A byte that
cannot start a sequence (0xf8 and above) and a sequence that is cut short by
the terminating zero are both copied as a single code unit. This guarantees
that the input pointer always advances and never steps over the terminator.

Returns the number of code units stored, not counting the terminating zero. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0; /* 0xf8 - 0xff cannot start a sequence. */

		/* The scan stops at the first zero byte, so a truncated sequence is
		never read beyond the terminator. Such a lead byte is copied alone. */
		for (i = 1; i <= extra; i++) {
			if (!iptr[i]) {
				extra = 0;
				break;
			}
		}

		switch (extra) {
		case 1:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += extra + 1;

		if (c < 65536) {
			*optr++ = (PCRE2_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. The offset
			stored above already marks where the conversion stopped. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE2_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE2_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* Skip the offset slot of the low surrogate. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
1052 
/* Widens the zero terminated 8 bit string "input" to 16 bit code units, one
unit per byte and without any decoding. At most max_length units are stored in
"output", including the terminating zero; nothing is written when max_length
is zero. Returns the number of units copied, not counting the terminator. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one unit free for the terminating zero. */
	for (; input[count] != 0 && count + 1 < max_length; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1068 
1069 #define REGTEST_MAX_LENGTH16 4096
1070 static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1071 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1072 
1073 #endif /* SUPPORT_PCRE2_16 */
1074 
1075 #ifdef SUPPORT_PCRE2_32
1076 
/* Converts the zero terminated UTF-8 string "input" into UTF-32 code units.

At most max_length code units are stored in "output", including the
terminating zero; nothing is written when max_length is zero or negative.

When "offsetmap" is not NULL, offsetmap[k] receives the byte offset in "input"
of the character stored in output[k]. One closing entry holds the byte offset
of the first input byte that was not converted.

The decoder is lenient: continuation bytes are not validated, so encoded
surrogates and other irregular sequences pass through as they are. A byte that
cannot start a sequence (0xf8 and above) and a sequence that is cut short by
the terminating zero are both copied as a single code unit. This guarantees
that the input pointer always advances and never steps over the terminator.

Returns the number of code units stored, not counting the terminating zero. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0; /* 0xf8 - 0xff cannot start a sequence. */

		/* The scan stops at the first zero byte, so a truncated sequence is
		never read beyond the terminator. Such a lead byte is copied alone. */
		for (i = 1; i <= extra; i++) {
			if (!iptr[i]) {
				extra = 0;
				break;
			}
		}

		switch (extra) {
		case 1:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			c = iptr[0];
			break;
		}
		iptr += extra + 1;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1112 
/* Widens the zero terminated 8 bit string "input" to 32 bit code units, one
unit per byte and without any decoding. At most max_length units are stored in
"output", including the terminating zero; nothing is written when max_length
is zero. Returns the number of units copied, not counting the terminator. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one unit free for the terminating zero. */
	for (; input[count] != 0 && count + 1 < max_length; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1128 
1129 #define REGTEST_MAX_LENGTH32 4096
1130 static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1131 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1132 
1133 #endif /* SUPPORT_PCRE2_32 */
1134 
/* Tells whether the zero terminated string "input" consists of 7 bit
characters only. Returns 1 when every byte is at most 127 (an empty string
qualifies) and 0 as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p & 0x80)
			return 0;
	}
	return 1;
}
1145 
1146 #define OVECTOR_SIZE 15
1147 
regression_tests(void)1148 static int regression_tests(void)
1149 {
1150 	struct regression_test_case *current = regression_test_cases;
1151 	int error;
1152 	PCRE2_SIZE err_offs;
1153 	int is_successful;
1154 	int is_ascii;
1155 	int total = 0;
1156 	int successful = 0;
1157 	int successful_row = 0;
1158 	int counter = 0;
1159 	int jit_compile_mode;
1160 	int utf = 0;
1161 	int disabled_options = 0;
1162 	int i;
1163 #ifdef SUPPORT_PCRE2_8
1164 	pcre2_code_8 *re8;
1165 	pcre2_compile_context_8 *ccontext8;
1166 	pcre2_match_data_8 *mdata8_1;
1167 	pcre2_match_data_8 *mdata8_2;
1168 	pcre2_match_context_8 *mcontext8;
1169 	PCRE2_SIZE *ovector8_1 = NULL;
1170 	PCRE2_SIZE *ovector8_2 = NULL;
1171 	int return_value8[2];
1172 #endif
1173 #ifdef SUPPORT_PCRE2_16
1174 	pcre2_code_16 *re16;
1175 	pcre2_compile_context_16 *ccontext16;
1176 	pcre2_match_data_16 *mdata16_1;
1177 	pcre2_match_data_16 *mdata16_2;
1178 	pcre2_match_context_16 *mcontext16;
1179 	PCRE2_SIZE *ovector16_1 = NULL;
1180 	PCRE2_SIZE *ovector16_2 = NULL;
1181 	int return_value16[2];
1182 	int length16;
1183 #endif
1184 #ifdef SUPPORT_PCRE2_32
1185 	pcre2_code_32 *re32;
1186 	pcre2_compile_context_32 *ccontext32;
1187 	pcre2_match_data_32 *mdata32_1;
1188 	pcre2_match_data_32 *mdata32_2;
1189 	pcre2_match_context_32 *mcontext32;
1190 	PCRE2_SIZE *ovector32_1 = NULL;
1191 	PCRE2_SIZE *ovector32_2 = NULL;
1192 	int return_value32[2];
1193 	int length32;
1194 #endif
1195 
1196 #if defined SUPPORT_PCRE2_8
1197 	PCRE2_UCHAR8 cpu_info[128];
1198 #elif defined SUPPORT_PCRE2_16
1199 	PCRE2_UCHAR16 cpu_info[128];
1200 #elif defined SUPPORT_PCRE2_32
1201 	PCRE2_UCHAR32 cpu_info[128];
1202 #endif
1203 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1204 	int return_value;
1205 #endif
1206 
1207 	/* This test compares the behaviour of interpreter and JIT. Although disabling
1208 	utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
1209 	still considered successful from pcre2_jit_test point of view. */
1210 
1211 #if defined SUPPORT_PCRE2_8
1212 	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1213 #elif defined SUPPORT_PCRE2_16
1214 	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1215 #elif defined SUPPORT_PCRE2_32
1216 	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1217 #endif
1218 
1219 	printf("Running JIT regression tests\n");
1220 	printf("  target CPU of SLJIT compiler: ");
1221 	for (i = 0; cpu_info[i]; i++)
1222 		printf("%c", (char)(cpu_info[i]));
1223 	printf("\n");
1224 
1225 #if defined SUPPORT_PCRE2_8
1226 	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1227 #elif defined SUPPORT_PCRE2_16
1228 	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1229 #elif defined SUPPORT_PCRE2_32
1230 	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1231 #endif
1232 
1233 	if (!utf)
1234 		disabled_options |= PCRE2_UTF;
1235 #ifdef SUPPORT_PCRE2_8
1236 	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1237 #endif
1238 #ifdef SUPPORT_PCRE2_16
1239 	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1240 #endif
1241 #ifdef SUPPORT_PCRE2_32
1242 	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1243 #endif
1244 
1245 	while (current->pattern) {
1246 		/* printf("\nPattern: %s :\n", current->pattern); */
1247 		total++;
1248 		is_ascii = 0;
1249 		if (!(current->start_offset & F_PROPERTY))
1250 			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1251 
1252 		if (current->match_options & PCRE2_PARTIAL_SOFT)
1253 			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1254 		else if (current->match_options & PCRE2_PARTIAL_HARD)
1255 			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1256 		else
1257 			jit_compile_mode = PCRE2_JIT_COMPLETE;
1258 		error = 0;
1259 #ifdef SUPPORT_PCRE2_8
1260 		re8 = NULL;
1261 		ccontext8 = pcre2_compile_context_create_8(NULL);
1262 		if (ccontext8) {
1263 			if (GET_NEWLINE(current->newline))
1264 				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1265 			if (GET_BSR(current->newline))
1266 				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1267 
1268 			if (!(current->start_offset & F_NO8)) {
1269 				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1270 					current->compile_options & ~disabled_options,
1271 					&error, &err_offs, ccontext8);
1272 
1273 				if (!re8 && (utf || is_ascii))
1274 					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1275 			}
1276 			pcre2_compile_context_free_8(ccontext8);
1277 		}
1278 		else
1279 			printf("\n8 bit: Cannot allocate compile context\n");
1280 #endif
1281 #ifdef SUPPORT_PCRE2_16
1282 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1283 			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1284 		else
1285 			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1286 
1287 		re16 = NULL;
1288 		ccontext16 = pcre2_compile_context_create_16(NULL);
1289 		if (ccontext16) {
1290 			if (GET_NEWLINE(current->newline))
1291 				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1292 			if (GET_BSR(current->newline))
1293 				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1294 
1295 			if (!(current->start_offset & F_NO16)) {
1296 				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1297 					current->compile_options & ~disabled_options,
1298 					&error, &err_offs, ccontext16);
1299 
1300 				if (!re16 && (utf || is_ascii))
1301 					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1302 			}
1303 			pcre2_compile_context_free_16(ccontext16);
1304 		}
1305 		else
1306 			printf("\n16 bit: Cannot allocate compile context\n");
1307 #endif
1308 #ifdef SUPPORT_PCRE2_32
1309 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1310 			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1311 		else
1312 			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1313 
1314 		re32 = NULL;
1315 		ccontext32 = pcre2_compile_context_create_32(NULL);
1316 		if (ccontext32) {
1317 			if (GET_NEWLINE(current->newline))
1318 				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1319 			if (GET_BSR(current->newline))
1320 				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1321 
1322 			if (!(current->start_offset & F_NO32)) {
1323 				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1324 					current->compile_options & ~disabled_options,
1325 					&error, &err_offs, ccontext32);
1326 
1327 				if (!re32 && (utf || is_ascii))
1328 					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1329 			}
1330 			pcre2_compile_context_free_32(ccontext32);
1331 		}
1332 		else
1333 			printf("\n32 bit: Cannot allocate compile context\n");
1334 #endif
1335 
1336 		counter++;
1337 		if ((counter & 0x3) != 0) {
1338 #ifdef SUPPORT_PCRE2_8
1339 			setstack8(NULL);
1340 #endif
1341 #ifdef SUPPORT_PCRE2_16
1342 			setstack16(NULL);
1343 #endif
1344 #ifdef SUPPORT_PCRE2_32
1345 			setstack32(NULL);
1346 #endif
1347 		}
1348 
1349 #ifdef SUPPORT_PCRE2_8
1350 		return_value8[0] = -1000;
1351 		return_value8[1] = -1000;
1352 		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1353 		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1354 		mcontext8 = pcre2_match_context_create_8(NULL);
1355 		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1356 			printf("\n8 bit: Cannot allocate match data\n");
1357 			pcre2_match_data_free_8(mdata8_1);
1358 			pcre2_match_data_free_8(mdata8_2);
1359 			pcre2_match_context_free_8(mcontext8);
1360 			pcre2_code_free_8(re8);
1361 			re8 = NULL;
1362 		} else {
1363 			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1364 			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1365 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1366 				ovector8_1[i] = -2;
1367 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1368 				ovector8_2[i] = -2;
1369 			pcre2_set_match_limit_8(mcontext8, 10000000);
1370 		}
1371 		if (re8) {
1372 			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1373 				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1374 
1375 			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1376 				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1377 			} else if ((counter & 0x1) != 0) {
1378 				setstack8(mcontext8);
1379 				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1380 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1381 			} else {
1382 				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1383 				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1384 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1385 			}
1386 		}
1387 #endif
1388 
1389 #ifdef SUPPORT_PCRE2_16
1390 		return_value16[0] = -1000;
1391 		return_value16[1] = -1000;
1392 		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1393 		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1394 		mcontext16 = pcre2_match_context_create_16(NULL);
1395 		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1396 			printf("\n16 bit: Cannot allocate match data\n");
1397 			pcre2_match_data_free_16(mdata16_1);
1398 			pcre2_match_data_free_16(mdata16_2);
1399 			pcre2_match_context_free_16(mcontext16);
1400 			pcre2_code_free_16(re16);
1401 			re16 = NULL;
1402 		} else {
1403 			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1404 			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1405 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1406 				ovector16_1[i] = -2;
1407 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1408 				ovector16_2[i] = -2;
1409 			pcre2_set_match_limit_16(mcontext16, 10000000);
1410 		}
1411 		if (re16) {
1412 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1413 				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1414 			else
1415 				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1416 
1417 			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1418 				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1419 
1420 			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1421 				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1422 			} else if ((counter & 0x1) != 0) {
1423 				setstack16(mcontext16);
1424 				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1425 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1426 			} else {
1427 				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1428 				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1429 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1430 			}
1431 		}
1432 #endif
1433 
1434 #ifdef SUPPORT_PCRE2_32
1435 		return_value32[0] = -1000;
1436 		return_value32[1] = -1000;
1437 		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1438 		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1439 		mcontext32 = pcre2_match_context_create_32(NULL);
1440 		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1441 			printf("\n32 bit: Cannot allocate match data\n");
1442 			pcre2_match_data_free_32(mdata32_1);
1443 			pcre2_match_data_free_32(mdata32_2);
1444 			pcre2_match_context_free_32(mcontext32);
1445 			pcre2_code_free_32(re32);
1446 			re32 = NULL;
1447 		} else {
1448 			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1449 			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1450 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1451 				ovector32_1[i] = -2;
1452 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1453 				ovector32_2[i] = -2;
1454 			pcre2_set_match_limit_32(mcontext32, 10000000);
1455 		}
1456 		if (re32) {
1457 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1458 				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1459 			else
1460 				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1461 
1462 			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1463 				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1464 
1465 			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1466 				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1467 			} else if ((counter & 0x1) != 0) {
1468 				setstack32(mcontext32);
1469 				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1470 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1471 			} else {
1472 				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1473 				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1474 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1475 			}
1476 		}
1477 #endif
1478 
1479 		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1480 			return_value8[0], return_value16[0], return_value32[0],
1481 			(int)ovector8_1[0], (int)ovector8_1[1],
1482 			(int)ovector16_1[0], (int)ovector16_1[1],
1483 			(int)ovector32_1[0], (int)ovector32_1[1],
1484 			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1485 
1486 		/* If F_DIFF is set, just run the test, but do not compare the results.
1487 		Segfaults can still be captured. */
1488 
1489 		is_successful = 1;
1490 		if (!(current->start_offset & F_DIFF)) {
1491 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1492 			if (!(current->start_offset & F_FORCECONV)) {
1493 
1494 				/* All results must be the same. */
1495 #ifdef SUPPORT_PCRE2_8
1496 				if ((return_value = return_value8[0]) != return_value8[1]) {
1497 					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1498 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1499 					is_successful = 0;
1500 				} else
1501 #endif
1502 #ifdef SUPPORT_PCRE2_16
1503 				if ((return_value = return_value16[0]) != return_value16[1]) {
1504 					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1505 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1506 					is_successful = 0;
1507 				} else
1508 #endif
1509 #ifdef SUPPORT_PCRE2_32
1510 				if ((return_value = return_value32[0]) != return_value32[1]) {
1511 					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1512 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1513 					is_successful = 0;
1514 				} else
1515 #endif
1516 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1517 				if (return_value8[0] != return_value16[0]) {
1518 					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1519 						return_value8[0], return_value16[0],
1520 						total, current->pattern, current->input);
1521 					is_successful = 0;
1522 				} else
1523 #endif
1524 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1525 				if (return_value8[0] != return_value32[0]) {
1526 					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1527 						return_value8[0], return_value32[0],
1528 						total, current->pattern, current->input);
1529 					is_successful = 0;
1530 				} else
1531 #endif
1532 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1533 				if (return_value16[0] != return_value32[0]) {
1534 					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1535 						return_value16[0], return_value32[0],
1536 						total, current->pattern, current->input);
1537 					is_successful = 0;
1538 				} else
1539 #endif
1540 				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1541 					if (return_value == PCRE2_ERROR_PARTIAL) {
1542 						return_value = 2;
1543 					} else {
1544 						return_value *= 2;
1545 					}
1546 #ifdef SUPPORT_PCRE2_8
1547 					return_value8[0] = return_value;
1548 #endif
1549 #ifdef SUPPORT_PCRE2_16
1550 					return_value16[0] = return_value;
1551 #endif
1552 #ifdef SUPPORT_PCRE2_32
1553 					return_value32[0] = return_value;
1554 #endif
1555 					/* Transform back the results. */
1556 					if (current->compile_options & PCRE2_UTF) {
1557 #ifdef SUPPORT_PCRE2_16
1558 						for (i = 0; i < return_value; ++i) {
1559 							if (ovector16_1[i] != PCRE2_UNSET)
1560 								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1561 							if (ovector16_2[i] != PCRE2_UNSET)
1562 								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1563 						}
1564 #endif
1565 #ifdef SUPPORT_PCRE2_32
1566 						for (i = 0; i < return_value; ++i) {
1567 							if (ovector32_1[i] != PCRE2_UNSET)
1568 								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1569 							if (ovector32_2[i] != PCRE2_UNSET)
1570 								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1571 						}
1572 #endif
1573 					}
1574 
1575 					for (i = 0; i < return_value; ++i) {
1576 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1577 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1578 							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1579 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1580 								total, current->pattern, current->input);
1581 							is_successful = 0;
1582 						}
1583 #endif
1584 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1585 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1586 							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1587 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1588 								total, current->pattern, current->input);
1589 							is_successful = 0;
1590 						}
1591 #endif
1592 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1593 						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1594 							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1595 								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1596 								total, current->pattern, current->input);
1597 							is_successful = 0;
1598 						}
1599 #endif
1600 					}
1601 				}
1602 			} else
1603 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1604 			{
1605 #ifdef SUPPORT_PCRE2_8
1606 				if (return_value8[0] != return_value8[1]) {
1607 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1608 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1609 					is_successful = 0;
1610 				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1611 					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1612 						return_value8[0] = 2;
1613 					else
1614 						return_value8[0] *= 2;
1615 
1616 					for (i = 0; i < return_value8[0]; ++i)
1617 						if (ovector8_1[i] != ovector8_2[i]) {
1618 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1619 								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1620 							is_successful = 0;
1621 						}
1622 				}
1623 #endif
1624 
1625 #ifdef SUPPORT_PCRE2_16
1626 				if (return_value16[0] != return_value16[1]) {
1627 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1628 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1629 					is_successful = 0;
1630 				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1631 					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1632 						return_value16[0] = 2;
1633 					else
1634 						return_value16[0] *= 2;
1635 
1636 					for (i = 0; i < return_value16[0]; ++i)
1637 						if (ovector16_1[i] != ovector16_2[i]) {
1638 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1639 								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1640 							is_successful = 0;
1641 						}
1642 				}
1643 #endif
1644 
1645 #ifdef SUPPORT_PCRE2_32
1646 				if (return_value32[0] != return_value32[1]) {
1647 					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1648 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1649 					is_successful = 0;
1650 				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1651 					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1652 						return_value32[0] = 2;
1653 					else
1654 						return_value32[0] *= 2;
1655 
1656 					for (i = 0; i < return_value32[0]; ++i)
1657 						if (ovector32_1[i] != ovector32_2[i]) {
1658 							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1659 								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1660 							is_successful = 0;
1661 						}
1662 				}
1663 #endif
1664 			}
1665 		}
1666 
1667 		if (is_successful) {
1668 #ifdef SUPPORT_PCRE2_8
1669 			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1670 				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1671 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1672 						total, current->pattern, current->input);
1673 					is_successful = 0;
1674 				}
1675 
1676 				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1677 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1678 						total, current->pattern, current->input);
1679 					is_successful = 0;
1680 				}
1681 			}
1682 #endif
1683 #ifdef SUPPORT_PCRE2_16
1684 			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1685 				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1686 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1687 						total, current->pattern, current->input);
1688 					is_successful = 0;
1689 				}
1690 
1691 				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1692 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1693 						total, current->pattern, current->input);
1694 					is_successful = 0;
1695 				}
1696 			}
1697 #endif
1698 #ifdef SUPPORT_PCRE2_32
1699 			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1700 				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1701 					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1702 						total, current->pattern, current->input);
1703 					is_successful = 0;
1704 				}
1705 
1706 				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1707 					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1708 						total, current->pattern, current->input);
1709 					is_successful = 0;
1710 				}
1711 			}
1712 #endif
1713 		}
1714 
1715 		if (is_successful) {
1716 #ifdef SUPPORT_PCRE2_8
1717 			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1718 				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1719 					total, current->pattern, current->input);
1720 				is_successful = 0;
1721 			}
1722 #endif
1723 #ifdef SUPPORT_PCRE2_16
1724 			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1725 				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1726 					total, current->pattern, current->input);
1727 				is_successful = 0;
1728 			}
1729 #endif
1730 #ifdef SUPPORT_PCRE2_32
1731 			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1732 				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1733 					total, current->pattern, current->input);
1734 				is_successful = 0;
1735 			}
1736 #endif
1737 		}
1738 
1739 #ifdef SUPPORT_PCRE2_8
1740 		pcre2_code_free_8(re8);
1741 		pcre2_match_data_free_8(mdata8_1);
1742 		pcre2_match_data_free_8(mdata8_2);
1743 		pcre2_match_context_free_8(mcontext8);
1744 #endif
1745 #ifdef SUPPORT_PCRE2_16
1746 		pcre2_code_free_16(re16);
1747 		pcre2_match_data_free_16(mdata16_1);
1748 		pcre2_match_data_free_16(mdata16_2);
1749 		pcre2_match_context_free_16(mcontext16);
1750 #endif
1751 #ifdef SUPPORT_PCRE2_32
1752 		pcre2_code_free_32(re32);
1753 		pcre2_match_data_free_32(mdata32_1);
1754 		pcre2_match_data_free_32(mdata32_2);
1755 		pcre2_match_context_free_32(mcontext32);
1756 #endif
1757 
1758 		if (is_successful) {
1759 			successful++;
1760 			successful_row++;
1761 			printf(".");
1762 			if (successful_row >= 60) {
1763 				successful_row = 0;
1764 				printf("\n");
1765 			}
1766 		} else
1767 			successful_row = 0;
1768 
1769 		fflush(stdout);
1770 		current++;
1771 	}
1772 #ifdef SUPPORT_PCRE2_8
1773 	setstack8(NULL);
1774 #endif
1775 #ifdef SUPPORT_PCRE2_16
1776 	setstack16(NULL);
1777 #endif
1778 #ifdef SUPPORT_PCRE2_32
1779 	setstack32(NULL);
1780 #endif
1781 
1782 	if (total == successful) {
1783 		printf("\nAll JIT regression tests are successfully passed.\n");
1784 		return 0;
1785 	} else {
1786 		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1787 		return 1;
1788 	}
1789 }
1790 
1791 #if defined SUPPORT_UNICODE
1792 
/* Check the outcome of a single invalid-UTF match attempt.

pattern_index and type are used only to label the diagnostic output.

When match_start is negative the attempt is expected to fail, and the only
accepted result is -1 (NOTE(review): presumably PCRE2_ERROR_NOMATCH - confirm);
ovector is not read in that case. Otherwise result must be greater than zero
and ovector[0] / ovector[1] must equal match_start / match_end.

The checks run in that order and stop at the first discrepancy, which is
reported on stdout. Returns 1 if a discrepancy was found, 0 otherwise. */

static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	/* Assume failure; only the two fully verified paths clear the flag. */
	int mismatch = 1;

	if (match_start < 0) {
		/* No match is expected for this case. */
		if (result == -1)
			mismatch = 0;
		else
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
	} else if (result <= 0) {
		/* A match is expected, so the return value must be positive. */
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
	} else if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
	} else if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
	} else {
		mismatch = 0;
	}

	return mismatch;
}
1823 
1824 #endif /* SUPPORT_UNICODE */
1825 
1826 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1827 
/* Shorthand option sets used in the first two columns of the invalid UTF-8
test table that follows. */

/* Compile options: PCRE2_UTF, PCRE2_DOTALL and PCRE2_ANCHORED combined. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* JIT compile options: PCRE2_JIT_COMPLETE together with PCRE2_JIT_INVALID_UTF. */
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* JIT compile options: as CI, with PCRE2_JIT_PARTIAL_SOFT added. */
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1831 
/* One row of the invalid UTF-8 regression table. The table initializes its
rows positionally, so the order of the members below must not change. */
struct invalid_utf8_regression_test_case {
	/* Compile option bits; the table passes PCRE2_* flags here (e.g. UDA). */
	int compile_options;
	/* JIT compile option bits; the table passes CI or CPI here. */
	int jit_compile_options;
	/* NOTE(review): presumably the offset in input at which matching
	starts - confirm against the code that consumes the table. */
	int start_offset;
	/* NOTE(review): presumably the number of code units dropped from the
	start (skip_left) and end (skip_right) of input - confirm. */
	int skip_left;
	int skip_right;
	/* Expected match span. The table uses -1, -1 for rows that list no
	match position. NOTE(review): presumably forwarded to
	check_invalid_utf_result(), where a negative match_start means that no
	match is expected - confirm. */
	int match_start;
	int match_end;
	/* Up to two pattern strings; the second slot is NULL in many rows. */
	const char *pattern[2];
	/* Subject string, written with \x escapes in the table. */
	const char *input;
};
1843 
/* Declared without an initializer, which makes this a tentative definition.
NOTE(review): no use of this object is visible in this part of the file;
presumably its address serves as a marker value in the test table - confirm
against the code that reads the table. */
static const char invalid_utf8_newline_cr;
1845 
1846 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1847 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1848 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1849 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1850 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1851 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1852 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1853 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1854 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1855 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1856 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1857 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1858 	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1859 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1860 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1861 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1862 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1863 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1864 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1865 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1866 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1867 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1868 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1869 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1870 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1871 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1872 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1873 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1874 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1875 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1876 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1877 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1878 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1879 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1880 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1881 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1882 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1883 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1884 	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1885 
1886 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1887 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1888 	{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1889 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1890 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1891 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1892 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1893 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1894 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1895 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1896 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1897 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1898 	{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1899 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1900 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1901 	{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1902 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1903 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1904 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1905 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1906 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1907 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1908 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1909 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1910 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1911 
1912 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1913 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1914 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1915 	{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1916 	{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1917 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1918 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1919 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1920 
1921 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1922 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1923 	{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1924 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1925 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1926 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1927 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1928 
1929 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1930 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1931 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1932 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1933 
1934 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1935 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1936 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1937 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1938 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1939 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1940 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1941 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1942 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1943 
1944 	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1945 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1946 	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1947 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1948 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1949 	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1950 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1951 	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1952 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1953 
1954 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1955 	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1956 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1957 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1958 
1959 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1960 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1961 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1962 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1963 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1964 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1965 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1966 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1967 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1968 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1969 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1970 
1971 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1972 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1973 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1974 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1975 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1976 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1977 
1978 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1979 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1980 	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1981 	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1982 
1983 	{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
1984 
1985 	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
1986 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
1987 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
1988 
1989 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
1990 
1991 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
1992 };
1993 
1994 #undef UDA
1995 #undef CI
1996 #undef CPI
1997 
/* Runs one pattern of an invalid-UTF-8 regression test case through the JIT.

The pattern is compiled with pcre2_compile_8(), JIT-compiled with the test
case's jit_compile_options, and then matched once for each of the
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits set in those options.

Arguments:
  current        test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              index into current->pattern selecting the pattern to run
  ccontext       compile context handed to pcre2_compile_8()
  mdata          match data block used for every match attempt

Returns:         1 if pattern[i] is NULL (nothing to run) or every check
                 passed; 0 if either compilation step failed or
                 check_invalid_utf_result() returned non-zero
*/
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	PCRE2_SPTR8 subject;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, start, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report which of the two patterns failed, not always the first. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* The subject is the input with skip_left units dropped from the front and
	skip_right units dropped from the end. start_offset is relative to the whole
	input, so it is rebased onto the shortened subject. */
	subject = (PCRE2_SPTR8)(current->input + current->skip_left);
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, subject, length, start, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	pcre2_code_free_8(code);
	return passed;
}
2048 
invalid_utf8_regression_tests(void)2049 static int invalid_utf8_regression_tests(void)
2050 {
2051 	const struct invalid_utf8_regression_test_case *current;
2052 	pcre2_compile_context_8 *ccontext;
2053 	pcre2_match_data_8 *mdata;
2054 	int total = 0, successful = 0;
2055 	int result;
2056 
2057 	printf("\nRunning invalid-utf8 JIT regression tests\n");
2058 
2059 	ccontext = pcre2_compile_context_create_8(NULL);
2060 	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2061 	mdata = pcre2_match_data_create_8(4, NULL);
2062 
2063 	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2064 		/* printf("\nPattern: %s :\n", current->pattern); */
2065 		total++;
2066 
2067 		result = 1;
2068 		if (current->pattern[1] != &invalid_utf8_newline_cr)
2069 		{
2070 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2071 				result = 0;
2072 			if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2073 				result = 0;
2074 		} else {
2075 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2076 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2077 				result = 0;
2078 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2079 		}
2080 
2081 		if (result) {
2082 			successful++;
2083 		}
2084 
2085 		printf(".");
2086 		if ((total % 60) == 0)
2087 			printf("\n");
2088 	}
2089 
2090 	if ((total % 60) != 0)
2091 		printf("\n");
2092 
2093 	pcre2_match_data_free_8(mdata);
2094 	pcre2_compile_context_free_8(ccontext);
2095 
2096 	if (total == successful) {
2097 		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2098 		return 0;
2099 	} else {
2100 		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2101 		return 1;
2102 	}
2103 }
2104 
2105 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2106 
/* Stand-in used when Unicode support or the 8-bit library is not compiled in:
there is nothing to run, so the same value as an all-passed run is returned. */
static int invalid_utf8_regression_tests(void)
{
	int status = 0;

	return status;
}
2111 
2112 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2113 
2114 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2115 
2116 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2117 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2118 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2119 
2120 struct invalid_utf16_regression_test_case {
2121 	int compile_options;
2122 	int jit_compile_options;
2123 	int start_offset;
2124 	int skip_left;
2125 	int skip_right;
2126 	int match_start;
2127 	int match_end;
2128 	const PCRE2_UCHAR16 *pattern[2];
2129 	const PCRE2_UCHAR16 *input;
2130 };
2131 
2132 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2133 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2134 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2135 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2136 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2137 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2138 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2139 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2140 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2141 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2142 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2143 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2144 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2145 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2146 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2147 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2148 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2149 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2150 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2151 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2152 
/* Invalid UTF-16 test table. Columns follow the member order of
struct invalid_utf16_regression_test_case:

  compile options, JIT options, start offset, skip left, skip right,
  expected match start, expected match end, { pattern, pattern }, input

A NULL second pattern is skipped by the runner. The table ends at the entry
whose first pattern is NULL.
NOTE(review): -1, -1 in the expected columns presumably stands for "no match";
confirm against check_invalid_utf_result(). */
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
	/* Pattern "." */
	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },

	/* Patterns "\B" and, where given, "\b" */
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },

	/* Pattern "(.)\1", caseless */
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },

	/* Pattern "\X" */
	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },

	/* Pattern "[^#]" */
	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },

	/* Pattern "^\W", multiline, unanchored */
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },

	/* Pattern generic16, unanchored, with and without PCRE2_NO_START_OPTIMIZE */
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },

	/* Terminator */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
2202 
2203 #undef UDA
2204 #undef CI
2205 #undef CPI
2206 
/* Runs one pattern of an invalid-UTF-16 regression test case through the JIT.

The pattern is compiled with pcre2_compile_16(), JIT-compiled with the test
case's jit_compile_options, and then matched once for each of the
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits set in those options.

Arguments:
  current        test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              index into current->pattern selecting the pattern to run
  ccontext       compile context handed to pcre2_compile_16()
  mdata          match data block used for every match attempt

Returns:         1 if pattern[i] is NULL (nothing to run) or every check
                 passed; 0 if either compilation step failed or
                 check_invalid_utf_result() returned non-zero
*/
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, start, erroroffset;
	const PCRE2_UCHAR16 *input;
	const PCRE2_UCHAR16 *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report which of the two patterns failed, not always the first. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units of the zero-terminated input. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* The subject is the input with skip_left units dropped from the front and
	skip_right units dropped from the end. start_offset is relative to the whole
	input, so it is rebased onto the shortened subject. */
	length -= current->skip_left + current->skip_right;
	subject = current->input + current->skip_left;
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_16(code, subject, length, start, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	pcre2_code_free_16(code);
	return passed;
}
2264 
invalid_utf16_regression_tests(void)2265 static int invalid_utf16_regression_tests(void)
2266 {
2267 	const struct invalid_utf16_regression_test_case *current;
2268 	pcre2_compile_context_16 *ccontext;
2269 	pcre2_match_data_16 *mdata;
2270 	int total = 0, successful = 0;
2271 	int result;
2272 
2273 	printf("\nRunning invalid-utf16 JIT regression tests\n");
2274 
2275 	ccontext = pcre2_compile_context_create_16(NULL);
2276 	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2277 	mdata = pcre2_match_data_create_16(4, NULL);
2278 
2279 	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2280 		/* printf("\nPattern: %s :\n", current->pattern); */
2281 		total++;
2282 
2283 		result = 1;
2284 		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2285 			result = 0;
2286 		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2287 			result = 0;
2288 
2289 		if (result) {
2290 			successful++;
2291 		}
2292 
2293 		printf(".");
2294 		if ((total % 60) == 0)
2295 			printf("\n");
2296 	}
2297 
2298 	if ((total % 60) != 0)
2299 		printf("\n");
2300 
2301 	pcre2_match_data_free_16(mdata);
2302 	pcre2_compile_context_free_16(ccontext);
2303 
2304 	if (total == successful) {
2305 		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2306 		return 0;
2307 	} else {
2308 		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2309 		return 1;
2310 	}
2311 }
2312 
2313 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2314 
/* Stand-in used when Unicode support or the 16-bit library is not compiled in:
there is nothing to run, so the same value as an all-passed run is returned. */
static int invalid_utf16_regression_tests(void)
{
	int status = 0;

	return status;
}
2319 
2320 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2321 
2322 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2323 
2324 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2325 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2326 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2327 
2328 struct invalid_utf32_regression_test_case {
2329 	int compile_options;
2330 	int jit_compile_options;
2331 	int start_offset;
2332 	int skip_left;
2333 	int skip_right;
2334 	int match_start;
2335 	int match_end;
2336 	const PCRE2_UCHAR32 *pattern[2];
2337 	const PCRE2_UCHAR32 *input;
2338 };
2339 
2340 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2341 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2342 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2343 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2344 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2345 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2346 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2347 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2348 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2349 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2350 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2351 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2352 static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2353 
/* Invalid UTF-32 test table. Columns follow the member order of
struct invalid_utf32_regression_test_case:

  compile options, JIT options, start offset, skip left, skip right,
  expected match start, expected match end, { pattern, pattern }, input

A NULL second pattern is skipped by the runner. The table ends at the entry
whose first pattern is NULL.
NOTE(review): -1, -1 in the expected columns presumably stands for "no match";
confirm against check_invalid_utf_result(). */
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
	/* Pattern "." */
	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
	{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
	{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },

	/* Patterns "\B" and, where given, "\b" */
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
	{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },

	/* Pattern "(.)\1", caseless */
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },

	/* Pattern "\X" */
	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },

	/* Pattern "[^#]" */
	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },

	/* Pattern "^\W", multiline, unanchored */
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },

	/* Terminator */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
2389 
2390 #undef UDA
2391 #undef CI
2392 #undef CPI
2393 
/* Runs one pattern of an invalid-UTF-32 regression test case through the JIT.

The pattern is compiled with pcre2_compile_32(), JIT-compiled with the test
case's jit_compile_options, and then matched once for each of the
PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits set in those options.

Arguments:
  current        test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              index into current->pattern selecting the pattern to run
  ccontext       compile context handed to pcre2_compile_32()
  mdata          match data block used for every match attempt

Returns:         1 if pattern[i] is NULL (nothing to run) or every check
                 passed; 0 if either compilation step failed or
                 check_invalid_utf_result() returned non-zero
*/
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, start, erroroffset;
	const PCRE2_UCHAR32 *input;
	const PCRE2_UCHAR32 *subject;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report which of the two patterns failed, not always the first. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units of the zero-terminated input. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* The subject is the input with skip_left units dropped from the front and
	skip_right units dropped from the end. start_offset is relative to the whole
	input, so it is rebased onto the shortened subject. */
	length -= current->skip_left + current->skip_right;
	subject = current->input + current->skip_left;
	start = (PCRE2_SIZE)(current->start_offset - current->skip_left);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, subject, length, start, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, subject, length, start, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	pcre2_code_free_32(code);
	return passed;
}
2451 
invalid_utf32_regression_tests(void)2452 static int invalid_utf32_regression_tests(void)
2453 {
2454 	const struct invalid_utf32_regression_test_case *current;
2455 	pcre2_compile_context_32 *ccontext;
2456 	pcre2_match_data_32 *mdata;
2457 	int total = 0, successful = 0;
2458 	int result;
2459 
2460 	printf("\nRunning invalid-utf32 JIT regression tests\n");
2461 
2462 	ccontext = pcre2_compile_context_create_32(NULL);
2463 	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2464 	mdata = pcre2_match_data_create_32(4, NULL);
2465 
2466 	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2467 		/* printf("\nPattern: %s :\n", current->pattern); */
2468 		total++;
2469 
2470 		result = 1;
2471 		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2472 			result = 0;
2473 		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2474 			result = 0;
2475 
2476 		if (result) {
2477 			successful++;
2478 		}
2479 
2480 		printf(".");
2481 		if ((total % 60) == 0)
2482 			printf("\n");
2483 	}
2484 
2485 	if ((total % 60) != 0)
2486 		printf("\n");
2487 
2488 	pcre2_match_data_free_32(mdata);
2489 	pcre2_compile_context_free_32(ccontext);
2490 
2491 	if (total == successful) {
2492 		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2493 		return 0;
2494 	} else {
2495 		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2496 		return 1;
2497 	}
2498 }
2499 
2500 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2501 
/* Stand-in used when Unicode support or the 32-bit library is not compiled in:
there is nothing to run, so the same value as an all-passed run is returned. */
static int invalid_utf32_regression_tests(void)
{
	int status = 0;

	return status;
}
2506 
2507 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2508 
2509 /* End of pcre2_jit_test.c */
2510