• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53    \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
55    \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
56    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57    \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58    \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60    \xc2\x85 = 0x85 = 133 (Next Line = NEL)
61    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63    \xc3\xa9 = 0xe9 = 233 (e')
64       \xc3\x89 = 0xc9 = 201 (E')
65    \xc3\xa1 = 0xe1 = 225 (a')
66       \xc3\x81 = 0xc1 = 193 (A')
67    \x53 = 0x53 = S
68      \x73 = 0x73 = s
69      \xc5\xbf = 0x17f = 383 (long S)
70    \xc8\xba = 0x23a = 570
71       \xe2\xb1\xa5 = 0x2c65 = 11365
72    \xe1\xbd\xb8 = 0x1f78 = 8056
73       \xe1\xbf\xb8 = 0x1ff8 = 8184
74    \xf0\x90\x90\x80 = 0x10400 = 66560
75       \xf0\x90\x90\xa8 = 0x10428 = 66600
76    \xc7\x84 = 0x1c4 = 452
77      \xc7\x85 = 0x1c5 = 453
78      \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80    ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81    ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82    ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85    \xcc\x8d = 0x30d = 781
86  Special:
87    \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88    \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89    \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90    \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91    \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92    \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Test suites defined further down in this file. main() ORs their return
   values together and uses the result as the process exit status. */
static int regression_tests(void);
static int invalid_utf8_regression_tests(void);
static int invalid_utf16_regression_tests(void);
static int invalid_utf32_regression_tests(void);

/*
 * Program entry point.
 *
 * Asks the PCRE2 library whether JIT support is available, using the first
 * code unit width that is enabled at build time (8, then 16, then 32).
 * When JIT is reported as unavailable, a message is printed and 1 is
 * returned without running any suite. Otherwise the four suites are run
 * and the bitwise OR of their results is returned.
 */
int main(void)
{
	/* PCRE2_CONFIG_JIT is documented to store a uint32_t through the
	   output pointer, so the receiving object has exactly that type. */
	uint32_t jit = 0;
	int result;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}

	/* Every suite is always executed (no short-circuit). Separate
	   statements fix the execution order, which a single `a | b | c`
	   expression would leave unspecified. */
	result = regression_tests();
	result |= invalid_utf8_regression_tests();
	result |= invalid_utf16_regression_tests();
	result |= invalid_utf32_regression_tests();
	return result;
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Shorthand names for the compile option combinations used in the
   regression test table (M = multiline, U = UTF, C = caseless, P = UCP). */
#define MU	(PCRE2_MULTILINE | PCRE2_UTF)
#define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define M	(PCRE2_MULTILINE)
#define MP	(PCRE2_MULTILINE | PCRE2_UCP)
#define U	(PCRE2_UTF)
#define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)

/* The "newline" field of a test case packs two values into one int:
   the newline convention in the low 16 bits and the BSR (\R) convention
   in the bits above. BSR() shifts a BSR value into its position. */
#define BSR(x)	((x) << 16)
#define A	PCRE2_NEWLINE_ANYCRLF

/* Accessors that split a packed "newline" field back into its two parts. */
#define GET_NEWLINE(x)	((x) & 0xffff)
#define GET_BSR(x)	((x) >> 16)

/* The "start_offset" field of a test case also carries flags: the low
   16 bits (OFFSET_MASK) hold the offset itself and the F_* bits below are
   OR-ed in above them.
   NOTE(review): F_NO16 and F_NO32 share the value 0x020000, so the two
   flags cannot be told apart - confirm that this is intended. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NO32		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
149 
/* One entry of the regression test table. */
struct regression_test_case {
	/* PCRE2 compile option bits (the table uses MU, CMU, ... shorthands). */
	int compile_options;
	/* Newline convention in the low 16 bits, BSR convention above them;
	   see BSR(), GET_NEWLINE() and GET_BSR(). */
	int newline;
	/* Option bits given at match time (e.g. PCRE2_NOTBOL in the table). */
	int match_options;
	/* Start offset in the OFFSET_MASK bits, OR-ed with F_* flag bits. */
	int start_offset;
	/* Pattern text as a C string. */
	const char *pattern;
	/* Subject text as a C string. */
	const char *input;
};
158 
159 static struct regression_test_case regression_test_cases[] = {
160 	/* Constant strings. */
161 	{ MU, A, 0, 0, "AbC", "AbAbC" },
162 	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163 	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164 	{ M, A, 0, 0, "[^a]", "aAbB" },
165 	{ CM, A, 0, 0, "[^m]", "mMnN" },
166 	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167 	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168 	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169 	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170 	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175 	{ MU, A, 0, 0, "[axd]", "sAXd" },
176 	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177 	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178 	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179 	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180 	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181 	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182 	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183 	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184 	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185 	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186 	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187 	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189 	{ M, A, 0, 0, "\\Ca", "cda" },
190 	{ CM, A, 0, 0, "\\Ca", "CDA" },
191 	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192 	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198 	{ M, A, 0, 0, "[3-57-9]", "5" },
199 	{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200 		"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201 
202 	/* Assertions. */
203 	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
204 	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
205 	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
206 	{ MP, A, 0, 0, "\\B", "_\xa1" },
207 	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
208 	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
209 	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
210 	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
211 	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
212 	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
213 	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
214 	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
215 	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
216 	{ 0, 0, 0, 0, "^ab", "ab" },
217 	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
218 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
219 	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
220 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
221 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
222 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
223 	{ 0, 0, 0, 0, "ab$", "ab" },
224 	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
225 	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
226 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
227 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
228 	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
229 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
230 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
231 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
232 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
233 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
234 	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
235 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
236 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
237 	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
238 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
239 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
240 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
241 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
242 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
243 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
244 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
245 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
246 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
247 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
248 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
249 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
250 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
251 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
252 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
253 	{ M, A, 0, 0, "\\Aa", "aaa" },
254 	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
255 	{ M, A, 0, 1, "\\Ga", "aaa" },
256 	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
257 	{ M, A, 0, 0, "a\\z", "aaa" },
258 	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
259 
260 	/* Brackets and alternatives. */
261 	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
262 	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
263 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
264 	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
265 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
266 	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
267 	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
268 	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
269 	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270 	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
271 	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
272 	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
273 	{ CM, A, 0, 0, "ab|cd", "CD" },
274 	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
275 	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
276 
277 	/* Greedy and non-greedy ? operators. */
278 	{ MU, A, 0, 0, "(?:a)?a", "laab" },
279 	{ CMU, A, 0, 0, "(A)?A", "llaab" },
280 	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
281 	{ MU, A, 0, 0, "(a)?a", "manm" },
282 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
283 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
284 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
285 
286 	/* Greedy and non-greedy + operators */
287 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
288 	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
289 	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
290 	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
291 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
292 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
293 	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
294 
295 	/* Greedy and non-greedy * operators */
296 	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
297 	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
298 	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
299 	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
300 	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
301 	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
302 	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
303 	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
304 
305 	/* Combining ? + * operators */
306 	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
307 	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
308 	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
309 	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
310 	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
311 
312 	/* Single character iterators. */
313 	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
314 	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
315 	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
316 	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
317 	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
318 	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
319 	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
320 	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
321 	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
322 	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
323 	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
324 	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
325 	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
326 	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
327 	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
328 	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
329 	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
330 	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
331 	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
332 	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
333 	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
334 	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
335 	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
336 	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
337 	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
338 	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
339 	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
340 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
341 	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
342 	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
343 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
344 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
345 	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
346 	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
347 	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
348 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
349 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
350 	{ MU, A, 0, 0, ".[ab]*.", "xx" },
351 	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
352 	{ MU, A, 0, 0, ".[ab]?.", "xx" },
353 	{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
354 	{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
355 	{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
356 
357 	/* Bracket repeats with limit. */
358 	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
359 	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
360 	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
361 	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
362 	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
363 	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
364 	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
365 	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
366 	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
367 
368 	/* Basic character sets. */
369 	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
370 	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
371 	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
372 	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
373 	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
374 	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
375 	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
376 	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
377 	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
378 	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
379 	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
380 	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
381 	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
382 	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
383 	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
384 	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
385 	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
386 	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
387 	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
388 	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
389 	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
390 	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
391 	{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
392 
393 	/* Unicode properties. */
394 	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
395 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
396 	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
397 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
398 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
399 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
400 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
401 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
402 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
403 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
404 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
405 	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
406 	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
407 	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
408 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
409 	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
410 	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
411 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
412 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
413 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
414 	{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
415 	{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
416 
417 	/* Possible empty brackets. */
418 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
419 	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
420 	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
421 	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
422 	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
423 	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
424 	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
425 	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
426 	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
427 	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
428 
429 	/* Start offset. */
430 	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
431 	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
432 	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
433 	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
434 
435 	/* Newline. */
436 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
437 	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
438 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
439 	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
440 	{ MU, A, 0, 1, "^", "\r\n" },
441 	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
442 	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
443 
444 	/* Any character except newline or any newline. */
445 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
446 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
447 	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
448 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
449 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
450 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
451 	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
452 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
453 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
454 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
455 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
456 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
457 	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
458 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
459 	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
460 	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
461 	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
462 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
463 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
464 	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
465 	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
466 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
467 	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
468 	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
469 	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
470 	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
471 
472 	/* Atomic groups (no fallback from "next" direction). */
473 	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
474 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
475 	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
476 			"bababcdedefgheijijklmlmnop" },
477 	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
478 	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
479 	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
480 	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
481 	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
482 	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
483 	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
484 	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
485 	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
486 	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
487 	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
488 	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
489 	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
490 	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
491 	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
492 	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
493 	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
494 	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
495 	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
496 	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
497 	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
498 	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
499 	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
500 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
501 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
502 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
503 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
504 	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
505 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
506 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
507 	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
508 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
509 	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
510 	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
511 
512 	/* Possessive quantifiers. */
513 	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
514 	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
515 	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
516 	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
517 	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
518 	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
519 	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
520 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
521 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
522 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
523 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
524 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
525 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
526 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
527 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
528 	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
529 	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
530 	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
531 	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
532 	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
533 	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
534 	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
535 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
536 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
537 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
538 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
539 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
540 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
541 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
542 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
543 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
544 	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
545 	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
546 	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
547 	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
548 
549 	/* Back references. */
550 	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
551 	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
552 	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
553 	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
554 	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
555 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
556 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
557 	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
558 	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
559 	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
560 	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
561 	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
562 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
563 	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
564 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
565 	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
566 	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
567 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
568 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
569 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
570 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
571 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
572 	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
573 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
574 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
575 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
576 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
577 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
578 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
579 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
580 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
581 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
582 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
583 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
584 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
585 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
586 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
587 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
588 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
589 
590 	/* Assertions. */
591 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
592 	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
593 	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
594 	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
595 	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
596 	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
597 	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
598 	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
599 	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
600 	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
601 	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
602 	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
603 	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
604 	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
605 	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
606 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
607 	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
608 	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
609 	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
610 	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
611 	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
612 	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
613 	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
614 	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
615 	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
616 	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
617 	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
618 	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
619 	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
620 	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
621 	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
622 	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
623 	{ MU, A, 0, 0, "a(?=)b", "ab" },
624 	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
625 
626 	/* Not empty, ACCEPT, FAIL */
627 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
628 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
629 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
630 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
631 	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
632 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
633 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
634 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
635 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
636 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
637 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
638 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
639 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
640 	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
641 	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
642 	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
643 	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
644 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
645 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
646 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
647 
648 	/* Conditional blocks. */
649 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
650 	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
651 	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
652 	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
653 	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
654 	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
655 	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
656 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
657 	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
658 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
659 	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
660 	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
661 	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
662 	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
663 	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
664 	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
665 	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
666 	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
667 	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
668 	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
669 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
670 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
671 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
672 	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
673 	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
674 	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
675 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
676 	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
677 	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
678 	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
679 	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
680 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
681 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
682 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
683 	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
684 	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
685 	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
686 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
687 
688 	/* Set start of match. */
689 	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
690 	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
691 	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
692 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
693 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
694 
695 	/* First line. */
696 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
697 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
698 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
699 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
700 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
701 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
702 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
703 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
704 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
705 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
706 	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
707 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
708 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
709 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
710 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
711 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
712 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
713 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
714 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
715 	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
716 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
717 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
718 
719 	/* Recurse. */
720 	{ MU, A, 0, 0, "(a)(?1)", "aa" },
721 	{ MU, A, 0, 0, "((a))(?1)", "aa" },
722 	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
723 	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
724 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
725 	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
726 	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
727 	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
728 	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
729 	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
730 	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
731 	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
732 	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
733 	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
734 	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
735 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
736 	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
737 	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
738 	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
739 	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
740 	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
741 	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
742 	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
743 	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
744 	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
745 	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
746 	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
747 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
748 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
749 	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
750 	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
751 	{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
752 
753 	/* 16 bit specific tests. */
754 	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
755 	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
756 	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
757 	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
758 	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
759 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
760 	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
761 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
762 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
763 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
764 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
765 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
766 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
767 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
768 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
769 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
770 	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
771 	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
772 	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
773 	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
774 	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
775 	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
776 	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
777 	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
778 	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
779 	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
780 	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
781 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
782 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
783 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
784 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
785 
786 	/* Partial matching. */
787 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
788 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
789 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
790 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
791 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
792 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
793 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
794 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
795 
796 	/* (*MARK) verb. */
797 	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
798 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
799 	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
800 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
801 	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
802 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
803 	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
804 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
805 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
806 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
807 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
808 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
809 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
810 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
811 	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
812 
813 	/* (*COMMIT) verb. */
814 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
815 	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
816 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
817 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
818 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
819 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
820 
821 	/* (*PRUNE) verb. */
822 	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
823 	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
824 	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
825 	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
826 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
827 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
828 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
829 	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
830 	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
831 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
832 	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
833 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
834 	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
835 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
836 	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
837 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
838 	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
839 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
840 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
841 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
842 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
843 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
844 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
845 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
846 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
847 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
848 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
849 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
850 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
851 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
852 
853 	/* (*SKIP) verb. */
854 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
855 	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
856 	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
857 	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
858 
859 	/* (*THEN) verb. */
860 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
861 	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
862 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
863 	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
864 	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
865 	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
866 	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
867 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
868 	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
869 	{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
870 	{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
871 
872 	/* Recurse and control verbs. */
873 	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
874 	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
875 	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
876 	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
877 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
878 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
879 	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
880 	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
881 
882 #ifdef SUPPORT_UNICODE
883 	/* Script runs and iterations. */
884 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
885 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
886 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
887 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
888 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
889 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
890 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
891 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
892 #endif
893 
894 	/* Deep recursion. */
895 	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
896 	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
897 	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
898 
899 	/* Deep recursion: Stack limit reached. */
900 	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
901 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
902 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
903 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
904 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
905 
906 	{ 0, 0, 0, 0, NULL, NULL }
907 };
908 
909 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)910 static pcre2_jit_stack_8* callback8(void *arg)
911 {
912 	return (pcre2_jit_stack_8 *)arg;
913 }
914 #endif
915 
916 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)917 static pcre2_jit_stack_16* callback16(void *arg)
918 {
919 	return (pcre2_jit_stack_16 *)arg;
920 }
921 #endif
922 
923 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)924 static pcre2_jit_stack_32* callback32(void *arg)
925 {
926 	return (pcre2_jit_stack_32 *)arg;
927 }
928 #endif
929 
930 #ifdef SUPPORT_PCRE2_8
931 static pcre2_jit_stack_8 *stack8;
932 
getstack8(void)933 static pcre2_jit_stack_8 *getstack8(void)
934 {
935 	if (!stack8)
936 		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
937 	return stack8;
938 }
939 
/* With a match context: assigns the shared 8 bit JIT stack to it through
   callback8. With NULL: releases the shared stack (if any) and forgets it. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	/* No context given: tear down the shared stack. */
	if (stack8)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
951 #endif /* SUPPORT_PCRE2_8 */
952 
953 #ifdef SUPPORT_PCRE2_16
954 static pcre2_jit_stack_16 *stack16;
955 
getstack16(void)956 static pcre2_jit_stack_16 *getstack16(void)
957 {
958 	if (!stack16)
959 		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
960 	return stack16;
961 }
962 
/* With a match context: assigns the shared 16 bit JIT stack to it through
   callback16. With NULL: releases the shared stack (if any) and forgets it. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	/* No context given: tear down the shared stack. */
	if (stack16)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
974 #endif /* SUPPORT_PCRE2_16 */
975 
976 #ifdef SUPPORT_PCRE2_32
977 static pcre2_jit_stack_32 *stack32;
978 
getstack32(void)979 static pcre2_jit_stack_32 *getstack32(void)
980 {
981 	if (!stack32)
982 		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
983 	return stack32;
984 }
985 
/* With a match context: assigns the shared 32 bit JIT stack to it through
   callback32. With NULL: releases the shared stack (if any) and forgets it. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	/* No context given: tear down the shared stack. */
	if (stack32)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
997 #endif /* SUPPORT_PCRE2_32 */
998 
999 #ifdef SUPPORT_PCRE2_16
1000 
/* Converts the zero terminated UTF-8 string "input" to UTF-16 code units.

   input       zero terminated source bytes
   output      destination buffer; always zero terminated unless max_length
               is 0, in which case nothing is written at all
   offsetmap   optional (may be NULL); entry N receives the byte offset in
               "input" of the character that produced output code unit N,
               and the entry after the last code unit receives the offset
               where conversion stopped. The entry for the second half of a
               surrogate pair is skipped, not written.
   max_length  capacity of "output" in code units, terminator included

   Returns the number of code units stored, excluding the terminator.

   The decoder is deliberately lenient: continuation bytes are not validated
   and encoded surrogates are passed through, because the tests rely on it.
   Only input that cannot be consumed safely is treated specially: a lead
   byte in the range 0xf8-0xff, or a multi-byte sequence cut short by the
   terminating zero, is copied as a single code unit holding the byte value.
   This guarantees forward progress and never reads past the terminator. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0; /* 0xf8-0xff is not a valid lead byte. */

		/* A sequence interrupted by the terminator must not be consumed,
		otherwise iptr would step over the end of the string. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		if (extra == 0)
			c = *iptr;
		else {
			/* The lead byte keeps 5, 4 or 3 payload bits for 1, 2 or 3
			continuation bytes: masks 0x1f, 0x0f and 0x07. */
			c = *iptr & (0x3fu >> extra);
			for (i = 1; i <= extra; i++)
				c = (c << 6) | (iptr[i] & 0x3fu);
		}
		iptr += extra + 1;

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room left for a surrogate pair and the terminator. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
1048 
/* Widens the zero terminated byte string "input" into 16 bit code units,
   one unit per byte and without any decoding. At most max_length units are
   stored in "output", terminator included; nothing is written when
   max_length is 0. Returns the number of units copied, excluding the
   terminator. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one slot free for the terminating zero. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}

/* Capacity, in code units, of the 16 bit conversion buffers below. */
#define REGTEST_MAX_LENGTH16 4096
/* Receives patterns converted for the 16 bit library. */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* Presumably the offset map filled by convert_utf8_to_utf16 - its use is
   not visible from here. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1068 
1069 #endif /* SUPPORT_PCRE2_16 */
1070 
1071 #ifdef SUPPORT_PCRE2_32
1072 
/* Converts the zero terminated UTF-8 string "input" to UTF-32 code units.

   input       zero terminated source bytes
   output      destination buffer; always zero terminated unless max_length
               is 0, in which case nothing is written at all
   offsetmap   optional (may be NULL); entry N receives the byte offset in
               "input" of the character that produced output code unit N,
               and the entry after the last code unit receives the offset
               where conversion stopped
   max_length  capacity of "output" in code units, terminator included

   Returns the number of code units stored, excluding the terminator.

   The decoder is deliberately lenient: continuation bytes are not validated
   and encoded surrogates are passed through, because the tests rely on it.
   Only input that cannot be consumed safely is treated specially: a lead
   byte in the range 0xf8-0xff, or a multi-byte sequence cut short by the
   terminating zero, is copied as a single code unit holding the byte value.
   This guarantees forward progress and never reads past the terminator. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0)
			extra = 0;
		else if (!(*iptr & 0x20))
			extra = 1;
		else if (!(*iptr & 0x10))
			extra = 2;
		else if (!(*iptr & 0x08))
			extra = 3;
		else
			extra = 0; /* 0xf8-0xff is not a valid lead byte. */

		/* A sequence interrupted by the terminator must not be consumed,
		otherwise iptr would step over the end of the string. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		if (extra == 0)
			c = *iptr;
		else {
			/* The lead byte keeps 5, 4 or 3 payload bits for 1, 2 or 3
			continuation bytes: masks 0x1f, 0x0f and 0x07. */
			c = *iptr & (0x3fu >> extra);
			for (i = 1; i <= extra; i++)
				c = (c << 6) | (iptr[i] & 0x3fu);
		}
		iptr += extra + 1;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1108 
/* Widens the zero terminated byte string "input" into 32 bit code units,
   one unit per byte and without any decoding. At most max_length units are
   stored in "output", terminator included; nothing is written when
   max_length is 0. Returns the number of units copied, excluding the
   terminator. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one slot free for the terminating zero. */
	for (; input[count] != 0 && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}

/* Capacity, in code units, of the 32 bit conversion buffers below. */
#define REGTEST_MAX_LENGTH32 4096
/* Presumably receives patterns converted for the 32 bit library, like its
   16 bit counterpart - its use is not visible from here. */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* Presumably the offset map filled by convert_utf8_to_utf32 - its use is
   not visible from here. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1128 
1129 #endif /* SUPPORT_PCRE2_32 */
1130 
/* Returns 1 when every byte of the zero terminated string "input" is in
   the 7 bit ASCII range (0-127), and 0 as soon as a larger byte is seen.
   An empty string counts as ASCII. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	/* Bytes are inspected as unsigned so that values above 127 are not
	seen as negative on platforms where plain char is signed. */
	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p >= 0x80)
			return 0;
	}
	return 1;
}

#define OVECTOR_SIZE 15
1143 
regression_tests(void)1144 static int regression_tests(void)
1145 {
1146 	struct regression_test_case *current = regression_test_cases;
1147 	int error;
1148 	PCRE2_SIZE err_offs;
1149 	int is_successful;
1150 	int is_ascii;
1151 	int total = 0;
1152 	int successful = 0;
1153 	int successful_row = 0;
1154 	int counter = 0;
1155 	int jit_compile_mode;
1156 	int utf = 0;
1157 	int disabled_options = 0;
1158 	int i;
1159 #ifdef SUPPORT_PCRE2_8
1160 	pcre2_code_8 *re8;
1161 	pcre2_compile_context_8 *ccontext8;
1162 	pcre2_match_data_8 *mdata8_1;
1163 	pcre2_match_data_8 *mdata8_2;
1164 	pcre2_match_context_8 *mcontext8;
1165 	PCRE2_SIZE *ovector8_1 = NULL;
1166 	PCRE2_SIZE *ovector8_2 = NULL;
1167 	int return_value8[2];
1168 #endif
1169 #ifdef SUPPORT_PCRE2_16
1170 	pcre2_code_16 *re16;
1171 	pcre2_compile_context_16 *ccontext16;
1172 	pcre2_match_data_16 *mdata16_1;
1173 	pcre2_match_data_16 *mdata16_2;
1174 	pcre2_match_context_16 *mcontext16;
1175 	PCRE2_SIZE *ovector16_1 = NULL;
1176 	PCRE2_SIZE *ovector16_2 = NULL;
1177 	int return_value16[2];
1178 	int length16;
1179 #endif
1180 #ifdef SUPPORT_PCRE2_32
1181 	pcre2_code_32 *re32;
1182 	pcre2_compile_context_32 *ccontext32;
1183 	pcre2_match_data_32 *mdata32_1;
1184 	pcre2_match_data_32 *mdata32_2;
1185 	pcre2_match_context_32 *mcontext32;
1186 	PCRE2_SIZE *ovector32_1 = NULL;
1187 	PCRE2_SIZE *ovector32_2 = NULL;
1188 	int return_value32[2];
1189 	int length32;
1190 #endif
1191 
1192 #if defined SUPPORT_PCRE2_8
1193 	PCRE2_UCHAR8 cpu_info[128];
1194 #elif defined SUPPORT_PCRE2_16
1195 	PCRE2_UCHAR16 cpu_info[128];
1196 #elif defined SUPPORT_PCRE2_32
1197 	PCRE2_UCHAR32 cpu_info[128];
1198 #endif
1199 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1200 	int return_value;
1201 #endif
1202 
1203 	/* This test compares the behaviour of interpreter and JIT. Although disabling
1204 	utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1205 	still considered successful from pcre_jit_test point of view. */
1206 
1207 #if defined SUPPORT_PCRE2_8
1208 	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1209 #elif defined SUPPORT_PCRE2_16
1210 	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1211 #elif defined SUPPORT_PCRE2_32
1212 	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1213 #endif
1214 
1215 	printf("Running JIT regression tests\n");
1216 	printf("  target CPU of SLJIT compiler: ");
1217 	for (i = 0; cpu_info[i]; i++)
1218 		printf("%c", (char)(cpu_info[i]));
1219 	printf("\n");
1220 
1221 #if defined SUPPORT_PCRE2_8
1222 	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1223 #elif defined SUPPORT_PCRE2_16
1224 	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1225 #elif defined SUPPORT_PCRE2_32
1226 	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1227 #endif
1228 
1229 	if (!utf)
1230 		disabled_options |= PCRE2_UTF;
1231 #ifdef SUPPORT_PCRE2_8
1232 	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1233 #endif
1234 #ifdef SUPPORT_PCRE2_16
1235 	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1236 #endif
1237 #ifdef SUPPORT_PCRE2_32
1238 	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1239 #endif
1240 
1241 	while (current->pattern) {
1242 		/* printf("\nPattern: %s :\n", current->pattern); */
1243 		total++;
1244 		is_ascii = 0;
1245 		if (!(current->start_offset & F_PROPERTY))
1246 			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1247 
1248 		if (current->match_options & PCRE2_PARTIAL_SOFT)
1249 			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1250 		else if (current->match_options & PCRE2_PARTIAL_HARD)
1251 			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1252 		else
1253 			jit_compile_mode = PCRE2_JIT_COMPLETE;
1254 		error = 0;
1255 #ifdef SUPPORT_PCRE2_8
1256 		re8 = NULL;
1257 		ccontext8 = pcre2_compile_context_create_8(NULL);
1258 		if (ccontext8) {
1259 			if (GET_NEWLINE(current->newline))
1260 				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1261 			if (GET_BSR(current->newline))
1262 				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1263 
1264 			if (!(current->start_offset & F_NO8)) {
1265 				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1266 					current->compile_options & ~disabled_options,
1267 					&error, &err_offs, ccontext8);
1268 
1269 				if (!re8 && (utf || is_ascii))
1270 					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1271 			}
1272 			pcre2_compile_context_free_8(ccontext8);
1273 		}
1274 		else
1275 			printf("\n8 bit: Cannot allocate compile context\n");
1276 #endif
1277 #ifdef SUPPORT_PCRE2_16
1278 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1279 			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1280 		else
1281 			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1282 
1283 		re16 = NULL;
1284 		ccontext16 = pcre2_compile_context_create_16(NULL);
1285 		if (ccontext16) {
1286 			if (GET_NEWLINE(current->newline))
1287 				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1288 			if (GET_BSR(current->newline))
1289 				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1290 
1291 			if (!(current->start_offset & F_NO16)) {
1292 				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1293 					current->compile_options & ~disabled_options,
1294 					&error, &err_offs, ccontext16);
1295 
1296 				if (!re16 && (utf || is_ascii))
1297 					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1298 			}
1299 			pcre2_compile_context_free_16(ccontext16);
1300 		}
1301 		else
1302 			printf("\n16 bit: Cannot allocate compile context\n");
1303 #endif
1304 #ifdef SUPPORT_PCRE2_32
1305 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1306 			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1307 		else
1308 			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1309 
1310 		re32 = NULL;
1311 		ccontext32 = pcre2_compile_context_create_32(NULL);
1312 		if (ccontext32) {
1313 			if (GET_NEWLINE(current->newline))
1314 				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1315 			if (GET_BSR(current->newline))
1316 				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1317 
1318 			if (!(current->start_offset & F_NO32)) {
1319 				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1320 					current->compile_options & ~disabled_options,
1321 					&error, &err_offs, ccontext32);
1322 
1323 				if (!re32 && (utf || is_ascii))
1324 					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1325 			}
1326 			pcre2_compile_context_free_32(ccontext32);
1327 		}
1328 		else
1329 			printf("\n32 bit: Cannot allocate compile context\n");
1330 #endif
1331 
1332 		counter++;
1333 		if ((counter & 0x3) != 0) {
1334 #ifdef SUPPORT_PCRE2_8
1335 			setstack8(NULL);
1336 #endif
1337 #ifdef SUPPORT_PCRE2_16
1338 			setstack16(NULL);
1339 #endif
1340 #ifdef SUPPORT_PCRE2_32
1341 			setstack32(NULL);
1342 #endif
1343 		}
1344 
1345 #ifdef SUPPORT_PCRE2_8
1346 		return_value8[0] = -1000;
1347 		return_value8[1] = -1000;
1348 		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1349 		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1350 		mcontext8 = pcre2_match_context_create_8(NULL);
1351 		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1352 			printf("\n8 bit: Cannot allocate match data\n");
1353 			pcre2_match_data_free_8(mdata8_1);
1354 			pcre2_match_data_free_8(mdata8_2);
1355 			pcre2_match_context_free_8(mcontext8);
1356 			pcre2_code_free_8(re8);
1357 			re8 = NULL;
1358 		} else {
1359 			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1360 			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1361 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1362 				ovector8_1[i] = -2;
1363 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1364 				ovector8_2[i] = -2;
1365 			pcre2_set_match_limit_8(mcontext8, 10000000);
1366 		}
1367 		if (re8) {
1368 			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1369 				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1370 
1371 			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1372 				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1373 			} else if ((counter & 0x1) != 0) {
1374 				setstack8(mcontext8);
1375 				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1376 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1377 			} else {
1378 				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1379 				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1380 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1381 			}
1382 		}
1383 #endif
1384 
1385 #ifdef SUPPORT_PCRE2_16
1386 		return_value16[0] = -1000;
1387 		return_value16[1] = -1000;
1388 		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1389 		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1390 		mcontext16 = pcre2_match_context_create_16(NULL);
1391 		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1392 			printf("\n16 bit: Cannot allocate match data\n");
1393 			pcre2_match_data_free_16(mdata16_1);
1394 			pcre2_match_data_free_16(mdata16_2);
1395 			pcre2_match_context_free_16(mcontext16);
1396 			pcre2_code_free_16(re16);
1397 			re16 = NULL;
1398 		} else {
1399 			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1400 			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1401 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1402 				ovector16_1[i] = -2;
1403 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1404 				ovector16_2[i] = -2;
1405 			pcre2_set_match_limit_16(mcontext16, 10000000);
1406 		}
1407 		if (re16) {
1408 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1409 				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1410 			else
1411 				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1412 
1413 			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1414 				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1415 
1416 			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1417 				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1418 			} else if ((counter & 0x1) != 0) {
1419 				setstack16(mcontext16);
1420 				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1421 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1422 			} else {
1423 				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1424 				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1425 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1426 			}
1427 		}
1428 #endif
1429 
1430 #ifdef SUPPORT_PCRE2_32
1431 		return_value32[0] = -1000;
1432 		return_value32[1] = -1000;
1433 		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1434 		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1435 		mcontext32 = pcre2_match_context_create_32(NULL);
1436 		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1437 			printf("\n32 bit: Cannot allocate match data\n");
1438 			pcre2_match_data_free_32(mdata32_1);
1439 			pcre2_match_data_free_32(mdata32_2);
1440 			pcre2_match_context_free_32(mcontext32);
1441 			pcre2_code_free_32(re32);
1442 			re32 = NULL;
1443 		} else {
1444 			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1445 			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1446 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1447 				ovector32_1[i] = -2;
1448 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1449 				ovector32_2[i] = -2;
1450 			pcre2_set_match_limit_32(mcontext32, 10000000);
1451 		}
1452 		if (re32) {
1453 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1454 				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1455 			else
1456 				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1457 
1458 			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1459 				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1460 
1461 			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1462 				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1463 			} else if ((counter & 0x1) != 0) {
1464 				setstack32(mcontext32);
1465 				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1466 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1467 			} else {
1468 				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1469 				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1470 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1471 			}
1472 		}
1473 #endif
1474 
1475 		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1476 			return_value8[0], return_value16[0], return_value32[0],
1477 			(int)ovector8_1[0], (int)ovector8_1[1],
1478 			(int)ovector16_1[0], (int)ovector16_1[1],
1479 			(int)ovector32_1[0], (int)ovector32_1[1],
1480 			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1481 
1482 		/* If F_DIFF is set, just run the test, but do not compare the results.
1483 		Segfaults can still be captured. */
1484 
1485 		is_successful = 1;
1486 		if (!(current->start_offset & F_DIFF)) {
1487 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1488 			if (!(current->start_offset & F_FORCECONV)) {
1489 
1490 				/* All results must be the same. */
1491 #ifdef SUPPORT_PCRE2_8
1492 				if ((return_value = return_value8[0]) != return_value8[1]) {
1493 					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1494 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1495 					is_successful = 0;
1496 				} else
1497 #endif
1498 #ifdef SUPPORT_PCRE2_16
1499 				if ((return_value = return_value16[0]) != return_value16[1]) {
1500 					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1501 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1502 					is_successful = 0;
1503 				} else
1504 #endif
1505 #ifdef SUPPORT_PCRE2_32
1506 				if ((return_value = return_value32[0]) != return_value32[1]) {
1507 					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1508 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1509 					is_successful = 0;
1510 				} else
1511 #endif
1512 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1513 				if (return_value8[0] != return_value16[0]) {
1514 					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1515 						return_value8[0], return_value16[0],
1516 						total, current->pattern, current->input);
1517 					is_successful = 0;
1518 				} else
1519 #endif
1520 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1521 				if (return_value8[0] != return_value32[0]) {
1522 					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1523 						return_value8[0], return_value32[0],
1524 						total, current->pattern, current->input);
1525 					is_successful = 0;
1526 				} else
1527 #endif
1528 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1529 				if (return_value16[0] != return_value32[0]) {
1530 					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1531 						return_value16[0], return_value32[0],
1532 						total, current->pattern, current->input);
1533 					is_successful = 0;
1534 				} else
1535 #endif
1536 				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1537 					if (return_value == PCRE2_ERROR_PARTIAL) {
1538 						return_value = 2;
1539 					} else {
1540 						return_value *= 2;
1541 					}
1542 #ifdef SUPPORT_PCRE2_8
1543 					return_value8[0] = return_value;
1544 #endif
1545 #ifdef SUPPORT_PCRE2_16
1546 					return_value16[0] = return_value;
1547 #endif
1548 #ifdef SUPPORT_PCRE2_32
1549 					return_value32[0] = return_value;
1550 #endif
1551 					/* Transform back the results. */
1552 					if (current->compile_options & PCRE2_UTF) {
1553 #ifdef SUPPORT_PCRE2_16
1554 						for (i = 0; i < return_value; ++i) {
1555 							if (ovector16_1[i] != PCRE2_UNSET)
1556 								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1557 							if (ovector16_2[i] != PCRE2_UNSET)
1558 								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1559 						}
1560 #endif
1561 #ifdef SUPPORT_PCRE2_32
1562 						for (i = 0; i < return_value; ++i) {
1563 							if (ovector32_1[i] != PCRE2_UNSET)
1564 								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1565 							if (ovector32_2[i] != PCRE2_UNSET)
1566 								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1567 						}
1568 #endif
1569 					}
1570 
1571 					for (i = 0; i < return_value; ++i) {
1572 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1573 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1574 							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1575 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1576 								total, current->pattern, current->input);
1577 							is_successful = 0;
1578 						}
1579 #endif
1580 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1581 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1582 							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1583 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1584 								total, current->pattern, current->input);
1585 							is_successful = 0;
1586 						}
1587 #endif
1588 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1589 						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1590 							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1591 								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1592 								total, current->pattern, current->input);
1593 							is_successful = 0;
1594 						}
1595 #endif
1596 					}
1597 				}
1598 			} else
1599 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1600 			{
1601 #ifdef SUPPORT_PCRE2_8
1602 				if (return_value8[0] != return_value8[1]) {
1603 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1604 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1605 					is_successful = 0;
1606 				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1607 					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1608 						return_value8[0] = 2;
1609 					else
1610 						return_value8[0] *= 2;
1611 
1612 					for (i = 0; i < return_value8[0]; ++i)
1613 						if (ovector8_1[i] != ovector8_2[i]) {
1614 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1615 								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1616 							is_successful = 0;
1617 						}
1618 				}
1619 #endif
1620 
1621 #ifdef SUPPORT_PCRE2_16
1622 				if (return_value16[0] != return_value16[1]) {
1623 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1624 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1625 					is_successful = 0;
1626 				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1627 					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1628 						return_value16[0] = 2;
1629 					else
1630 						return_value16[0] *= 2;
1631 
1632 					for (i = 0; i < return_value16[0]; ++i)
1633 						if (ovector16_1[i] != ovector16_2[i]) {
1634 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1635 								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1636 							is_successful = 0;
1637 						}
1638 				}
1639 #endif
1640 
1641 #ifdef SUPPORT_PCRE2_32
1642 				if (return_value32[0] != return_value32[1]) {
1643 					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1644 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1645 					is_successful = 0;
1646 				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1647 					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1648 						return_value32[0] = 2;
1649 					else
1650 						return_value32[0] *= 2;
1651 
1652 					for (i = 0; i < return_value32[0]; ++i)
1653 						if (ovector32_1[i] != ovector32_2[i]) {
1654 							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1655 								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1656 							is_successful = 0;
1657 						}
1658 				}
1659 #endif
1660 			}
1661 		}
1662 
1663 		if (is_successful) {
1664 #ifdef SUPPORT_PCRE2_8
1665 			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1666 				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1667 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1668 						total, current->pattern, current->input);
1669 					is_successful = 0;
1670 				}
1671 
1672 				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1673 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1674 						total, current->pattern, current->input);
1675 					is_successful = 0;
1676 				}
1677 			}
1678 #endif
1679 #ifdef SUPPORT_PCRE2_16
1680 			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1681 				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1682 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1683 						total, current->pattern, current->input);
1684 					is_successful = 0;
1685 				}
1686 
1687 				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1688 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1689 						total, current->pattern, current->input);
1690 					is_successful = 0;
1691 				}
1692 			}
1693 #endif
1694 #ifdef SUPPORT_PCRE2_32
1695 			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1696 				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1697 					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1698 						total, current->pattern, current->input);
1699 					is_successful = 0;
1700 				}
1701 
1702 				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1703 					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1704 						total, current->pattern, current->input);
1705 					is_successful = 0;
1706 				}
1707 			}
1708 #endif
1709 		}
1710 
1711 		if (is_successful) {
1712 #ifdef SUPPORT_PCRE2_8
1713 			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1714 				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1715 					total, current->pattern, current->input);
1716 				is_successful = 0;
1717 			}
1718 #endif
1719 #ifdef SUPPORT_PCRE2_16
1720 			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1721 				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1722 					total, current->pattern, current->input);
1723 				is_successful = 0;
1724 			}
1725 #endif
1726 #ifdef SUPPORT_PCRE2_32
1727 			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1728 				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1729 					total, current->pattern, current->input);
1730 				is_successful = 0;
1731 			}
1732 #endif
1733 		}
1734 
1735 #ifdef SUPPORT_PCRE2_8
1736 		pcre2_code_free_8(re8);
1737 		pcre2_match_data_free_8(mdata8_1);
1738 		pcre2_match_data_free_8(mdata8_2);
1739 		pcre2_match_context_free_8(mcontext8);
1740 #endif
1741 #ifdef SUPPORT_PCRE2_16
1742 		pcre2_code_free_16(re16);
1743 		pcre2_match_data_free_16(mdata16_1);
1744 		pcre2_match_data_free_16(mdata16_2);
1745 		pcre2_match_context_free_16(mcontext16);
1746 #endif
1747 #ifdef SUPPORT_PCRE2_32
1748 		pcre2_code_free_32(re32);
1749 		pcre2_match_data_free_32(mdata32_1);
1750 		pcre2_match_data_free_32(mdata32_2);
1751 		pcre2_match_context_free_32(mcontext32);
1752 #endif
1753 
1754 		if (is_successful) {
1755 			successful++;
1756 			successful_row++;
1757 			printf(".");
1758 			if (successful_row >= 60) {
1759 				successful_row = 0;
1760 				printf("\n");
1761 			}
1762 		} else
1763 			successful_row = 0;
1764 
1765 		fflush(stdout);
1766 		current++;
1767 	}
1768 #ifdef SUPPORT_PCRE2_8
1769 	setstack8(NULL);
1770 #endif
1771 #ifdef SUPPORT_PCRE2_16
1772 	setstack16(NULL);
1773 #endif
1774 #ifdef SUPPORT_PCRE2_32
1775 	setstack32(NULL);
1776 #endif
1777 
1778 	if (total == successful) {
1779 		printf("\nAll JIT regression tests are successfully passed.\n");
1780 		return 0;
1781 	} else {
1782 		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1783 		return 1;
1784 	}
1785 }
1786 
1787 #if defined SUPPORT_UNICODE
1788 
/* Check the outcome of one invalid-UTF match attempt against the expected one.

Arguments:
  pattern_index  index of the test case; used only in the diagnostics
  type           label printed in the diagnostics to identify the attempt
  result         value returned by the match call that is being checked
  match_start    expected ovector[0]; a negative value means "no match expected"
  match_end      expected ovector[1]; not used when match_start is negative
  ovector        offset vector filled by the match; read only when a match
                 is expected

Returns 0 when the outcome is the expected one. Otherwise prints a diagnostic
to stdout and returns 1. */

static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	if (match_start < 0) {
		/* No match is expected, so the call must have returned -1 (the value
		pcre2.h gives to PCRE2_ERROR_NOMATCH). The actual result is reported
		as well, in the same way as the "not greater than 0" diagnostic below. */
		if (result != -1) {
			printf("Pattern[%d] %s result (%d) is not -1.\n", pattern_index, type, result);
			return 1;
		}
		return 0;
	}

	/* A match is expected: anything that is not a positive count is a failure. */
	if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		return 1;
	}

	/* The whole-match boundaries must be exactly the expected ones. */
	if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		return 1;
	}

	if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
		return 1;
	}

	return 0;
}
1819 
1820 #endif /* SUPPORT_UNICODE */
1821 
1822 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1823 
/* Shorthand option sets for the invalid UTF-8 test table that follows.
UDA is used in the compile_options column; CI and CPI are used in the
jit_compile_options column. CPI is CI with PCRE2_JIT_PARTIAL_SOFT added. */

#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1827 
/* One row of the invalid UTF-8 regression table. The table uses positional
initialisers, so the order of the fields must not be changed. */

struct invalid_utf8_regression_test_case {
	/* Pattern compile option bits (the table uses UDA or PCRE2_UTF | ...). */
	int compile_options;
	/* JIT compile option bits (the table uses CI or CPI). */
	int jit_compile_options;
	/* Offset in the input at which matching starts. */
	int start_offset;
	/* NOTE(review): presumably the number of code units dropped from the
	start / end of the input before matching - confirm against the code
	that runs the table. */
	int skip_left;
	int skip_right;
	/* Expected match boundaries; the table uses -1, -1 when no match is
	expected. */
	int match_start;
	int match_end;
	/* Up to two pattern strings; the second entry is NULL in many rows. */
	const char *pattern[2];
	/* Zero terminated subject bytes; may hold invalid UTF-8 sequences. */
	const char *input;
};
1839 
/* Tentative definition of a single zero-initialised byte with internal linkage.
NOTE(review): no use of this object is visible in this part of the file;
presumably its address serves as a marker value for the test table - confirm
against the code that reads the table. */
static const char invalid_utf8_newline_cr;
1841 
1842 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1843 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1844 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1845 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1846 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1847 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1848 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1849 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1850 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1851 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1852 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1853 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1854 	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1855 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1856 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1857 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1858 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1859 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1860 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1861 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1862 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1863 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1864 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1865 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1866 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1867 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1868 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1869 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1870 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1871 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1872 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1873 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1874 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1875 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1876 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1877 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1878 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1879 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1880 	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1881 
1882 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1883 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1884 	{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1885 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1886 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1887 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1888 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1889 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1890 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1891 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1892 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1893 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1894 	{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1895 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1896 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1897 	{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1898 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1899 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1900 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1901 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1902 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1903 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1904 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1905 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1906 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1907 
1908 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1909 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1910 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1911 	{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1912 	{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1913 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1914 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1915 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1916 
1917 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1918 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1919 	{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1920 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1921 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1922 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1923 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1924 
1925 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1926 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1927 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1928 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1929 
1930 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1931 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1932 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1933 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1934 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1935 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1936 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1937 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1938 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1939 
1940 	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1941 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1942 	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1943 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1944 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1945 	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1946 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1947 	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1948 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1949 
1950 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1951 	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1952 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1953 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1954 
1955 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1956 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1957 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1958 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1959 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1960 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1961 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1962 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1963 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1964 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1965 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1966 
1967 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1968 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1969 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1970 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1971 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1972 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1973 
1974 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1975 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1976 	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1977 	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1978 
1979 	{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
1980 
1981 	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
1982 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
1983 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
1984 
1985 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
1986 
1987 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
1988 };
1989 
1990 #undef UDA
1991 #undef CI
1992 #undef CPI
1993 
/* Compiles one pattern of an invalid UTF-8 test case, JIT-compiles it and runs
the JIT matcher in every mode selected by the test case.

  current        the test case (options, offsets, patterns, subject)
  pattern_index  index of the test case; used only in diagnostics
  i              which pattern slot of the test case to run
  ccontext       compile context handed to pcre2_compile_8()
  mdata          match data block; its ovector is handed to the result checker

Returns 1 when the pattern slot is NULL (nothing to run) or when no requested
match run is rejected by check_invalid_utf_result(). Returns 0 when the pattern
cannot be compiled, cannot be JIT-compiled, or a match result is rejected. */
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	/* An empty slot is not an error. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the slot that actually failed (it used to be printed as 0). */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_8(code);
		return 0;
	}

	/* The subject is the input string with skip_left bytes dropped from the
	front and skip_right bytes dropped from the end. */
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset is given relative to the whole input, so it is shifted
		by the bytes skipped on the left. */
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	pcre2_code_free_8(code);
	return 1;
}
2044 
invalid_utf8_regression_tests(void)2045 static int invalid_utf8_regression_tests(void)
2046 {
2047 	const struct invalid_utf8_regression_test_case *current;
2048 	pcre2_compile_context_8 *ccontext;
2049 	pcre2_match_data_8 *mdata;
2050 	int total = 0, successful = 0;
2051 	int result;
2052 
2053 	printf("\nRunning invalid-utf8 JIT regression tests\n");
2054 
2055 	ccontext = pcre2_compile_context_create_8(NULL);
2056 	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2057 	mdata = pcre2_match_data_create_8(4, NULL);
2058 
2059 	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2060 		/* printf("\nPattern: %s :\n", current->pattern); */
2061 		total++;
2062 
2063 		result = 1;
2064 		if (current->pattern[1] != &invalid_utf8_newline_cr)
2065 		{
2066 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2067 				result = 0;
2068 			if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2069 				result = 0;
2070 		} else {
2071 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2072 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2073 				result = 0;
2074 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2075 		}
2076 
2077 		if (result) {
2078 			successful++;
2079 		}
2080 
2081 		printf(".");
2082 		if ((total % 60) == 0)
2083 			printf("\n");
2084 	}
2085 
2086 	if ((total % 60) != 0)
2087 		printf("\n");
2088 
2089 	pcre2_match_data_free_8(mdata);
2090 	pcre2_compile_context_free_8(ccontext);
2091 
2092 	if (total == successful) {
2093 		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2094 		return 0;
2095 	} else {
2096 		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2097 		return 1;
2098 	}
2099 }
2100 
2101 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2102 
/* Stand-in used when Unicode support or the 8-bit library is not compiled in:
there is nothing to run, so no failure is reported. */
static int invalid_utf8_regression_tests(void)
{
	const int failures = 0;

	return failures;
}
2107 
2108 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2109 
2110 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2111 
/* Shorthands for the option columns of the invalid UTF-16 test table; they are
#undef'd again right after the table.
  UDA  compile options: PCRE2_UTF, PCRE2_DOTALL and PCRE2_ANCHORED
  CI   JIT options: PCRE2_JIT_COMPLETE and PCRE2_JIT_INVALID_UTF
  CPI  the CI options plus PCRE2_JIT_PARTIAL_SOFT */
2112 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2113 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2114 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2115 
/* One invalid UTF-16 JIT regression test case. All offsets and lengths are in
16-bit code units.

  compile_options      options passed to pcre2_compile_16()
  jit_compile_options  options passed to pcre2_jit_compile_16(); the
                       PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits also
                       select which match runs are performed
  start_offset         start offset within the whole input; skip_left is
                       subtracted before it is passed to pcre2_jit_match_16()
  skip_left            code units dropped from the front of input
  skip_right           code units dropped from the end of input
  match_start          passed with match_end to check_invalid_utf_result()
  match_end            together with the ovector; presumably the expected
                       match bounds, with -1/-1 meaning "no match"
                       (NOTE(review): confirm against that helper)
  pattern              up to two zero-terminated patterns run against the same
                       input; a NULL slot is skipped
  input                zero-terminated subject */
2116 struct invalid_utf16_regression_test_case {
2117 	int compile_options;
2118 	int jit_compile_options;
2119 	int start_offset;
2120 	int skip_left;
2121 	int skip_right;
2122 	int match_start;
2123 	int match_end;
2124 	const PCRE2_UCHAR16 *pattern[2];
2125 	const PCRE2_UCHAR16 *input;
2126 };
2127 
2128 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2129 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2130 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2131 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2132 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2133 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2134 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2135 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2136 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2137 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2138 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2139 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2140 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2141 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2142 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2143 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2144 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2145 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2146 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2147 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2148 
/* Invalid UTF-16 test table. Each row lists, in struct field order:
compile options, JIT compile options, start_offset, skip_left, skip_right,
match_start, match_end, { pattern[0], pattern[1] }, input.
The final all-zero row ends the table: the driver loop stops at the first row
whose pattern[0] is NULL. */
2149 static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
2150 	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
2151 	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
2152 	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
2153 	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
2154 	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
2155 	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
2156 	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
2157 	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
2158 	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
2159 	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
2160 
2161 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
2162 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
2163 	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
2164 	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
2165 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
2166 	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
2167 	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
2168 	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2169 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2170 	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2171 
2172 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
2173 	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
2174 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
2175 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
2176 
2177 	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
2178 	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
2179 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
2180 	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
2181 	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
2182 	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
2183 
2184 	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2185 	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
2186 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2187 
2188 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
2189 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
2190 
2191 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2192 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2193 	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2194 	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2195 
2196 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2197 };
2198 
2199 #undef UDA
2200 #undef CI
2201 #undef CPI
2202 
/* Compiles one pattern of an invalid UTF-16 test case, JIT-compiles it and
runs the JIT matcher in every mode selected by the test case.

  current        the test case (options, offsets, patterns, subject)
  pattern_index  index of the test case; used only in diagnostics
  i              which pattern slot of the test case to run
  ccontext       compile context handed to pcre2_compile_16()
  mdata          match data block; its ovector is handed to the result checker

Returns 1 when the pattern slot is NULL (nothing to run) or when no requested
match run is rejected by check_invalid_utf_result(). Returns 0 when the pattern
cannot be compiled, cannot be JIT-compiled, or a match result is rejected. */
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR16 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	/* An empty slot is not an error. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the slot that actually failed (it used to be printed as 0). */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_16(code);
		return 0;
	}

	/* The input is zero terminated: count its code units. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* The subject is the input with skip_left code units dropped from the
	front and skip_right code units dropped from the end. */
	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset is given relative to the whole input, so it is shifted
		by the code units skipped on the left. */
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	pcre2_code_free_16(code);
	return 1;
}
2260 
invalid_utf16_regression_tests(void)2261 static int invalid_utf16_regression_tests(void)
2262 {
2263 	const struct invalid_utf16_regression_test_case *current;
2264 	pcre2_compile_context_16 *ccontext;
2265 	pcre2_match_data_16 *mdata;
2266 	int total = 0, successful = 0;
2267 	int result;
2268 
2269 	printf("\nRunning invalid-utf16 JIT regression tests\n");
2270 
2271 	ccontext = pcre2_compile_context_create_16(NULL);
2272 	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2273 	mdata = pcre2_match_data_create_16(4, NULL);
2274 
2275 	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2276 		/* printf("\nPattern: %s :\n", current->pattern); */
2277 		total++;
2278 
2279 		result = 1;
2280 		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2281 			result = 0;
2282 		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2283 			result = 0;
2284 
2285 		if (result) {
2286 			successful++;
2287 		}
2288 
2289 		printf(".");
2290 		if ((total % 60) == 0)
2291 			printf("\n");
2292 	}
2293 
2294 	if ((total % 60) != 0)
2295 		printf("\n");
2296 
2297 	pcre2_match_data_free_16(mdata);
2298 	pcre2_compile_context_free_16(ccontext);
2299 
2300 	if (total == successful) {
2301 		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2302 		return 0;
2303 	} else {
2304 		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2305 		return 1;
2306 	}
2307 }
2308 
2309 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2310 
/* Stand-in used when Unicode support or the 16-bit library is not compiled in:
there is nothing to run, so no failure is reported. */
static int invalid_utf16_regression_tests(void)
{
	const int failures = 0;

	return failures;
}
2315 
2316 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2317 
2318 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2319 
/* Shorthands for the option columns of the invalid UTF-32 test table; they are
#undef'd again right after the table.
  UDA  compile options: PCRE2_UTF, PCRE2_DOTALL and PCRE2_ANCHORED
  CI   JIT options: PCRE2_JIT_COMPLETE and PCRE2_JIT_INVALID_UTF
  CPI  the CI options plus PCRE2_JIT_PARTIAL_SOFT */
2320 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2321 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2322 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2323 
/* One invalid UTF-32 JIT regression test case. All offsets and lengths are in
32-bit code units.

  compile_options      options passed to pcre2_compile_32()
  jit_compile_options  options passed to pcre2_jit_compile_32(); the
                       PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits also
                       select which match runs are performed
  start_offset         start offset within the whole input; skip_left is
                       subtracted before it is passed to pcre2_jit_match_32()
  skip_left            code units dropped from the front of input
  skip_right           code units dropped from the end of input
  match_start          passed with match_end to check_invalid_utf_result()
  match_end            together with the ovector; presumably the expected
                       match bounds, with -1/-1 meaning "no match"
                       (NOTE(review): confirm against that helper)
  pattern              up to two zero-terminated patterns run against the same
                       input; a NULL slot is skipped
  input                zero-terminated subject */
2324 struct invalid_utf32_regression_test_case {
2325 	int compile_options;
2326 	int jit_compile_options;
2327 	int start_offset;
2328 	int skip_left;
2329 	int skip_right;
2330 	int match_start;
2331 	int match_end;
2332 	const PCRE2_UCHAR32 *pattern[2];
2333 	const PCRE2_UCHAR32 *input;
2334 };
2335 
2336 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2337 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2338 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2339 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2340 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2341 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2342 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2343 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2344 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2345 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2346 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2347 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2348 static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2349 
/* Invalid UTF-32 test table. Each row lists, in struct field order:
compile options, JIT compile options, start_offset, skip_left, skip_right,
match_start, match_end, { pattern[0], pattern[1] }, input.
The final all-zero row ends the table: the driver loop stops at the first row
whose pattern[0] is NULL. */
2350 static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
2351 	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
2352 	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
2353 	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
2354 	{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
2355 	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2356 	{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2357 
2358 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
2359 	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2360 	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
2361 	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2362 	{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2363 
2364 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
2365 	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
2366 
2367 	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
2368 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
2369 	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
2370 	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2371 	{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2372 	{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
2373 
2374 	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2375 	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
2376 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2377 	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
2378 	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
2379 
2380 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
2381 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
2382 
2383 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2384 };
2385 
2386 #undef UDA
2387 #undef CI
2388 #undef CPI
2389 
/* Compiles one pattern of an invalid UTF-32 test case, JIT-compiles it and
runs the JIT matcher in every mode selected by the test case.

  current        the test case (options, offsets, patterns, subject)
  pattern_index  index of the test case; used only in diagnostics
  i              which pattern slot of the test case to run
  ccontext       compile context handed to pcre2_compile_32()
  mdata          match data block; its ovector is handed to the result checker

Returns 1 when the pattern slot is NULL (nothing to run) or when no requested
match run is rejected by check_invalid_utf_result(). Returns 0 when the pattern
cannot be compiled, cannot be JIT-compiled, or a match result is rejected. */
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR32 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	/* An empty slot is not an error. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the slot that actually failed (it used to be printed as 0). */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_32(code);
		return 0;
	}

	/* The input is zero terminated: count its code units. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* The subject is the input with skip_left code units dropped from the
	front and skip_right code units dropped from the end. */
	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset is given relative to the whole input, so it is shifted
		by the code units skipped on the left. */
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_32(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_32(code);
			return 0;
		}
	}

	pcre2_code_free_32(code);
	return 1;
}
2447 
invalid_utf32_regression_tests(void)2448 static int invalid_utf32_regression_tests(void)
2449 {
2450 	const struct invalid_utf32_regression_test_case *current;
2451 	pcre2_compile_context_32 *ccontext;
2452 	pcre2_match_data_32 *mdata;
2453 	int total = 0, successful = 0;
2454 	int result;
2455 
2456 	printf("\nRunning invalid-utf32 JIT regression tests\n");
2457 
2458 	ccontext = pcre2_compile_context_create_32(NULL);
2459 	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2460 	mdata = pcre2_match_data_create_32(4, NULL);
2461 
2462 	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2463 		/* printf("\nPattern: %s :\n", current->pattern); */
2464 		total++;
2465 
2466 		result = 1;
2467 		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2468 			result = 0;
2469 		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2470 			result = 0;
2471 
2472 		if (result) {
2473 			successful++;
2474 		}
2475 
2476 		printf(".");
2477 		if ((total % 60) == 0)
2478 			printf("\n");
2479 	}
2480 
2481 	if ((total % 60) != 0)
2482 		printf("\n");
2483 
2484 	pcre2_match_data_free_32(mdata);
2485 	pcre2_compile_context_free_32(ccontext);
2486 
2487 	if (total == successful) {
2488 		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2489 		return 0;
2490 	} else {
2491 		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2492 		return 1;
2493 	}
2494 }
2495 
2496 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2497 
/* Stand-in used when Unicode support or the 32-bit library is not compiled in:
there is nothing to run, so no failure is reported. */
static int invalid_utf32_regression_tests(void)
{
	const int failures = 0;

	return failures;
}
2502 
2503 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2504 
2505 /* End of pcre2_jit_test.c */
2506