• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53    \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
55    \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
56    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57    \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58    \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60    \xc2\x85 = 0x85 = 133 (Next Line = NEL)
61    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63    \xc3\xa9 = 0xe9 = 233 (e')
64       \xc3\x89 = 0xc9 = 201 (E')
65    \xc3\xa1 = 0xe1 = 225 (a')
66       \xc3\x81 = 0xc1 = 193 (A')
67    \x53 = 0x53 = S
68      \x73 = 0x73 = s
69      \xc5\xbf = 0x17f = 383 (long S)
70    \xc8\xba = 0x23a = 570
71       \xe2\xb1\xa5 = 0x2c65 = 11365
72    \xe1\xbd\xb8 = 0x1f78 = 8056
73       \xe1\xbf\xb8 = 0x1ff8 = 8184
74    \xf0\x90\x90\x80 = 0x10400 = 66560
75       \xf0\x90\x90\xa8 = 0x10428 = 66600
76    \xc7\x84 = 0x1c4 = 452
77      \xc7\x85 = 0x1c5 = 453
78      \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80    ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81    ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82    ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85    \xcc\x8d = 0x30d = 781
86  Special:
87    \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88    \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89    \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90    \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91    \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92    \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Test-suite entry points, declared ahead of main() which calls them. */
static int regression_tests(void);
static int invalid_utf8_regression_tests(void);
static int invalid_utf16_regression_tests(void);
static int invalid_utf32_regression_tests(void);

/*
 * Program entry point.
 *
 * Queries PCRE2_CONFIG_JIT through whichever code-unit-width library is
 * enabled (8, then 16, then 32 bit; the first SUPPORT_PCRE2_* macro that is
 * defined wins) and refuses to run when JIT support is not reported.
 *
 * Returns 1 when JIT is unavailable; otherwise the bitwise OR of the four
 * test-suite results, so the exit status is zero only if every suite
 * returned zero.
 */
int main(void)
{
	/* Stays 0 if no pcre2_config_* call writes to it, which is then
	   handled like "JIT disabled". */
	int jit = 0;
	int result;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Run the suites as separate statements: the operands of a single
	   `a() | b() | ...` expression may be evaluated in any order, which
	   would make the order of the suites (and of anything they print)
	   unspecified. */
	result = regression_tests();
	result |= invalid_utf8_regression_tests();
	result |= invalid_utf16_regression_tests();
	result |= invalid_utf32_regression_tests();
	return result;
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Shorthand for the compile-option combinations used in the test table.
   Letters: C = PCRE2_CASELESS, M = PCRE2_MULTILINE, U = PCRE2_UTF,
   P = PCRE2_UCP. */
#define MU	(PCRE2_MULTILINE | PCRE2_UTF)
#define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define M	(PCRE2_MULTILINE)
#define MP	(PCRE2_MULTILINE | PCRE2_UCP)
#define U	(PCRE2_UTF)
#define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)

/* A test case's "newline" field packs two settings into one int: the newline
   convention in the low 16 bits and the \R (BSR) setting in the bits above.
   BSR() moves a BSR value into the upper part; A is the default newline
   convention used by most table rows. */
#define BSR(x)	((x) << 16)
#define A	PCRE2_NEWLINE_ANYCRLF

/* Unpack the two halves of a packed "newline" field. */
#define GET_NEWLINE(x)	((x) & 0xffff)
#define GET_BSR(x)	((x) >> 16)

/* A test case's "start_offset" field holds the offset in its low 16 bits
   (OFFSET_MASK); the F_* bits below are OR-ed in above it as per-test flags.
   The flag meanings are not visible in this part of the file; judging by the
   names, F_NO8/F_NO16/F_NO32 presumably exclude a test for one code unit
   width and F_NOMATCH marks a test that is expected not to match - confirm
   against the code that consumes them.
   NOTE(review): F_NO32 has the same value as F_NO16. This looks intentional
   (the two widths sharing one bit), but confirm before relying on it. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NO32		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
149 
/* One row of the regression test table. Rows are written as positional
   initializers, so the order of the fields must not change. */
struct regression_test_case {
	/* PCRE2 compile options, e.g. one of the MU / CMU / MUP shorthands. */
	int compile_options;
	/* Newline convention, optionally OR-ed with a BSR(...) value; split
	   apart again with GET_NEWLINE() / GET_BSR(). */
	int newline;
	/* PCRE2 match-time options, e.g. PCRE2_NOTBOL; 0 for none. */
	int match_options;
	/* Start offset in the low 16 bits (OFFSET_MASK), OR-ed with F_* flags. */
	int start_offset;
	/* Regular expression under test. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
158 
159 static struct regression_test_case regression_test_cases[] = {
160 	/* Constant strings. */
161 	{ MU, A, 0, 0, "AbC", "AbAbC" },
162 	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163 	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164 	{ M, A, 0, 0, "[^a]", "aAbB" },
165 	{ CM, A, 0, 0, "[^m]", "mMnN" },
166 	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167 	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168 	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169 	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170 	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175 	{ MU, A, 0, 0, "[axd]", "sAXd" },
176 	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177 	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178 	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179 	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180 	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181 	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182 	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183 	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184 	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185 	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186 	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187 	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189 	{ M, A, 0, 0, "\\Ca", "cda" },
190 	{ CM, A, 0, 0, "\\Ca", "CDA" },
191 	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192 	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198 	{ M, A, 0, 0, "[3-57-9]", "5" },
199 	{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200 		"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201 
202 	/* Assertions. */
203 	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
204 	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
205 	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
206 	{ MP, A, 0, 0, "\\B", "_\xa1" },
207 	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
208 	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
209 	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
210 	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
211 	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
212 	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
213 	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
214 	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
215 	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
216 	{ 0, 0, 0, 0, "^ab", "ab" },
217 	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
218 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
219 	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
220 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
221 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
222 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
223 	{ 0, 0, 0, 0, "ab$", "ab" },
224 	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
225 	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
226 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
227 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
228 	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
229 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
230 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
231 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
232 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
233 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
234 	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
235 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
236 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
237 	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
238 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
239 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
240 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
241 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
242 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
243 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
244 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
245 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
246 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
247 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
248 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
249 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
250 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
251 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
252 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
253 	{ M, A, 0, 0, "\\Aa", "aaa" },
254 	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
255 	{ M, A, 0, 1, "\\Ga", "aaa" },
256 	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
257 	{ M, A, 0, 0, "a\\z", "aaa" },
258 	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
259 
260 	/* Brackets and alternatives. */
261 	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
262 	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
263 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
264 	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
265 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
266 	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
267 	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
268 	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
269 	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270 	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
271 	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
272 	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
273 	{ CM, A, 0, 0, "ab|cd", "CD" },
274 	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
275 	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
276 
277 	/* Greedy and non-greedy ? operators. */
278 	{ MU, A, 0, 0, "(?:a)?a", "laab" },
279 	{ CMU, A, 0, 0, "(A)?A", "llaab" },
280 	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
281 	{ MU, A, 0, 0, "(a)?a", "manm" },
282 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
283 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
284 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
285 
286 	/* Greedy and non-greedy + operators */
287 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
288 	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
289 	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
290 	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
291 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
292 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
293 	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
294 	{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
295 
296 	/* Greedy and non-greedy * operators */
297 	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
298 	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
299 	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
300 	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
301 	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
302 	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
303 	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
304 	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
305 
306 	/* Combining ? + * operators */
307 	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
308 	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
309 	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
310 	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
311 	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
312 
313 	/* Single character iterators. */
314 	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
315 	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
316 	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
317 	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
318 	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
319 	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
320 	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
321 	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
322 	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
323 	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
324 	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
325 	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
326 	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
327 	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
328 	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
329 	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
330 	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
331 	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
332 	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
333 	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
334 	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
335 	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
336 	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
337 	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
338 	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
339 	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
340 	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
341 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
342 	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
343 	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
344 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
345 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
346 	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
347 	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
348 	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
349 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
350 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
351 	{ MU, A, 0, 0, ".[ab]*.", "xx" },
352 	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
353 	{ MU, A, 0, 0, ".[ab]?.", "xx" },
354 	{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
355 	{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
356 	{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
357 
358 	/* Bracket repeats with limit. */
359 	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
360 	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
361 	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
362 	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
363 	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
364 	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
365 	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
366 	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
367 	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
368 
369 	/* Basic character sets. */
370 	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
371 	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
372 	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
373 	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
374 	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
375 	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
376 	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
377 	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
378 	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
379 	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
380 	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
381 	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
382 	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
383 	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
384 	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
385 	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
386 	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
387 	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
388 	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
389 	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
390 	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
391 	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
392 	{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
393 
394 	/* Unicode properties. */
395 	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
396 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
397 	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
398 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
399 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
400 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
401 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
402 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
403 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
404 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
405 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
406 	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
407 	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
408 	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
409 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
410 	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
411 	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
412 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
413 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
414 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
415 	{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
416 	{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
417 	{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
418 	{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
419 
420 	/* Possible empty brackets. */
421 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
422 	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
423 	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
424 	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
425 	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
426 	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
427 	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
428 	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
429 	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
430 	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
431 
432 	/* Start offset. */
433 	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
434 	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
435 	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
436 	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
437 
438 	/* Newline. */
439 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
440 	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
441 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
442 	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
443 	{ MU, A, 0, 1, "^", "\r\n" },
444 	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
445 	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
446 
447 	/* Any character except newline or any newline. */
448 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
449 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
450 	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
451 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
452 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
453 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
454 	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
455 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
456 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
457 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
458 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
459 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
460 	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
461 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
462 	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
463 	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
464 	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
465 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
466 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
467 	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
468 	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
469 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
470 	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
471 	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
472 	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
473 	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
474 
475 	/* Atomic groups (no fallback from "next" direction). */
476 	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
477 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
478 	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
479 			"bababcdedefgheijijklmlmnop" },
480 	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
481 	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
482 	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
483 	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
484 	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
485 	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
486 	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
487 	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
488 	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
489 	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
490 	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
491 	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
492 	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
493 	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
494 	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
495 	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
496 	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
497 	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
498 	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
499 	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
500 	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
501 	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
502 	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
503 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
504 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
505 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
506 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
507 	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
508 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
509 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
510 	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
511 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
512 	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
513 	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
514 
515 	/* Possessive quantifiers. */
516 	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
517 	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
518 	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
519 	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
520 	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
521 	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
522 	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
523 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
524 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
525 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
526 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
527 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
528 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
529 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
530 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
531 	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
532 	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
533 	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
534 	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
535 	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
536 	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
537 	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
538 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
539 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
540 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
541 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
542 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
543 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
544 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
545 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
546 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
547 	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
548 	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
549 	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
550 	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
551 
552 	/* Back references. */
553 	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
554 	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
555 	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
556 	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
557 	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
558 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
559 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
560 	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
561 	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
562 	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
563 	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
564 	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
565 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
566 	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
567 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
568 	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
569 	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
570 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
571 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
572 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
573 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
574 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
575 	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
576 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
577 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
578 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
579 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
580 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
581 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
582 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
583 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
584 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
585 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
586 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
587 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
588 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
589 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
590 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
591 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
592 
593 	/* Assertions. */
594 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
595 	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
596 	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
597 	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
598 	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
599 	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
600 	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
601 	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
602 	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
603 	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
604 	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
605 	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
606 	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
607 	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
608 	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
609 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
610 	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
611 	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
612 	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
613 	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
614 	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
615 	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
616 	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
617 	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
618 	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
619 	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
620 	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
621 	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
622 	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
623 	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
624 	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
625 	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
626 	{ MU, A, 0, 0, "a(?=)b", "ab" },
627 	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
628 
629 	/* Not empty, ACCEPT, FAIL */
630 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
631 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
632 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
633 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
634 	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
635 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
636 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
637 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
638 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
639 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
640 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
641 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
642 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
643 	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
644 	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
645 	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
646 	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
647 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
648 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
649 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
650 
651 	/* Conditional blocks. */
652 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
653 	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
654 	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
655 	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
656 	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
657 	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
658 	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
659 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
660 	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
661 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
662 	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
663 	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
664 	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
665 	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
666 	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
667 	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
668 	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
669 	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
670 	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
671 	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
672 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
673 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
674 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
675 	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
676 	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
677 	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
678 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
679 	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
680 	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
681 	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
682 	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
683 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
684 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
685 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
686 	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
687 	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
688 	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
689 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
690 
691 	/* Set start of match. */
692 	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
693 	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
694 	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
695 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
696 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
697 
698 	/* First line. */
699 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
700 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
701 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
702 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
703 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
704 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
705 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
706 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
707 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
708 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
709 	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
710 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
711 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
712 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
713 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
714 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
715 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
716 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
717 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
718 	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
719 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
720 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
721 
722 	/* Recurse. */
723 	{ MU, A, 0, 0, "(a)(?1)", "aa" },
724 	{ MU, A, 0, 0, "((a))(?1)", "aa" },
725 	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
726 	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
727 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
728 	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
729 	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
730 	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
731 	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
732 	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
733 	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
734 	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
735 	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
736 	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
737 	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
738 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
739 	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
740 	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
741 	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
742 	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
743 	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
744 	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
745 	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
746 	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
747 	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
748 	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
749 	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
750 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
751 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
752 	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
753 	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
754 	{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
755 
756 	/* 16 bit specific tests. */
757 	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
758 	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
759 	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
760 	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
761 	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
762 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
763 	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
764 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
765 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
766 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
767 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
768 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
769 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
770 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
771 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
772 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
773 	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
774 	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
775 	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
776 	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
777 	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
778 	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
779 	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
780 	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
781 	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
782 	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
783 	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
784 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
785 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
786 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
787 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
788 
789 	/* Partial matching. */
790 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
791 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
792 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
793 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
794 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
795 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
796 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
797 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
798 
799 	/* (*MARK) verb. */
800 	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
801 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
802 	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
803 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
804 	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
805 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
806 	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
807 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
808 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
809 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
810 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
811 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
812 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
813 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
814 	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
815 
816 	/* (*COMMIT) verb. */
817 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
818 	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
819 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
820 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
821 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
822 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
823 
824 	/* (*PRUNE) verb. */
825 	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
826 	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
827 	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
828 	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
829 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
830 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
831 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
832 	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
833 	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
834 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
835 	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
836 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
837 	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
838 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
839 	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
840 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
841 	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
842 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
843 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
844 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
845 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
846 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
847 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
848 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
849 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
850 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
851 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
852 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
853 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
854 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
855 
856 	/* (*SKIP) verb. */
857 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
858 	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
859 	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
860 	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
861 
862 	/* (*THEN) verb. */
863 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
864 	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
865 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
866 	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
867 	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
868 	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
869 	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
870 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
871 	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
872 	{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
873 	{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
874 
875 	/* Recurse and control verbs. */
876 	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
877 	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
878 	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
879 	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
880 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
881 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
882 	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
883 	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
884 
885 #ifdef SUPPORT_UNICODE
886 	/* Script runs and iterations. */
887 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
888 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
889 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
890 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
891 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
892 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
893 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
894 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
895 #endif
896 
897 	/* Deep recursion. */
898 	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
899 	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
900 	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
901 
902 	/* Deep recursion: Stack limit reached. */
903 	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
904 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
905 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
906 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
907 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
908 
909 	{ 0, 0, 0, 0, NULL, NULL }
910 };
911 
912 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)913 static pcre2_jit_stack_8* callback8(void *arg)
914 {
915 	return (pcre2_jit_stack_8 *)arg;
916 }
917 #endif
918 
919 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)920 static pcre2_jit_stack_16* callback16(void *arg)
921 {
922 	return (pcre2_jit_stack_16 *)arg;
923 }
924 #endif
925 
926 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)927 static pcre2_jit_stack_32* callback32(void *arg)
928 {
929 	return (pcre2_jit_stack_32 *)arg;
930 }
931 #endif
932 
933 #ifdef SUPPORT_PCRE2_8
934 static pcre2_jit_stack_8 *stack8;
935 
getstack8(void)936 static pcre2_jit_stack_8 *getstack8(void)
937 {
938 	if (!stack8)
939 		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
940 	return stack8;
941 }
942 
/* With a match context: registers callback8 on it, passing the shared 8-bit
JIT stack as the callback data. With NULL: frees the shared stack (if one was
created) and forgets it. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	/* No context given: this is the cleanup request. */
	if (stack8)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
954 #endif /* SUPPORT_PCRE2_8 */
955 
956 #ifdef SUPPORT_PCRE2_16
957 static pcre2_jit_stack_16 *stack16;
958 
getstack16(void)959 static pcre2_jit_stack_16 *getstack16(void)
960 {
961 	if (!stack16)
962 		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
963 	return stack16;
964 }
965 
/* With a match context: registers callback16 on it, passing the shared 16-bit
JIT stack as the callback data. With NULL: frees the shared stack (if one was
created) and forgets it. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	/* No context given: this is the cleanup request. */
	if (stack16)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
977 #endif /* SUPPORT_PCRE2_16 */
978 
979 #ifdef SUPPORT_PCRE2_32
980 static pcre2_jit_stack_32 *stack32;
981 
getstack32(void)982 static pcre2_jit_stack_32 *getstack32(void)
983 {
984 	if (!stack32)
985 		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
986 	return stack32;
987 }
988 
/* With a match context: registers callback32 on it, passing the shared 32-bit
JIT stack as the callback data. With NULL: frees the shared stack (if one was
created) and forgets it. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	/* No context given: this is the cleanup request. */
	if (stack32)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
1000 #endif /* SUPPORT_PCRE2_32 */
1001 
1002 #ifdef SUPPORT_PCRE2_16
1003 
/* Converts the zero-terminated UTF-8 string "input" into 16-bit code units
stored in "output", which has room for "max_length" code units including the
terminating zero. Values of 0x10000 and above are written as a surrogate pair.
Conversion stops early when the next character no longer fits.

When "offsetmap" is not NULL, offsetmap[i] receives the byte offset in "input"
of the character whose first code unit is output[i], and the entry at the
returned length receives the offset at which conversion stopped. The entry
that belongs to the second half of a surrogate pair is skipped, not written.

The decoder is deliberately lenient: continuation bytes are not validated, so
ill-formed but complete sequences (for example encoded surrogates) are passed
through. A lead byte of 0xf8 or above, or a sequence cut short by the
terminator, is consumed as one code unit holding the byte value, so the input
pointer always advances and never moves past the terminator.

Returns the number of code units written, excluding the terminating zero.
Returns 0 without writing anything when max_length is not positive. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0 || *iptr >= 0xf8)
			extra = 0;
		else if (*iptr < 0xe0)
			extra = 1;
		else if (*iptr < 0xf0)
			extra = 2;
		else
			extra = 3;

		/* A terminator inside the sequence means it is truncated. The bytes
		are tested in order, so nothing beyond the terminator is read. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		if (extra == 0)
			c = iptr[0];
		else if (extra == 1)
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
		else if (extra == 2)
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
		else
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
		iptr += extra + 1;

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. The offset
			map entry written above already marks where conversion stops. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			/* Keep the map aligned with the output: step over the slot of
			the low surrogate without writing it. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
1051 
/* Widens the zero-terminated byte string "input" into "output" one byte per
16-bit code unit, with no decoding. At most max_length - 1 units are copied
and a terminating zero is always appended, except that nothing at all is
written when max_length is 0. Returns the number of units copied. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* One slot stays reserved for the terminator. */
	for (; input[count] && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1067 
/* Capacity, in elements, of the two 16-bit scratch arrays below. */
1068 #define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer for 16-bit text; regression_tests() converts or copies each
pattern into it before compiling in 16-bit mode. */
1069 static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* NOTE(review): presumably handed to convert_utf8_to_utf16() as its offsetmap
argument; no use is visible in this part of the file - confirm. */
1070 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1071 
1072 #endif /* SUPPORT_PCRE2_16 */
1073 
1074 #ifdef SUPPORT_PCRE2_32
1075 
/* Converts the zero-terminated UTF-8 string "input" into 32-bit code units
stored in "output", which has room for "max_length" code units including the
terminating zero. Each decoded character becomes exactly one code unit.

When "offsetmap" is not NULL, offsetmap[i] receives the byte offset in "input"
of the character stored in output[i], and the entry at the returned length
receives the offset at which conversion stopped.

The decoder is deliberately lenient: continuation bytes are not validated, so
ill-formed but complete sequences (for example encoded surrogates) are passed
through. A lead byte of 0xf8 or above, or a sequence cut short by the
terminator, is consumed as one code unit holding the byte value, so the input
pointer always advances and never moves past the terminator.

Returns the number of code units written, excluding the terminating zero.
Returns 0 without writing anything when max_length is not positive. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		if (*iptr < 0xc0 || *iptr >= 0xf8)
			extra = 0;
		else if (*iptr < 0xe0)
			extra = 1;
		else if (*iptr < 0xf0)
			extra = 2;
		else
			extra = 3;

		/* A terminator inside the sequence means it is truncated. The bytes
		are tested in order, so nothing beyond the terminator is read. */
		for (i = 1; i <= extra; i++) {
			if (iptr[i] == 0) {
				extra = 0;
				break;
			}
		}

		if (extra == 0)
			c = iptr[0];
		else if (extra == 1)
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
		else if (extra == 2)
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
		else
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
		iptr += extra + 1;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1111 
/* Widens the zero-terminated byte string "input" into "output" one byte per
32-bit code unit, with no decoding. At most max_length - 1 units are copied
and a terminating zero is always appended, except that nothing at all is
written when max_length is 0. Returns the number of units copied. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* One slot stays reserved for the terminator. */
	for (; input[count] && max_length - count > 1; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1127 
/* Capacity, in elements, of the two 32-bit scratch arrays below. */
1128 #define REGTEST_MAX_LENGTH32 4096
/* NOTE(review): presumably the 32-bit counterpart of regtest_buf16, i.e. the
destination for patterns and subjects widened to 32-bit code units; its use
is not visible in this part of the file - confirm. */
1129 static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* NOTE(review): presumably handed to convert_utf8_to_utf32() as its offsetmap
argument; no use is visible in this part of the file - confirm. */
1130 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1131 
1132 #endif /* SUPPORT_PCRE2_32 */
1133 
/* Returns 1 when every byte of the zero-terminated string "input" has a value
of 127 or less (an empty string qualifies), and 0 as soon as a larger byte is
found. Bytes are examined as unsigned values. */
static int check_ascii(const char *input)
{
	const unsigned char *byte;

	for (byte = (const unsigned char *)input; *byte != 0; byte++) {
		if (*byte >= 128)
			return 0;
	}
	return 1;
}
1144 
/* NOTE(review): presumably the number of ovector pairs requested for the match
data blocks used by regression_tests(); the use is outside this part of the
file - confirm. */
1145 #define OVECTOR_SIZE 15
1146 
regression_tests(void)1147 static int regression_tests(void)
1148 {
1149 	struct regression_test_case *current = regression_test_cases;
1150 	int error;
1151 	PCRE2_SIZE err_offs;
1152 	int is_successful;
1153 	int is_ascii;
1154 	int total = 0;
1155 	int successful = 0;
1156 	int successful_row = 0;
1157 	int counter = 0;
1158 	int jit_compile_mode;
1159 	int utf = 0;
1160 	int disabled_options = 0;
1161 	int i;
1162 #ifdef SUPPORT_PCRE2_8
1163 	pcre2_code_8 *re8;
1164 	pcre2_compile_context_8 *ccontext8;
1165 	pcre2_match_data_8 *mdata8_1;
1166 	pcre2_match_data_8 *mdata8_2;
1167 	pcre2_match_context_8 *mcontext8;
1168 	PCRE2_SIZE *ovector8_1 = NULL;
1169 	PCRE2_SIZE *ovector8_2 = NULL;
1170 	int return_value8[2];
1171 #endif
1172 #ifdef SUPPORT_PCRE2_16
1173 	pcre2_code_16 *re16;
1174 	pcre2_compile_context_16 *ccontext16;
1175 	pcre2_match_data_16 *mdata16_1;
1176 	pcre2_match_data_16 *mdata16_2;
1177 	pcre2_match_context_16 *mcontext16;
1178 	PCRE2_SIZE *ovector16_1 = NULL;
1179 	PCRE2_SIZE *ovector16_2 = NULL;
1180 	int return_value16[2];
1181 	int length16;
1182 #endif
1183 #ifdef SUPPORT_PCRE2_32
1184 	pcre2_code_32 *re32;
1185 	pcre2_compile_context_32 *ccontext32;
1186 	pcre2_match_data_32 *mdata32_1;
1187 	pcre2_match_data_32 *mdata32_2;
1188 	pcre2_match_context_32 *mcontext32;
1189 	PCRE2_SIZE *ovector32_1 = NULL;
1190 	PCRE2_SIZE *ovector32_2 = NULL;
1191 	int return_value32[2];
1192 	int length32;
1193 #endif
1194 
1195 #if defined SUPPORT_PCRE2_8
1196 	PCRE2_UCHAR8 cpu_info[128];
1197 #elif defined SUPPORT_PCRE2_16
1198 	PCRE2_UCHAR16 cpu_info[128];
1199 #elif defined SUPPORT_PCRE2_32
1200 	PCRE2_UCHAR32 cpu_info[128];
1201 #endif
1202 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1203 	int return_value;
1204 #endif
1205 
1206 	/* This test compares the behaviour of interpreter and JIT. Although disabling
1207 	utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
1208 	still considered successful from pcre2_jit_test point of view. */
1209 
1210 #if defined SUPPORT_PCRE2_8
1211 	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1212 #elif defined SUPPORT_PCRE2_16
1213 	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1214 #elif defined SUPPORT_PCRE2_32
1215 	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1216 #endif
1217 
1218 	printf("Running JIT regression tests\n");
1219 	printf("  target CPU of SLJIT compiler: ");
1220 	for (i = 0; cpu_info[i]; i++)
1221 		printf("%c", (char)(cpu_info[i]));
1222 	printf("\n");
1223 
1224 #if defined SUPPORT_PCRE2_8
1225 	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1226 #elif defined SUPPORT_PCRE2_16
1227 	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1228 #elif defined SUPPORT_PCRE2_32
1229 	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1230 #endif
1231 
1232 	if (!utf)
1233 		disabled_options |= PCRE2_UTF;
1234 #ifdef SUPPORT_PCRE2_8
1235 	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1236 #endif
1237 #ifdef SUPPORT_PCRE2_16
1238 	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1239 #endif
1240 #ifdef SUPPORT_PCRE2_32
1241 	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1242 #endif
1243 
1244 	while (current->pattern) {
1245 		/* printf("\nPattern: %s :\n", current->pattern); */
1246 		total++;
1247 		is_ascii = 0;
1248 		if (!(current->start_offset & F_PROPERTY))
1249 			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1250 
1251 		if (current->match_options & PCRE2_PARTIAL_SOFT)
1252 			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1253 		else if (current->match_options & PCRE2_PARTIAL_HARD)
1254 			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1255 		else
1256 			jit_compile_mode = PCRE2_JIT_COMPLETE;
1257 		error = 0;
1258 #ifdef SUPPORT_PCRE2_8
1259 		re8 = NULL;
1260 		ccontext8 = pcre2_compile_context_create_8(NULL);
1261 		if (ccontext8) {
1262 			if (GET_NEWLINE(current->newline))
1263 				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1264 			if (GET_BSR(current->newline))
1265 				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1266 
1267 			if (!(current->start_offset & F_NO8)) {
1268 				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1269 					current->compile_options & ~disabled_options,
1270 					&error, &err_offs, ccontext8);
1271 
1272 				if (!re8 && (utf || is_ascii))
1273 					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1274 			}
1275 			pcre2_compile_context_free_8(ccontext8);
1276 		}
1277 		else
1278 			printf("\n8 bit: Cannot allocate compile context\n");
1279 #endif
1280 #ifdef SUPPORT_PCRE2_16
1281 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1282 			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1283 		else
1284 			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1285 
1286 		re16 = NULL;
1287 		ccontext16 = pcre2_compile_context_create_16(NULL);
1288 		if (ccontext16) {
1289 			if (GET_NEWLINE(current->newline))
1290 				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1291 			if (GET_BSR(current->newline))
1292 				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1293 
1294 			if (!(current->start_offset & F_NO16)) {
1295 				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1296 					current->compile_options & ~disabled_options,
1297 					&error, &err_offs, ccontext16);
1298 
1299 				if (!re16 && (utf || is_ascii))
1300 					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1301 			}
1302 			pcre2_compile_context_free_16(ccontext16);
1303 		}
1304 		else
1305 			printf("\n16 bit: Cannot allocate compile context\n");
1306 #endif
1307 #ifdef SUPPORT_PCRE2_32
1308 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1309 			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1310 		else
1311 			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1312 
1313 		re32 = NULL;
1314 		ccontext32 = pcre2_compile_context_create_32(NULL);
1315 		if (ccontext32) {
1316 			if (GET_NEWLINE(current->newline))
1317 				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1318 			if (GET_BSR(current->newline))
1319 				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1320 
1321 			if (!(current->start_offset & F_NO32)) {
1322 				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1323 					current->compile_options & ~disabled_options,
1324 					&error, &err_offs, ccontext32);
1325 
1326 				if (!re32 && (utf || is_ascii))
1327 					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1328 			}
1329 			pcre2_compile_context_free_32(ccontext32);
1330 		}
1331 		else
1332 			printf("\n32 bit: Cannot allocate compile context\n");
1333 #endif
1334 
1335 		counter++;
1336 		if ((counter & 0x3) != 0) {
1337 #ifdef SUPPORT_PCRE2_8
1338 			setstack8(NULL);
1339 #endif
1340 #ifdef SUPPORT_PCRE2_16
1341 			setstack16(NULL);
1342 #endif
1343 #ifdef SUPPORT_PCRE2_32
1344 			setstack32(NULL);
1345 #endif
1346 		}
1347 
1348 #ifdef SUPPORT_PCRE2_8
1349 		return_value8[0] = -1000;
1350 		return_value8[1] = -1000;
1351 		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1352 		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1353 		mcontext8 = pcre2_match_context_create_8(NULL);
1354 		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1355 			printf("\n8 bit: Cannot allocate match data\n");
1356 			pcre2_match_data_free_8(mdata8_1);
1357 			pcre2_match_data_free_8(mdata8_2);
1358 			pcre2_match_context_free_8(mcontext8);
1359 			pcre2_code_free_8(re8);
1360 			re8 = NULL;
1361 		} else {
1362 			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1363 			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1364 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1365 				ovector8_1[i] = -2;
1366 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1367 				ovector8_2[i] = -2;
1368 			pcre2_set_match_limit_8(mcontext8, 10000000);
1369 		}
1370 		if (re8) {
1371 			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1372 				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1373 
1374 			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1375 				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1376 			} else if ((counter & 0x1) != 0) {
1377 				setstack8(mcontext8);
1378 				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1379 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1380 			} else {
1381 				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1382 				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1383 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1384 			}
1385 		}
1386 #endif
1387 
1388 #ifdef SUPPORT_PCRE2_16
1389 		return_value16[0] = -1000;
1390 		return_value16[1] = -1000;
1391 		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1392 		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1393 		mcontext16 = pcre2_match_context_create_16(NULL);
1394 		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1395 			printf("\n16 bit: Cannot allocate match data\n");
1396 			pcre2_match_data_free_16(mdata16_1);
1397 			pcre2_match_data_free_16(mdata16_2);
1398 			pcre2_match_context_free_16(mcontext16);
1399 			pcre2_code_free_16(re16);
1400 			re16 = NULL;
1401 		} else {
1402 			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1403 			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1404 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1405 				ovector16_1[i] = -2;
1406 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1407 				ovector16_2[i] = -2;
1408 			pcre2_set_match_limit_16(mcontext16, 10000000);
1409 		}
1410 		if (re16) {
1411 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1412 				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1413 			else
1414 				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1415 
1416 			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1417 				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1418 
1419 			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1420 				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1421 			} else if ((counter & 0x1) != 0) {
1422 				setstack16(mcontext16);
1423 				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1424 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1425 			} else {
1426 				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1427 				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1428 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1429 			}
1430 		}
1431 #endif
1432 
1433 #ifdef SUPPORT_PCRE2_32
1434 		return_value32[0] = -1000;
1435 		return_value32[1] = -1000;
1436 		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1437 		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1438 		mcontext32 = pcre2_match_context_create_32(NULL);
1439 		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1440 			printf("\n32 bit: Cannot allocate match data\n");
1441 			pcre2_match_data_free_32(mdata32_1);
1442 			pcre2_match_data_free_32(mdata32_2);
1443 			pcre2_match_context_free_32(mcontext32);
1444 			pcre2_code_free_32(re32);
1445 			re32 = NULL;
1446 		} else {
1447 			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1448 			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1449 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1450 				ovector32_1[i] = -2;
1451 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1452 				ovector32_2[i] = -2;
1453 			pcre2_set_match_limit_32(mcontext32, 10000000);
1454 		}
1455 		if (re32) {
1456 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1457 				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1458 			else
1459 				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1460 
1461 			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1462 				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1463 
1464 			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1465 				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1466 			} else if ((counter & 0x1) != 0) {
1467 				setstack32(mcontext32);
1468 				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1469 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1470 			} else {
1471 				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1472 				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1473 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1474 			}
1475 		}
1476 #endif
1477 
1478 		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1479 			return_value8[0], return_value16[0], return_value32[0],
1480 			(int)ovector8_1[0], (int)ovector8_1[1],
1481 			(int)ovector16_1[0], (int)ovector16_1[1],
1482 			(int)ovector32_1[0], (int)ovector32_1[1],
1483 			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1484 
1485 		/* If F_DIFF is set, just run the test, but do not compare the results.
1486 		Segfaults can still be captured. */
1487 
1488 		is_successful = 1;
1489 		if (!(current->start_offset & F_DIFF)) {
1490 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1491 			if (!(current->start_offset & F_FORCECONV)) {
1492 
1493 				/* All results must be the same. */
1494 #ifdef SUPPORT_PCRE2_8
1495 				if ((return_value = return_value8[0]) != return_value8[1]) {
1496 					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1497 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1498 					is_successful = 0;
1499 				} else
1500 #endif
1501 #ifdef SUPPORT_PCRE2_16
1502 				if ((return_value = return_value16[0]) != return_value16[1]) {
1503 					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1504 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1505 					is_successful = 0;
1506 				} else
1507 #endif
1508 #ifdef SUPPORT_PCRE2_32
1509 				if ((return_value = return_value32[0]) != return_value32[1]) {
1510 					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1511 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1512 					is_successful = 0;
1513 				} else
1514 #endif
1515 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1516 				if (return_value8[0] != return_value16[0]) {
1517 					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1518 						return_value8[0], return_value16[0],
1519 						total, current->pattern, current->input);
1520 					is_successful = 0;
1521 				} else
1522 #endif
1523 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1524 				if (return_value8[0] != return_value32[0]) {
1525 					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1526 						return_value8[0], return_value32[0],
1527 						total, current->pattern, current->input);
1528 					is_successful = 0;
1529 				} else
1530 #endif
1531 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1532 				if (return_value16[0] != return_value32[0]) {
1533 					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1534 						return_value16[0], return_value32[0],
1535 						total, current->pattern, current->input);
1536 					is_successful = 0;
1537 				} else
1538 #endif
1539 				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1540 					if (return_value == PCRE2_ERROR_PARTIAL) {
1541 						return_value = 2;
1542 					} else {
1543 						return_value *= 2;
1544 					}
1545 #ifdef SUPPORT_PCRE2_8
1546 					return_value8[0] = return_value;
1547 #endif
1548 #ifdef SUPPORT_PCRE2_16
1549 					return_value16[0] = return_value;
1550 #endif
1551 #ifdef SUPPORT_PCRE2_32
1552 					return_value32[0] = return_value;
1553 #endif
1554 					/* Transform back the results. */
1555 					if (current->compile_options & PCRE2_UTF) {
1556 #ifdef SUPPORT_PCRE2_16
1557 						for (i = 0; i < return_value; ++i) {
1558 							if (ovector16_1[i] != PCRE2_UNSET)
1559 								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1560 							if (ovector16_2[i] != PCRE2_UNSET)
1561 								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1562 						}
1563 #endif
1564 #ifdef SUPPORT_PCRE2_32
1565 						for (i = 0; i < return_value; ++i) {
1566 							if (ovector32_1[i] != PCRE2_UNSET)
1567 								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1568 							if (ovector32_2[i] != PCRE2_UNSET)
1569 								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1570 						}
1571 #endif
1572 					}
1573 
1574 					for (i = 0; i < return_value; ++i) {
1575 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1576 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1577 							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1578 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1579 								total, current->pattern, current->input);
1580 							is_successful = 0;
1581 						}
1582 #endif
1583 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1584 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1585 							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1586 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1587 								total, current->pattern, current->input);
1588 							is_successful = 0;
1589 						}
1590 #endif
1591 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1592 						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1593 							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1594 								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1595 								total, current->pattern, current->input);
1596 							is_successful = 0;
1597 						}
1598 #endif
1599 					}
1600 				}
1601 			} else
1602 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1603 			{
1604 #ifdef SUPPORT_PCRE2_8
1605 				if (return_value8[0] != return_value8[1]) {
1606 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1607 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1608 					is_successful = 0;
1609 				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1610 					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1611 						return_value8[0] = 2;
1612 					else
1613 						return_value8[0] *= 2;
1614 
1615 					for (i = 0; i < return_value8[0]; ++i)
1616 						if (ovector8_1[i] != ovector8_2[i]) {
1617 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1618 								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1619 							is_successful = 0;
1620 						}
1621 				}
1622 #endif
1623 
1624 #ifdef SUPPORT_PCRE2_16
1625 				if (return_value16[0] != return_value16[1]) {
1626 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1627 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1628 					is_successful = 0;
1629 				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1630 					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1631 						return_value16[0] = 2;
1632 					else
1633 						return_value16[0] *= 2;
1634 
1635 					for (i = 0; i < return_value16[0]; ++i)
1636 						if (ovector16_1[i] != ovector16_2[i]) {
1637 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1638 								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1639 							is_successful = 0;
1640 						}
1641 				}
1642 #endif
1643 
1644 #ifdef SUPPORT_PCRE2_32
1645 				if (return_value32[0] != return_value32[1]) {
1646 					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1647 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1648 					is_successful = 0;
1649 				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1650 					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1651 						return_value32[0] = 2;
1652 					else
1653 						return_value32[0] *= 2;
1654 
1655 					for (i = 0; i < return_value32[0]; ++i)
1656 						if (ovector32_1[i] != ovector32_2[i]) {
1657 							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1658 								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1659 							is_successful = 0;
1660 						}
1661 				}
1662 #endif
1663 			}
1664 		}
1665 
1666 		if (is_successful) {
1667 #ifdef SUPPORT_PCRE2_8
1668 			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1669 				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1670 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1671 						total, current->pattern, current->input);
1672 					is_successful = 0;
1673 				}
1674 
1675 				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1676 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1677 						total, current->pattern, current->input);
1678 					is_successful = 0;
1679 				}
1680 			}
1681 #endif
1682 #ifdef SUPPORT_PCRE2_16
1683 			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1684 				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1685 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1686 						total, current->pattern, current->input);
1687 					is_successful = 0;
1688 				}
1689 
1690 				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1691 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1692 						total, current->pattern, current->input);
1693 					is_successful = 0;
1694 				}
1695 			}
1696 #endif
1697 #ifdef SUPPORT_PCRE2_32
1698 			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1699 				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1700 					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1701 						total, current->pattern, current->input);
1702 					is_successful = 0;
1703 				}
1704 
1705 				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1706 					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1707 						total, current->pattern, current->input);
1708 					is_successful = 0;
1709 				}
1710 			}
1711 #endif
1712 		}
1713 
1714 		if (is_successful) {
1715 #ifdef SUPPORT_PCRE2_8
1716 			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1717 				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1718 					total, current->pattern, current->input);
1719 				is_successful = 0;
1720 			}
1721 #endif
1722 #ifdef SUPPORT_PCRE2_16
1723 			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1724 				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1725 					total, current->pattern, current->input);
1726 				is_successful = 0;
1727 			}
1728 #endif
1729 #ifdef SUPPORT_PCRE2_32
1730 			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1731 				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1732 					total, current->pattern, current->input);
1733 				is_successful = 0;
1734 			}
1735 #endif
1736 		}
1737 
1738 #ifdef SUPPORT_PCRE2_8
1739 		pcre2_code_free_8(re8);
1740 		pcre2_match_data_free_8(mdata8_1);
1741 		pcre2_match_data_free_8(mdata8_2);
1742 		pcre2_match_context_free_8(mcontext8);
1743 #endif
1744 #ifdef SUPPORT_PCRE2_16
1745 		pcre2_code_free_16(re16);
1746 		pcre2_match_data_free_16(mdata16_1);
1747 		pcre2_match_data_free_16(mdata16_2);
1748 		pcre2_match_context_free_16(mcontext16);
1749 #endif
1750 #ifdef SUPPORT_PCRE2_32
1751 		pcre2_code_free_32(re32);
1752 		pcre2_match_data_free_32(mdata32_1);
1753 		pcre2_match_data_free_32(mdata32_2);
1754 		pcre2_match_context_free_32(mcontext32);
1755 #endif
1756 
1757 		if (is_successful) {
1758 			successful++;
1759 			successful_row++;
1760 			printf(".");
1761 			if (successful_row >= 60) {
1762 				successful_row = 0;
1763 				printf("\n");
1764 			}
1765 		} else
1766 			successful_row = 0;
1767 
1768 		fflush(stdout);
1769 		current++;
1770 	}
1771 #ifdef SUPPORT_PCRE2_8
1772 	setstack8(NULL);
1773 #endif
1774 #ifdef SUPPORT_PCRE2_16
1775 	setstack16(NULL);
1776 #endif
1777 #ifdef SUPPORT_PCRE2_32
1778 	setstack32(NULL);
1779 #endif
1780 
1781 	if (total == successful) {
1782 		printf("\nAll JIT regression tests are successfully passed.\n");
1783 		return 0;
1784 	} else {
1785 		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1786 		return 1;
1787 	}
1788 }
1789 
1790 #if defined SUPPORT_UNICODE
1791 
/* Compare the outcome of one match attempt with the expected match range.

pattern_index and type are used only to label the diagnostic message.
result is the value returned by the match call being checked.
match_start and match_end are the expected values of ovector[0] and
ovector[1]; a negative match_start means that no match is expected, in which
case result must be exactly -1 (presumably PCRE2_ERROR_NOMATCH - confirm
against pcre2.h) and ovector is never read.

Returns 0 when the outcome is as expected. Otherwise prints one diagnostic
line to stdout and returns 1. */

static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	if (match_start < 0) {
		/* No match expected: only the return value is checked. */
		if (result != -1) {
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
			return 1;
		}
		return 0;
	}

	/* A match is expected, so the return value must be positive. The ovector
	is inspected only after this check has passed. */
	if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		return 1;
	}

	if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		return 1;
	}

	if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
		return 1;
	}

	return 0;
}
1822 
1823 #endif /* SUPPORT_UNICODE */
1824 
1825 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1826 
/* Shorthand option sets for the invalid UTF-8 test table. The table rows use
UDA in the compile_options column and CI or CPI in the jit_compile_options
column. Each expansion is parenthesised so it can be combined with further
flags, as in "UDA | PCRE2_CASELESS". */

/* Compile options: UTF, DOTALL and ANCHORED together. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* JIT options: complete matching with the invalid-UTF flag. */
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* JIT options: complete and partial-soft matching with the invalid-UTF flag. */
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1830 
/* One row of the invalid UTF-8 regression table. The table is filled with
positional initialisers, so the order of the members must not change. */

struct invalid_utf8_regression_test_case {
	/* PCRE2 compile option bits (the table uses UDA and other PCRE2_* flags). */
	int compile_options;
	/* PCRE2_JIT_* option bits (the table uses CI or CPI). */
	int jit_compile_options;
	/* NOTE(review): presumably the offset in input at which matching starts -
	confirm against the code that runs the table. */
	int start_offset;
	/* NOTE(review): presumably the number of code units dropped from the
	start and from the end of input before matching - confirm. */
	int skip_left;
	int skip_right;
	/* Expected match range; the table stores -1, -1 in rows whose input is
	not expected to match. NOTE(review): presumably these are the values
	handed to check_invalid_utf_result() - confirm. */
	int match_start;
	int match_end;
	/* Up to two patterns; the table puts NULL in the second slot when only
	one pattern is given. */
	const char *pattern[2];
	/* Subject string, which may contain byte sequences that are not valid
	UTF-8. */
	const char *input;
};
1842 
/* File-scope object declared without an initialiser, so it is zero-valued.
NOTE(review): presumably only its address is significant, as a marker that
can be stored in a pattern[] slot of the test table - confirm against the
code that walks the table. */
static const char invalid_utf8_newline_cr;
1844 
1845 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1846 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1847 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1848 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1849 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1850 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1851 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1852 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1853 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1854 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1855 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1856 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1857 	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1858 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1859 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1860 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1861 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1862 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1863 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1864 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1865 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1866 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1867 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1868 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1869 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1870 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1871 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1872 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1873 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1874 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1875 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1876 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1877 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1878 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1879 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1880 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1881 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1882 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1883 	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1884 
1885 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1886 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1887 	{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1888 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1889 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1890 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1891 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1892 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1893 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1894 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1895 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1896 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1897 	{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1898 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1899 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1900 	{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1901 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1902 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1903 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1904 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1905 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1906 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1907 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1908 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1909 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1910 
1911 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1912 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1913 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1914 	{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1915 	{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1916 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1917 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1918 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1919 
1920 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1921 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1922 	{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1923 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1924 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1925 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1926 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1927 
1928 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1929 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1930 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1931 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1932 
1933 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1934 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1935 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1936 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1937 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1938 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1939 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1940 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1941 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1942 
1943 	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1944 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1945 	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1946 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1947 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1948 	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1949 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1950 	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1951 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1952 
1953 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1954 	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1955 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1956 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1957 
1958 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1959 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1960 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1961 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1962 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1963 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1964 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1965 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1966 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1967 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1968 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1969 
1970 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1971 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1972 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1973 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1974 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1975 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1976 
1977 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1978 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1979 	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1980 	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1981 
1982 	{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
1983 
1984 	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
1985 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
1986 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
1987 
1988 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
1989 
1990 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
1991 };
1992 
1993 #undef UDA
1994 #undef CI
1995 #undef CPI
1996 
/* Compiles one pattern slot of an invalid UTF-8 test case, JIT-compiles it
and runs the JIT matcher over the (optionally trimmed) input.

The subject passed to the matcher starts skip_left bytes into the input and is
skip_right bytes shorter at the end; start_offset is given relative to the
untrimmed input and is rebased accordingly. A complete match is attempted when
PCRE2_JIT_COMPLETE is set in jit_compile_options, and a soft partial match when
PCRE2_JIT_PARTIAL_SOFT is set. Each result is handed to
check_invalid_utf_result(); a non-zero return from it is treated as a failure.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern[] to run
  ccontext       compile context passed to pcre2_compile_8()
  mdata          match data block whose ovector is checked after each match

Returns:         1 on success or when the pattern slot is NULL, 0 on failure
*/
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	/* An unused pattern slot counts as a pass. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	/* Report the slot that actually failed, not always slot 0. */
	if (!code) {
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single exit so the compiled pattern is released on every path. */
	pcre2_code_free_8(code);
	return passed;
}
2047 
invalid_utf8_regression_tests(void)2048 static int invalid_utf8_regression_tests(void)
2049 {
2050 	const struct invalid_utf8_regression_test_case *current;
2051 	pcre2_compile_context_8 *ccontext;
2052 	pcre2_match_data_8 *mdata;
2053 	int total = 0, successful = 0;
2054 	int result;
2055 
2056 	printf("\nRunning invalid-utf8 JIT regression tests\n");
2057 
2058 	ccontext = pcre2_compile_context_create_8(NULL);
2059 	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2060 	mdata = pcre2_match_data_create_8(4, NULL);
2061 
2062 	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2063 		/* printf("\nPattern: %s :\n", current->pattern); */
2064 		total++;
2065 
2066 		result = 1;
2067 		if (current->pattern[1] != &invalid_utf8_newline_cr)
2068 		{
2069 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2070 				result = 0;
2071 			if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2072 				result = 0;
2073 		} else {
2074 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2075 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2076 				result = 0;
2077 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2078 		}
2079 
2080 		if (result) {
2081 			successful++;
2082 		}
2083 
2084 		printf(".");
2085 		if ((total % 60) == 0)
2086 			printf("\n");
2087 	}
2088 
2089 	if ((total % 60) != 0)
2090 		printf("\n");
2091 
2092 	pcre2_match_data_free_8(mdata);
2093 	pcre2_compile_context_free_8(ccontext);
2094 
2095 	if (total == successful) {
2096 		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2097 		return 0;
2098 	} else {
2099 		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2100 		return 1;
2101 	}
2102 }
2103 
2104 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2105 
/* Placeholder used when Unicode support or the 8-bit library is not compiled
in: there are no invalid UTF-8 tests to run, so it reports no failures. */
static int invalid_utf8_regression_tests(void)
{
	int failures = 0;

	return failures;
}
2110 
2111 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2112 
2113 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2114 
/* Shorthand option sets used by the UTF-16 test table below. */

/* Compile options: UTF mode, dot matches everything, anchored. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* JIT options: complete matching with invalid UTF handling. */
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* JIT options: as CI, plus soft partial matching. */
#define CPI (CI | PCRE2_JIT_PARTIAL_SOFT)
2118 
2119 struct invalid_utf16_regression_test_case {
2120 	int compile_options;
2121 	int jit_compile_options;
2122 	int start_offset;
2123 	int skip_left;
2124 	int skip_right;
2125 	int match_start;
2126 	int match_end;
2127 	const PCRE2_UCHAR16 *pattern[2];
2128 	const PCRE2_UCHAR16 *input;
2129 };
2130 
2131 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2132 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2133 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2134 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2135 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2136 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2137 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2138 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2139 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2140 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2141 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2142 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2143 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2144 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2145 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2146 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2147 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2148 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2149 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2150 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2151 
/* Table of invalid UTF-16 JIT test cases. Column order follows the struct:
compile options, JIT options, start offset, skip left, skip right, expected
match start, expected match end, { pattern 0, pattern 1 }, input.
The table ends with an entry whose first pattern is NULL.
NOTE(review): -1/-1 in the expected columns presumably means "no match";
confirm against check_invalid_utf_result(). */
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
	/* Any character: . */
	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },

	/* Word boundaries: \B and \b */
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },

	/* Caseless back reference: (.)\1 */
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },

	/* Extended grapheme cluster: \X */
	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },

	/* Negated character class: [^#] */
	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },

	/* Multiline start of line: ^\W */
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },

	/* Unanchored literal pattern, with and without start optimizations. */
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },

	/* Terminator: the first pattern is NULL. */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
2201 
2202 #undef UDA
2203 #undef CI
2204 #undef CPI
2205 
/* Compiles one pattern slot of an invalid UTF-16 test case, JIT-compiles it
and runs the JIT matcher over the (optionally trimmed) input.

The subject passed to the matcher starts skip_left code units into the input
and is skip_right code units shorter at the end; start_offset is given
relative to the untrimmed input and is rebased accordingly. A complete match
is attempted when PCRE2_JIT_COMPLETE is set in jit_compile_options, and a soft
partial match when PCRE2_JIT_PARTIAL_SOFT is set. Each result is handed to
check_invalid_utf_result(); a non-zero return from it is treated as a failure.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern[] to run
  ccontext       compile context passed to pcre2_compile_16()
  mdata          match data block whose ovector is checked after each match

Returns:         1 on success or when the pattern slot is NULL, 0 on failure
*/
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR16 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	/* An unused pattern slot counts as a pass. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	/* Report the slot that actually failed, not always slot 0. */
	if (!code) {
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units before the terminating zero, then trim. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single exit so the compiled pattern is released on every path. */
	pcre2_code_free_16(code);
	return passed;
}
2263 
invalid_utf16_regression_tests(void)2264 static int invalid_utf16_regression_tests(void)
2265 {
2266 	const struct invalid_utf16_regression_test_case *current;
2267 	pcre2_compile_context_16 *ccontext;
2268 	pcre2_match_data_16 *mdata;
2269 	int total = 0, successful = 0;
2270 	int result;
2271 
2272 	printf("\nRunning invalid-utf16 JIT regression tests\n");
2273 
2274 	ccontext = pcre2_compile_context_create_16(NULL);
2275 	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2276 	mdata = pcre2_match_data_create_16(4, NULL);
2277 
2278 	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2279 		/* printf("\nPattern: %s :\n", current->pattern); */
2280 		total++;
2281 
2282 		result = 1;
2283 		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2284 			result = 0;
2285 		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2286 			result = 0;
2287 
2288 		if (result) {
2289 			successful++;
2290 		}
2291 
2292 		printf(".");
2293 		if ((total % 60) == 0)
2294 			printf("\n");
2295 	}
2296 
2297 	if ((total % 60) != 0)
2298 		printf("\n");
2299 
2300 	pcre2_match_data_free_16(mdata);
2301 	pcre2_compile_context_free_16(ccontext);
2302 
2303 	if (total == successful) {
2304 		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2305 		return 0;
2306 	} else {
2307 		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2308 		return 1;
2309 	}
2310 }
2311 
2312 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2313 
/* Placeholder used when Unicode support or the 16-bit library is not compiled
in: there are no invalid UTF-16 tests to run, so it reports no failures. */
static int invalid_utf16_regression_tests(void)
{
	int failures = 0;

	return failures;
}
2318 
2319 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2320 
2321 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2322 
/* Shorthand option sets used by the UTF-32 test table below. */

/* Compile options: UTF mode, dot matches everything, anchored. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* JIT options: complete matching with invalid UTF handling. */
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* JIT options: as CI, plus soft partial matching. */
#define CPI (CI | PCRE2_JIT_PARTIAL_SOFT)
2326 
2327 struct invalid_utf32_regression_test_case {
2328 	int compile_options;
2329 	int jit_compile_options;
2330 	int start_offset;
2331 	int skip_left;
2332 	int skip_right;
2333 	int match_start;
2334 	int match_end;
2335 	const PCRE2_UCHAR32 *pattern[2];
2336 	const PCRE2_UCHAR32 *input;
2337 };
2338 
2339 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2340 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2341 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2342 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2343 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2344 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2345 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2346 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2347 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2348 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2349 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2350 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2351 static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2352 
/* Table of invalid UTF-32 JIT test cases. Column order follows the struct:
compile options, JIT options, start offset, skip left, skip right, expected
match start, expected match end, { pattern 0, pattern 1 }, input.
The table ends with an entry whose first pattern is NULL.
NOTE(review): -1/-1 in the expected columns presumably means "no match";
confirm against check_invalid_utf_result(). */
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
	/* Any character: . */
	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
	{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
	{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },

	/* Word boundaries: \B and \b */
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
	{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },

	/* Caseless back reference: (.)\1 */
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },

	/* Extended grapheme cluster: \X */
	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },

	/* Negated character class: [^#] */
	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },

	/* Multiline start of line: ^\W */
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },

	/* Terminator: the first pattern is NULL. */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
2388 
2389 #undef UDA
2390 #undef CI
2391 #undef CPI
2392 
/* Compiles one pattern slot of an invalid UTF-32 test case, JIT-compiles it
and runs the JIT matcher over the (optionally trimmed) input.

The subject passed to the matcher starts skip_left code units into the input
and is skip_right code units shorter at the end; start_offset is given
relative to the untrimmed input and is rebased accordingly. A complete match
is attempted when PCRE2_JIT_COMPLETE is set in jit_compile_options, and a soft
partial match when PCRE2_JIT_PARTIAL_SOFT is set. Each result is handed to
check_invalid_utf_result(); a non-zero return from it is treated as a failure.

Arguments:
  current        the test case to run
  pattern_index  index of the test case, used only in diagnostics
  i              which entry of current->pattern[] to run
  ccontext       compile context passed to pcre2_compile_32()
  mdata          match data block whose ovector is checked after each match

Returns:         1 on success or when the pattern slot is NULL, 0 on failure
*/
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR32 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	/* An unused pattern slot counts as a pass. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	/* Report the slot that actually failed, not always slot 0. */
	if (!code) {
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto done;
	}

	/* Count the code units before the terminating zero, then trim. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto done;
	}

	passed = 1;

done:
	/* Single exit so the compiled pattern is released on every path. */
	pcre2_code_free_32(code);
	return passed;
}
2450 
invalid_utf32_regression_tests(void)2451 static int invalid_utf32_regression_tests(void)
2452 {
2453 	const struct invalid_utf32_regression_test_case *current;
2454 	pcre2_compile_context_32 *ccontext;
2455 	pcre2_match_data_32 *mdata;
2456 	int total = 0, successful = 0;
2457 	int result;
2458 
2459 	printf("\nRunning invalid-utf32 JIT regression tests\n");
2460 
2461 	ccontext = pcre2_compile_context_create_32(NULL);
2462 	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2463 	mdata = pcre2_match_data_create_32(4, NULL);
2464 
2465 	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2466 		/* printf("\nPattern: %s :\n", current->pattern); */
2467 		total++;
2468 
2469 		result = 1;
2470 		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2471 			result = 0;
2472 		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2473 			result = 0;
2474 
2475 		if (result) {
2476 			successful++;
2477 		}
2478 
2479 		printf(".");
2480 		if ((total % 60) == 0)
2481 			printf("\n");
2482 	}
2483 
2484 	if ((total % 60) != 0)
2485 		printf("\n");
2486 
2487 	pcre2_match_data_free_32(mdata);
2488 	pcre2_compile_context_free_32(ccontext);
2489 
2490 	if (total == successful) {
2491 		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2492 		return 0;
2493 	} else {
2494 		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2495 		return 1;
2496 	}
2497 }
2498 
2499 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2500 
/* Placeholder used when Unicode support or the 32-bit library is not compiled
in: there are no invalid UTF-32 tests to run, so it reports no failures. */
static int invalid_utf32_regression_tests(void)
{
	int failures = 0;

	return failures;
}
2505 
2506 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2507 
2508 /* End of pcre2_jit_test.c */
2509