1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2019 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
43
44 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jump_if_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg)45 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
46 {
47 #if PCRE2_CODE_UNIT_WIDTH == 8
48 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
49 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
50 #elif PCRE2_CODE_UNIT_WIDTH == 16
51 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
52 return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
53 #else
54 #error "Unknown code width"
55 #endif
56 }
57 #endif
58
character_to_int32(PCRE2_UCHAR chr)59 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
60 {
61 sljit_u32 value = chr;
62 #if PCRE2_CODE_UNIT_WIDTH == 8
63 #define SSE2_COMPARE_TYPE_INDEX 0
64 return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
65 #elif PCRE2_CODE_UNIT_WIDTH == 16
66 #define SSE2_COMPARE_TYPE_INDEX 1
67 return (sljit_s32)((value << 16) | value);
68 #elif PCRE2_CODE_UNIT_WIDTH == 32
69 #define SSE2_COMPARE_TYPE_INDEX 2
70 return (sljit_s32)(value);
71 #else
72 #error "Unsupported unit width"
73 #endif
74 }
75
/* Emits a 16-byte load from [src_general_reg + offset] into an XMM register.

compiler         the sljit compiler that receives the raw instruction bytes
dst_xmm_reg      index of the destination XMM register (asserted < 8)
src_general_reg  index of the base general register (asserted < 8)
offset           signed 8-bit displacement; 0 selects the short encoding

The 0x66 prefix (MOVDQA) is used when the low four bits of the displacement
are zero, otherwise the 0xf3 prefix (MOVDQU) is used. */
static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset)
{
sljit_u8 instruction[5];
sljit_u32 length = 4;

SLJIT_ASSERT(dst_xmm_reg < 8);
SLJIT_ASSERT(src_general_reg < 8);

/* MOVDQA / MOVDQU xmm1, m128 */
instruction[0] = (((sljit_u8)offset & 0xf) != 0) ? 0xf3 : 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
/* ModRM with mod == 00: register indirect, no displacement. */
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;

if (offset != 0)
  {
  /* Switch to mod == 01 and append the 8-bit displacement. */
  instruction[3] |= 0x40;
  instruction[4] = (sljit_u8)offset;
  length = 5;
  }

sljit_emit_op_custom(compiler, instruction, length);
}
99
/* Selects the instruction sequence emitted by
fast_forward_char_pair_sse2_compare(). */
typedef enum {
  /* Compare the data with cmp1 only. */
  sse2_compare_match1 = 0,
  /* OR the data with cmp2 first, then compare with cmp1. */
  sse2_compare_match1i = 1,
  /* Compare the data with cmp1 and with cmp2, then OR the two results. */
  sse2_compare_match2 = 2
} sse2_compare_type;
105
/* Emits step "step" (0..3) of the vector compare sequence selected by
compare_type. Callers invoke it for steps 0, 1, 2, 3 in order; steps that a
given compare type does not need emit nothing.

  sse2_compare_match1:   step 2: PCMPEQ dst, cmp1
  sse2_compare_match1i:  step 0: POR    dst, cmp2
                         step 2: PCMPEQ dst, cmp1
  sse2_compare_match2:   step 0: MOVDQA tmp, dst
                         step 1: PCMPEQ dst, cmp1
                         step 2: PCMPEQ tmp, cmp2
                         step 3: POR    dst, tmp

All *_ind arguments are XMM register indexes. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 instruction[4];
sljit_s32 reg_ind;
sljit_s32 rm_ind;

SLJIT_ASSERT(step >= 0 && step <= 3);

if (compare_type == sse2_compare_match2)
  {
  switch (step)
    {
    case 0:
    /* MOVDQA xmm1, xmm2/m128 */
    instruction[2] = 0x6f;
    reg_ind = tmp_ind;
    rm_ind = dst_ind;
    break;

    case 1:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
    reg_ind = dst_ind;
    rm_ind = cmp1_ind;
    break;

    case 2:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
    reg_ind = tmp_ind;
    rm_ind = cmp2_ind;
    break;

    case 3:
    /* POR xmm1, xmm2/m128 */
    instruction[2] = 0xeb;
    reg_ind = dst_ind;
    rm_ind = tmp_ind;
    break;

    default:
    return;
    }
  }
else if (step == 0)
  {
  /* Only the "match1i" variant has something to do in step 0. */
  if (compare_type != sse2_compare_match1i)
    return;

  /* POR xmm1, xmm2/m128 */
  instruction[2] = 0xeb;
  reg_ind = dst_ind;
  rm_ind = cmp2_ind;
  }
else if (step == 2)
  {
  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  reg_ind = dst_ind;
  rm_ind = cmp1_ind;
  }
else
  return;

/* Common 0x66 0x0f prefix and register-to-register ModRM byte. */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[3] = 0xc0 | (reg_ind << 3) | rm_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
182
183 #define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
184
/* Emits SSE2 code that moves STR_PTR forward to the first code unit that is
equal to char1 or char2, examining 16 bytes per iteration.

common  JIT compiler state (compiler, mode, utf flag, failed_match list)
char1   first accepted code unit
char2   second accepted code unit (may be equal to char1)
offset  only used in UTF builds with 8/16-bit units: when positive, the unit
        at STR_PTR - offset is checked after a hit and the search restarts
        one unit later if that unit is a trailing unit

In PCRE2_JIT_COMPLETE mode running past STR_END jumps to
common->failed_match; in the other modes STR_PTR is clamped to STR_END.
TMP1 and TMP2 are used as scratch registers, XMM0-XMM3 as vector
registers. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *aligned_loop;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *found;
struct sljit_jump *end_reached[2];
sse2_compare_type compare_type;
sljit_u8 insn[8];
const sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
const sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
/* XMM register assignment. */
const sljit_s32 data_ind = 0;
const sljit_s32 tmp_ind = 1;
const sljit_s32 cmp1_ind = 2;
const sljit_s32 cmp2_ind = 3;
const int two_chars = (char1 != char2);
sljit_u32 case_bit = 0;
int step;

SLJIT_UNUSED_ARG(offset);

/* Two characters that differ in a single bit can be matched with one
compare after OR-ing that bit into the data; otherwise two compares are
needed. */
if (!two_chars)
  compare_type = sse2_compare_match1;
else
  {
  case_bit = char1 ^ char2;

  if (is_powerof2(case_bit))
    compare_type = sse2_compare_match1i;
  else
    {
    case_bit = 0;
    compare_type = sse2_compare_match2;
    }
  }

end_reached[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | case_bit));

SLJIT_ASSERT(tmp1_reg_ind < 8);

/* MOVD xmm, r/m32 */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0x6e;
insn[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, insn, 4);

if (two_chars)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(case_bit != 0 ? case_bit : char2));

  /* MOVD xmm, r/m32 */
  insn[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  }

/* TMP2 remembers the unaligned start position. */
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* PSHUFD xmm1, xmm2/m128, imm8: broadcast the low dword to all lanes. */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0x70;
insn[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
insn[4] = 0;
sljit_emit_op_custom(compiler, insn, 5);

if (two_chars)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  insn[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, insn, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
/* Round STR_PTR down to a 16-byte boundary; TMP2 keeps the low four bits of
the start address, i.e. the number of bytes to ignore in the first block. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, step, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0xd7;
insn[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, insn, 4);

/* Drop the mask bits that belong to bytes before the start position. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
aligned_loop = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

end_reached[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, end_reached[1]);

load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, step, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

/* PMOVMSKB reg, xmm */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0xd7;
insn[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, insn, 4);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, aligned_loop);

JUMPHERE(found);

/* BSF r32, r/m32: index of the lowest set mask bit. */
insn[0] = 0x0f;
insn[1] = 0xbc;
insn[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, insn, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
else
  {
  /* Partial matching: end-of-subject exits land here and STR_PTR is
  limited to STR_END. */
  JUMPHERE(end_reached[0]);
  JUMPHERE(end_reached[1]);
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  found = jump_if_utf_char_start(compiler, TMP1);

  /* The unit "offset" positions back is a trailing unit: step one unit
  forward and search again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(found);
  }
#endif
}
346
347 #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
348
fast_requested_char_simd(compiler_common * common,PCRE2_UCHAR char1,PCRE2_UCHAR char2)349 static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
350 {
351 DEFINE_COMPILER;
352 struct sljit_label *start;
353 struct sljit_jump *quit;
354 jump_list *not_found = NULL;
355 sse2_compare_type compare_type = sse2_compare_match1;
356 sljit_u8 instruction[8];
357 sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
358 sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
359 sljit_s32 data_ind = 0;
360 sljit_s32 tmp_ind = 1;
361 sljit_s32 cmp1_ind = 2;
362 sljit_s32 cmp2_ind = 3;
363 sljit_u32 bit = 0;
364 int i;
365
366 if (char1 != char2)
367 {
368 bit = char1 ^ char2;
369 compare_type = sse2_compare_match1i;
370
371 if (!is_powerof2(bit))
372 {
373 bit = 0;
374 compare_type = sse2_compare_match2;
375 }
376 }
377
378 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
379 OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
380 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
381
382 /* First part (unaligned start) */
383
384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
385
386 SLJIT_ASSERT(tmp1_reg_ind < 8);
387
388 /* MOVD xmm, r/m32 */
389 instruction[0] = 0x66;
390 instruction[1] = 0x0f;
391 instruction[2] = 0x6e;
392 instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
393 sljit_emit_op_custom(compiler, instruction, 4);
394
395 if (char1 != char2)
396 {
397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
398
399 /* MOVD xmm, r/m32 */
400 instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
401 sljit_emit_op_custom(compiler, instruction, 4);
402 }
403
404 OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
405
406 /* PSHUFD xmm1, xmm2/m128, imm8 */
407 /* instruction[0] = 0x66; */
408 /* instruction[1] = 0x0f; */
409 instruction[2] = 0x70;
410 instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
411 instruction[4] = 0;
412 sljit_emit_op_custom(compiler, instruction, 5);
413
414 if (char1 != char2)
415 {
416 /* PSHUFD xmm1, xmm2/m128, imm8 */
417 instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
418 sljit_emit_op_custom(compiler, instruction, 5);
419 }
420
421 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
422 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
423
424 load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
425 for (i = 0; i < 4; i++)
426 fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
427
428 /* PMOVMSKB reg, xmm */
429 /* instruction[0] = 0x66; */
430 /* instruction[1] = 0x0f; */
431 instruction[2] = 0xd7;
432 instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
433 sljit_emit_op_custom(compiler, instruction, 4);
434
435 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
436 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
437
438 quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
439
440 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
441
442 /* Second part (aligned) */
443 start = LABEL();
444
445 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
446
447 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
448
449 load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
450 for (i = 0; i < 4; i++)
451 fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
452
453 /* PMOVMSKB reg, xmm */
454 /* instruction[0] = 0x66; */
455 /* instruction[1] = 0x0f; */
456 instruction[2] = 0xd7;
457 instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
458 sljit_emit_op_custom(compiler, instruction, 4);
459
460 CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
461
462 JUMPHERE(quit);
463
464 /* BSF r32, r/m32 */
465 instruction[0] = 0x0f;
466 instruction[1] = 0xbc;
467 instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
468 sljit_emit_op_custom(compiler, instruction, 3);
469
470 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
471 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
472
473 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
474 return not_found;
475 }
476
477 #ifndef _WIN64
478
max_fast_forward_char_pair_offset(void)479 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
480 {
481 #if PCRE2_CODE_UNIT_WIDTH == 8
482 return 15;
483 #elif PCRE2_CODE_UNIT_WIDTH == 16
484 return 7;
485 #elif PCRE2_CODE_UNIT_WIDTH == 32
486 return 3;
487 #else
488 #error "Unsupported unit width"
489 #endif
490 }
491
492 #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
493
/* Emits SSE2 code that moves STR_PTR forward to the first position where the
code unit at STR_PTR + offs1 is char1a or char1b AND the code unit at
STR_PTR + offs2 is char2a or char2b.

common          JIT compiler state; the mode must be PCRE2_JIT_COMPLETE
offs1           offset (in code units) of the first character, > offs2
char1a, char1b  accepted values at offs1 (may be equal)
offs2           offset (in code units) of the second character
char2a, char2b  accepted values at offs2 (may be equal)

Running past STR_END jumps to common->failed_match. TMP1 and TMP2 are used
as scratch registers, TMP3 preserves STR_END when common->match_end_ptr is
non-zero, and all eight XMM registers (XMM0-XMM7) are used. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sse2_compare_type compare1_type;
sse2_compare_type compare2_type;
/* Distance between the two characters in bytes. */
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
const sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
const sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
const sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
/* XMM register assignment. */
const sljit_s32 data1_ind = 0;
const sljit_s32 data2_ind = 1;
const sljit_s32 tmp1_ind = 2;
const sljit_s32 tmp2_ind = 3;
const sljit_s32 cmp1a_ind = 4;
const sljit_s32 cmp1b_ind = 5;
const sljit_s32 cmp2a_ind = 6;
const sljit_s32 cmp2b_ind = 7;
const int char1_has_pair = (char1a != char1b);
const int char2_has_pair = (char2a != char2b);
struct sljit_label *main_loop;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *same_block;
struct sljit_jump *skip_shift;
struct sljit_jump *found;
sljit_u8 insn[8];
int step;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1);

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* Save STR_END in TMP3 and replace it by the smaller of STR_END and the
  stored value plus offs1 + 1 code units. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
  }

/* From here on STR_PTR addresses the position of the first character. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

/* Load the replicated constants for the first character into TMP1/TMP2. */
if (!char1_has_pair)
  {
  compare1_type = sse2_compare_match1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
  }
else
  {
  sljit_u32 bit1 = char1a ^ char1b;

  if (!is_powerof2(bit1))
    {
    compare1_type = sse2_compare_match2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
    }
  else
    {
    compare1_type = sse2_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
    }
  }

/* MOVD xmm, r/m32 */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0x6e;
insn[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, insn, 4);

if (char1_has_pair)
  {
  insn[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  }

/* Same for the second character. */
if (!char2_has_pair)
  {
  compare2_type = sse2_compare_match1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
  }
else
  {
  sljit_u32 bit2 = char2a ^ char2b;

  if (!is_powerof2(bit2))
    {
    compare2_type = sse2_compare_match2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
    }
  else
    {
    compare2_type = sse2_compare_match1i;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
    }
  }

/* MOVD xmm, r/m32 */
insn[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, insn, 4);

if (char2_has_pair)
  {
  insn[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind;
  sljit_emit_op_custom(compiler, insn, 4);
  }

/* PSHUFD xmm1, xmm2/m128, imm8: broadcast the low dword of every constant. */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0x70;
insn[4] = 0;

insn[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
sljit_emit_op_custom(compiler, insn, 5);

if (char1_has_pair)
  {
  insn[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
  sljit_emit_op_custom(compiler, insn, 5);
  }

insn[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
sljit_emit_op_custom(compiler, insn, 5);

if (char2_has_pair)
  {
  insn[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
  sljit_emit_op_custom(compiler, insn, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* TMP1 = address of the second character, TMP2 = unaligned STR_PTR,
STR_PTR = start of the 16-byte block that contains the first character. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);

same_block = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

/* The second character lies before the aligned block: load the second
vector from memory, "diff" bytes below the block. */
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
skip_shift = JUMP(SLJIT_JUMP);

JUMPHERE(same_block);

/* Otherwise build the second vector from the first one without touching
memory below the block. */

/* MOVDQA xmm1, xmm2/m128 */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0x6f;
insn[3] = 0xc0 | (data2_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, insn, 4);

/* PSLLDQ xmm1, imm8 */
insn[2] = 0x73;
insn[3] = 0xc0 | (7 << 3) | data2_ind;
insn[4] = diff;
sljit_emit_op_custom(compiler, insn, 5);

JUMPHERE(skip_shift);

/* TMP2 = number of leading bytes of the first block to ignore. */
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

for (step = 0; step < 4; step++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, step, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, step, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  }

/* PAND xmm1, xmm2/m128: both characters must match at the same lane. */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0xdb;
insn[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, insn, 4);

/* PMOVMSKB reg, xmm */
insn[2] = 0xd7;
insn[3] = 0xc0 | (tmp1_reg_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, insn, 4);

/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Main loop. */
main_loop = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);

for (step = 0; step < 4; step++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, step, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, step, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
  }

/* PAND xmm1, xmm2/m128 */
insn[0] = 0x66;
insn[1] = 0x0f;
insn[2] = 0xdb;
insn[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, insn, 4);

/* PMOVMSKB reg, xmm */
insn[2] = 0xd7;
insn[3] = 0xc0 | (tmp1_reg_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, insn, 4);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, main_loop);

JUMPHERE(found);

/* BSF r32, r/m32: index of the lowest set mask bit. */
insn[0] = 0x0f;
insn[1] = 0xbc;
insn[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, insn, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  struct sljit_jump *char_start;

  /* The candidate start (offs1 units back) must not be a trailing unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  char_start = jump_if_utf_char_start(compiler, TMP1);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

  JUMPHERE(char_start);
  }
#endif

/* Undo the initial offs1 adjustment. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
758
759 #endif /* !_WIN64 */
760
761 #undef SSE2_COMPARE_TYPE_INDEX
762
763 #endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
764
765 #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
766
767 #include <arm_neon.h>
768
/* Packs up to four single-byte values into one unsigned int, so that they
can be passed around as a single integer (read and written through "x"). */
typedef union {
  unsigned int x;
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
773
774 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
utf_continue(sljit_u8 * s)775 static SLJIT_INLINE int utf_continue(sljit_u8 *s)
776 {
777 #if PCRE2_CODE_UNIT_WIDTH == 8
778 return (*s & 0xc0) == 0x80;
779 #elif PCRE2_CODE_UNIT_WIDTH == 16
780 return (*s & 0xfc00) == 0xdc00;
781 #else
782 #error "Unknown code width"
783 #endif
784 }
785 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
786
/* Width-dependent NEON aliases.
   VECTOR_FACTOR  number of code units in one 128-bit vector
   vect_t         vector type holding VECTOR_FACTOR code units
   V*             the NEON intrinsics for that lane width
   quad_word      a 128-bit value viewed as code units or as two 64-bit words */
#if PCRE2_CODE_UNIT_WIDTH == 8

/* 16 lanes of 8 bits. */
#define VECTOR_FACTOR 16
#define vect_t uint8x16_t
#define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
#define VST1Q vst1q_u8
#define VDUPQ vdupq_n_u8
#define VEXTQ vextq_u8
#define VCEQQ vceqq_u8
#define VORRQ vorrq_u8
#define VANDQ vandq_u8
typedef union {
  uint8_t mem[16];
  uint64_t dw[2];
} quad_word;

#elif PCRE2_CODE_UNIT_WIDTH == 16

/* 8 lanes of 16 bits. */
#define VECTOR_FACTOR 8
#define vect_t uint16x8_t
#define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
#define VST1Q vst1q_u16
#define VDUPQ vdupq_n_u16
#define VEXTQ vextq_u16
#define VCEQQ vceqq_u16
#define VORRQ vorrq_u16
#define VANDQ vandq_u16
typedef union {
  uint16_t mem[8];
  uint64_t dw[2];
} quad_word;

#else

/* 4 lanes of 32 bits. */
#define VECTOR_FACTOR 4
#define vect_t uint32x4_t
#define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
#define VST1Q vst1q_u32
#define VDUPQ vdupq_n_u32
#define VEXTQ vextq_u32
#define VCEQQ vceqq_u32
#define VORRQ vorrq_u32
#define VANDQ vandq_u32
typedef union {
  uint32_t mem[4];
  uint64_t dw[2];
} quad_word;

#endif
830
831 #define FFCS
832 #include "pcre2_jit_neon_inc.h"
833 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
834 # define FF_UTF
835 # include "pcre2_jit_neon_inc.h"
836 # undef FF_UTF
837 #endif
838 #undef FFCS
839
840 #define FFCS_2
841 #include "pcre2_jit_neon_inc.h"
842 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
843 # define FF_UTF
844 # include "pcre2_jit_neon_inc.h"
845 # undef FF_UTF
846 #endif
847 #undef FFCS_2
848
849 #define FFCS_MASK
850 #include "pcre2_jit_neon_inc.h"
851 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
852 # define FF_UTF
853 # include "pcre2_jit_neon_inc.h"
854 # undef FF_UTF
855 #endif
856 #undef FFCS_MASK
857
858 #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
859
/* Emits a call to one of the NEON search helpers (ffcs, ffcs_mask, ffcs_2 or
their *_utf variants) and moves STR_PTR to the address the helper returns.

common  JIT compiler state (compiler, mode, utf flag, failed_match list)
char1   first accepted code unit
char2   second accepted code unit (may be equal to char1)
offset  passed to the helper in SLJIT_R2; the *_utf helper variants are
        chosen when common->utf is set and offset > 0

Helper selection:
  char1 == char2                  -> ffcs       (c1 = char1, c2 = char2)
  char1 ^ char2 is a power of two -> ffcs_mask  (c1 = char1 | mask, c2 = mask)
  otherwise                       -> ffcs_2     (c1 = char1, c2 = char2)

A zero return value means "not found": in PCRE2_JIT_COMPLETE mode this jumps
to common->failed_match, in the other modes STR_PTR keeps its old value.
STR_PTR and TMP3 are saved in LOCALS0/LOCALS1 around the call.

NOTE(review): c1/c2 are unsigned char, so char1/char2 are truncated to eight
bits when PCRE2_UCHAR is wider - confirm how the helpers are used in the
16/32-bit builds. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit;

/* Only c1 and c2 are assigned below; clear the whole union first so that
the immediate built from ic.x never contains indeterminate bytes. */
ic.x = 0;

/* Save temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);

/* Prepare function arguments */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);

if (char1 == char2)
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf && offset > 0)
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf));
  else
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
#else
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                   SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
#endif
  }
else
  {
  PCRE2_UCHAR mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* The characters differ in one bit only: pass the bit as a mask. */
    ic.c.c1 = char1 | mask;
    ic.c.c2 = mask;
    OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf));
    else
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
#else
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
#endif
    }
  else
    {
    ic.c.c1 = char1;
    ic.c.c2 = char2;
    OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && offset > 0)
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf));
    else
      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
#else
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
#endif
    }
  }

/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the address returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);
}
947
/* Selects the comparison performed by fast_forward_char_pair_compare(). */
typedef enum {
  /* Compare the data with cmp1 only. */
  compare_match1 = 0,
  /* OR the data with cmp2 first, then compare with cmp1. */
  compare_match1i = 1,
  /* Compare the data with cmp1 and with cmp2, then OR the two results. */
  compare_match2 = 2
} compare_type;
953
/* Compares the vector dst against the comparison vectors according to ctype
   and returns the result of the VCEQQ comparison(s).

   ctype  selects the comparison mode (see compare_type).
   dst    the data to be tested.
   cmp1   first comparison vector, used by every mode.
   cmp2   second comparison vector: OR-ed into the data for compare_match1i,
          compared against the data for compare_match2, ignored otherwise.

   NOTE(review): compare_match1i looks like a caseless single character
   test where cmp2 carries the bit that differs between the two cases -
   confirm against the callers. */
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
  vect_t eq_first;
  vect_t eq_second;

  switch (ctype)
    {
    case compare_match2:
      /* Match when the data equals either of the two vectors. */
      eq_first = VCEQQ(dst, cmp1);
      eq_second = VCEQQ(dst, cmp2);
      return VORRQ(eq_first, eq_second);

    case compare_match1i:
      /* Merge cmp2 into the data before the single comparison. */
      return VCEQQ(VORRQ(dst, cmp2), cmp1);

    default:
      /* compare_match1 (and any other value): plain equality with cmp1. */
      return VCEQQ(dst, cmp1);
    }
}
970
max_fast_forward_char_pair_offset(void)971 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
972 {
973 #if PCRE2_CODE_UNIT_WIDTH == 8
974 return 15;
975 #elif PCRE2_CODE_UNIT_WIDTH == 16
976 return 7;
977 #elif PCRE2_CODE_UNIT_WIDTH == 32
978 return 3;
979 #else
980 #error "Unsupported unit width"
981 #endif
982 }
983
984 /* ARM doesn't have a shift left across lanes. */
shift_left_n_lanes(vect_t a,sljit_u8 n)985 static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
986 {
987 vect_t zero = VDUPQ(0);
988 SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
989 /* VEXTQ takes an immediate as last argument. */
990 #define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
991 switch (n)
992 {
993 C(1); C(2); C(3);
994 #if PCRE2_CODE_UNIT_WIDTH != 32
995 C(4); C(5); C(6); C(7);
996 # if PCRE2_CODE_UNIT_WIDTH != 16
997 C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
998 # endif
999 #endif
1000 default:
1001 /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
1002 happen. The return is still here for compilers to not warn. */
1003 return a;
1004 }
1005 }
1006
1007 #define FFCPS
1008 #define FFCPS_DIFF1
1009 #define FFCPS_CHAR1A2A
1010
1011 #define FFCPS_0
1012 #include "pcre2_jit_neon_inc.h"
1013 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1014 # define FF_UTF
1015 # include "pcre2_jit_neon_inc.h"
1016 # undef FF_UTF
1017 #endif
1018 #undef FFCPS_0
1019
1020 #undef FFCPS_CHAR1A2A
1021
1022 #define FFCPS_1
1023 #include "pcre2_jit_neon_inc.h"
1024 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1025 # define FF_UTF
1026 # include "pcre2_jit_neon_inc.h"
1027 # undef FF_UTF
1028 #endif
1029 #undef FFCPS_1
1030
1031 #undef FFCPS_DIFF1
1032
1033 #define FFCPS_DEFAULT
1034 #include "pcre2_jit_neon_inc.h"
1035 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1036 # define FF_UTF
1037 # include "pcre2_jit_neon_inc.h"
1038 # undef FF_UTF
1039 #endif
1040 #undef FFCPS
1041
1042 #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1043
/* Emits the JIT code that searches for a pair of characters by calling one
   of the ffcps_* helper functions.

   common          JIT compiler state; its mode must be PCRE2_JIT_COMPLETE.
   offs1, offs2    offsets of the two characters; offs1 must be greater than
                   offs2 and their distance must not exceed
                   max_fast_forward_char_pair_offset().
   char1a, char1b  the two accepted values for the character at offs1.
   char2a, char2b  the two accepted values for the character at offs2.

   The generated code passes the end pointer in R0, STR_PTR in R1, the two
   offsets in R2/R3 and the four characters packed into R4. When the helper
   returns zero the code jumps to the failed_match list; otherwise STR_PTR is
   set to the returned value. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
  DEFINE_COMPILER;
  sljit_u32 diff = IN_UCHARS(offs1 - offs2);
  struct sljit_jump *not_found;
  int_char packed;
  sljit_sw helper;
  const sljit_s32 call_args =
    SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW);

  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
  SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
  SLJIT_ASSERT(compiler->scratches == 5);

  /* Pack the four characters into a single immediate. */
  packed.c.c1 = char1a;
  packed.c.c2 = char1b;
  packed.c.c3 = char2a;
  packed.c.c4 = char2b;

  /* STR_PTR is restored from LOCALS0 once the helper has returned. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);

  /* R0: end of the area to search. */
  if (common->match_end_ptr != 0)
    {
    /* Saved match end plus offs1 + 1 characters, replaced by STR_END when
       STR_END is less than that value. */
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0);
    CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);

  /* R1..R4: current position, the two offsets and the packed characters. */
  OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
  OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
  OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, packed.x);

  /* Pick the address of the helper: the _utf flavour exists only when
     Unicode support is compiled in and code units are not 32 bits wide. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#define FFCPS_ADDR(name) \
  (common->utf ? SLJIT_FUNC_OFFSET(name ## _utf) : SLJIT_FUNC_OFFSET(name))
#else
#define FFCPS_ADDR(name) SLJIT_FUNC_OFFSET(name)
#endif

  if (diff != 1)
    helper = FFCPS_ADDR(ffcps_default);
  else if (char1a == char1b && char2a == char2b)
    helper = FFCPS_ADDR(ffcps_0);
  else
    helper = FFCPS_ADDR(ffcps_1);

#undef FFCPS_ADDR

  sljit_emit_icall(compiler, SLJIT_CALL, call_args, SLJIT_IMM, helper);

  /* Restore STR_PTR register. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* A zero return value is routed to the failed_match list. */
  not_found = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  add_jump(compiler, &common->failed_match, not_found);

  /* Fast forward STR_PTR to the position returned by the helper. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  /* NOTE(review): this jump was already added to failed_match above, so
     binding it here as well looks redundant - confirm which target is
     intended before changing it. */
  JUMPHERE(not_found);
}
1122
1123 #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
1124