1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2019 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
48 #ifdef SUPPORT_JIT
49
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53
54 #define SLJIT_CONFIG_AUTO 1
55 #define SLJIT_CONFIG_STATIC 1
56 #define SLJIT_VERBOSE 0
57
58 #ifdef PCRE2_DEBUG
59 #define SLJIT_DEBUG 1
60 #else
61 #define SLJIT_DEBUG 0
62 #endif
63
64 #define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
65 #define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78
79 #include "sljit/sljitLir.c"
80
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84
85 /* Defines for debugging purposes. */
86
87 /* 1 - Use unoptimized capturing brackets.
88 2 - Enable capture_last_ptr (includes option 1). */
89 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91 /* 1 - Always have a control head. */
92 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94 /* Allocate memory for the regex stack on the real machine stack.
95 Fast, but limited size. */
96 #define MACHINE_STACK_SIZE 32768
97
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
100 #define STACK_GROWTH_RATE 8192
101
102 /* Enable to check that the allocation could destroy temporaries. */
103 #if defined SLJIT_DEBUG && SLJIT_DEBUG
104 #define DESTROY_REGISTERS 1
105 #endif
106
107 /*
Short summary of the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116 'ab' - 'a' and 'b' regexps are concatenated
117 'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126 Matching path: match happens.
127 Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129 Matching path: no need to perform a match.
130 Backtrack path: match is required.
131
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
135
136 A(B|C)D
137
138 The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
161 */
162
163 /*
164 Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174 Thus we can restore the private data to a particular point in the stack.
175 */
176
/* Argument block passed by pointer to a compiled matcher (see the
jit_function type). Pointer-sized members are grouped ahead of the integer
members, as the two section comments below indicate. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function and the untyped pointer stored alongside it. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
194
195 #define JIT_NUMBER_OF_COMPILE_MODES 3
196
/* Per-pattern record of generated code. Each array has one slot per JIT
compile mode (JIT_NUMBER_OF_COMPILE_MODES entries), so the same index selects
the function, its read-only data chain head and its size. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
204
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209
/* Node of a singly linked list of stubs; each node pairs a start jump with a
quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215
/* Negative sentinel values. Presumably stored in the framesize members of the
backtrack structures, whose comments say "less than 0" means no frame is
needed - confirm against the users of these constants. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220
/* Tags for control entries. Presumably these identify the kind of record on
the control verb chain (see control_head_ptr in compiler_common) - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225
/* Kinds of early fail handling. Presumably related to the early_fail_start_ptr
and early_fail_end_ptr locals of compiler_common - confirm. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231
/* Signature of a generated matcher: called with the SLJIT_FUNC calling
convention, takes a pointer to a jit_arguments block and returns an int. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
234 /* The following structure is the key data type for the recursive
235 code generator. It is allocated by compile_matchingpath, and contains
236 the arguments for compile_backtrackingpath. Must be the first member
237 of its descendants. */
/* Header shared by every backtrack record. The specialised *_backtrack
structures embed it as their first member, named "common". */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
248
/* Backtrack record for assertions. */
typedef struct assert_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
259
/* Backtrack record for brackets. Only one member of the union u is meaningful
at a time; which one depends on the kind of bracket, as noted on each member. */
typedef struct bracket_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
281
/* Backtrack record for the possessive bracket forms (presumably the *POS
bracket opcodes - confirm against the code that allocates it). */
typedef struct bracketpos_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
291
/* Backtrack record holding a single matching path label (presumably for
OP_BRAMINZERO, going by the name - confirm). */
typedef struct braminzero_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
296
/* Backtrack record for character iterators. The union u holds either a list
of backtrack jumps or the charpos data (a character, an other-case bit and an
enabled flag). */
typedef struct char_iterator_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
310
/* Backtrack record for reference iterators. */
typedef struct ref_iterator_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
316
/* Node of a singly linked list describing one recursively callable part of
the pattern, identified by its starting opcode offset. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the function backtrack label. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
330
/* Backtrack record for a recursion; refers to the recurse_entry of the
recursed pattern. */
typedef struct recurse_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
340
341 #define OP_THEN_TRAP OP_TABLE_LENGTH
342
/* Backtrack record for a "then trap" (compiler_common.then_trap points at the
current one, described there as the position where a THEN must jump). */
typedef struct then_trap_backtrack {
  /* Shared header; kept as the first member. */
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
355
356 #define MAX_N_CHARS 12
357 #define MAX_DIFF_CHARS 5
358
/* Set of candidate characters for one position, holding at most
MAX_DIFF_CHARS distinct characters. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
367
368 #define MAX_CLASS_RANGE_SIZE 4
369 #define MAX_CLASS_CHARS_SIZE 3
370
/* State shared by the code generator functions for one compilation. It holds
the sljit compiler, the compiled pattern, offsets of the local variables
(the *_ptr members of type sljit_s32), option flags, newline settings, and the
labels and jump lists that are collected while code is generated. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Variables used by recursive call generator. */
  sljit_s32 recurse_bitset_size;
  uint8_t *recurse_bitset;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
     NOTE(review): this wording looks inverted relative to the member name;
     confirm against the code that sets might_be_empty. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Jump lists that exist only for 8-bit code units. */
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  /* Jump lists that exist only for 8-bit and 16-bit code units. */
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
506
507 /* For byte_sequence_compare. */
508
/* Context for byte_sequence_compare. The members after sourcereg exist only
when SLJIT_UNALIGNED is set. In that case c and oc are two unions of the same
shape that overlay a 32-bit integer, a 16-bit integer and an array of code
units whose element count depends on PCRE2_CODE_UNIT_WIDTH (4, 2 or 1). */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
540
541 /* Undefine sljit macros. */
542 #undef CMP
543
544 /* Used for accessing the elements of the stack. */
545 #define STACK(i) ((i) * (int)sizeof(sljit_sw))
546
547 #ifdef SLJIT_PREF_SHIFT_REG
548 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
549 /* Nothing. */
550 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
551 #define SHIFT_REG_IS_R3
552 #else
553 #error "Unsupported shift register"
554 #endif
555 #endif
556
557 #define TMP1 SLJIT_R0
558 #ifdef SHIFT_REG_IS_R3
559 #define TMP2 SLJIT_R3
560 #define TMP3 SLJIT_R2
561 #else
562 #define TMP2 SLJIT_R2
563 #define TMP3 SLJIT_R3
564 #endif
565 #define STR_PTR SLJIT_R1
566 #define STR_END SLJIT_S0
567 #define STACK_TOP SLJIT_S1
568 #define STACK_LIMIT SLJIT_S2
569 #define COUNT_MATCH SLJIT_S3
570 #define ARGUMENTS SLJIT_S4
571 #define RETURN_ADDR SLJIT_R4
572
573 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
574 #define HAS_VIRTUAL_REGISTERS 1
575 #else
576 #define HAS_VIRTUAL_REGISTERS 0
577 #endif
578
579 /* Local space layout. */
580 /* These two locals can be used by the current opcode. */
581 #define LOCALS0 (0 * sizeof(sljit_sw))
582 #define LOCALS1 (1 * sizeof(sljit_sw))
583 /* Two local variables for possessive quantifiers (char1 cannot use them). */
584 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
585 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
586 /* Max limit of recursions. */
587 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
588 /* The output vector is stored on the stack, and contains pointers
589 to characters. The vector data is divided into two groups: the first
590 group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been reached. */
592 #define OVECTOR_START (common->ovector_start)
593 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
594 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
595 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596
597 #if PCRE2_CODE_UNIT_WIDTH == 8
598 #define MOV_UCHAR SLJIT_MOV_U8
599 #define IN_UCHARS(x) (x)
600 #elif PCRE2_CODE_UNIT_WIDTH == 16
601 #define MOV_UCHAR SLJIT_MOV_U16
602 #define UCHAR_SHIFT (1)
603 #define IN_UCHARS(x) ((x) * 2)
604 #elif PCRE2_CODE_UNIT_WIDTH == 32
605 #define MOV_UCHAR SLJIT_MOV_U32
606 #define UCHAR_SHIFT (2)
607 #define IN_UCHARS(x) ((x) * 4)
608 #else
609 #error Unsupported compiling mode
610 #endif
611
612 /* Shortcuts. */
/* DEFINE_COMPILER declares a local variable named "compiler", initialised
from common->compiler, so it needs a variable named "common" in scope. Every
other shortcut below expands to an sljit call whose first argument is that
"compiler" variable. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and sets its target to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and makes it the target of an
existing jump. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and sets its target to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
641
642 #define READ_CHAR_MAX 0x7fffffff
643
644 #define INVALID_UTF_CHAR -1
645 #define UNASSIGNED_UTF_CHAR 888
646
647 #if defined SUPPORT_UNICODE
648 #if PCRE2_CODE_UNIT_WIDTH == 8
649
/* UTF-8 forward read with validation. Decodes the character that starts at
ptr into c and advances ptr past it. A byte <= 0x7f is returned as is.
Otherwise the lead byte must be 0xc2..0xdf, 0xe0..0xef or 0xf0..0xf4 and must
be followed, before end, by the matching number of continuation bytes
(0x80..0xbf). invalid_action is executed when that structure is violated
(ptr is left unchanged in this case), or when a complete three or four byte
sequence decodes to a value that is too small for its length, is in
0xd800..0xdfff, or is >= 0x110000 (ptr has already been advanced in this
case). c may hold a partial value when invalid_action runs.

The arguments are evaluated several times and c and ptr are assigned, so they
must be plain lvalues without side effects. The expansion is a bare braced
block, so a jump statement passed as invalid_action applies to the code
surrounding the macro call. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
711
/* UTF-8 backward read with validation. Decodes the character that ends just
before ptr into c and moves ptr back to its first byte. ptr[-1] is read
unconditionally, so the caller must ensure ptr > start. A byte <= 0x7f is
returned as is. Otherwise ptr[-1] must be a continuation byte (0x80..0xbf)
preceded, without going below start, by further continuation bytes and a lead
byte of 0xc2..0xdf, 0xe0..0xef or 0xf0..0xf4 that matches the sequence length.
invalid_action is executed when that structure is violated (ptr is left
unchanged in this case), or when a complete three or four byte sequence
decodes to a value that is too small for its length, is in 0xd800..0xdfff, or
is >= 0x110000 (ptr has already been moved in this case). c may hold a
partial value when invalid_action runs.

Because the bytes are visited from last to first, the last byte supplies the
lowest six bits and each earlier byte is OR-ed in at a higher bit position
(6, 12, then 18).

The arguments are evaluated several times and c and ptr are assigned, so they
must be plain lvalues without side effects. The expansion is deliberately a
bare braced block rather than do { } while (0): a break or continue passed as
invalid_action must apply to the code surrounding the macro call. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
774
775 #elif PCRE2_CODE_UNIT_WIDTH == 16
776
/* UTF-16 forward read with validation. Stores the next character in c and
advances ptr. A code unit outside 0xd800..0xdfff is taken as is. A code unit
in 0xd800..0xdbff that is followed, before end, by one in 0xdc00..0xdfff is
combined with it into a single character and both units are consumed. Any
other input executes invalid_action with ptr unchanged. The arguments are
evaluated several times and c and ptr are assigned. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
791
/* UTF-16 backward read with validation. ptr[-1] is always loaded into c, so
the caller must ensure ptr > start. A code unit outside 0xd800..0xdfff is
taken as is and ptr moves back by one. A code unit in 0xdc00..0xdfff that is
preceded, without going below start, by one in 0xd800..0xdbff is combined with
it into a single character and ptr moves back by two. Any other input executes
invalid_action with ptr unchanged and c holding ptr[-1]. The arguments are
evaluated several times and c and ptr are assigned. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
807
808
809 #elif PCRE2_CODE_UNIT_WIDTH == 32
810
/* UTF-32 forward read with validation. A code unit below 0xd800, or in
0xe000..0x10ffff, is stored in c and ptr advances by one. Any other value
executes invalid_action with c and ptr unchanged. The end argument is not used
by this variant. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
820
/* UTF-32 backward read with validation. ptr[-1] is always loaded into c. When
it is below 0xd800, or in 0xe000..0x10ffff, ptr moves back by one. Any other
value executes invalid_action with ptr unchanged. The start argument is not
used by this variant. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
831
832 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
833 #endif /* SUPPORT_UNICODE */
834
/* Returns a pointer to the code unit that follows the closing KET of the
group starting at cc.

cc must point at an opcode in OP_ASSERT..OP_ASSERTBACK_NA or in
OP_ONCE..OP_SCOND (checked by assertion). The link value read with GET(cc, 1)
is added to cc repeatedly for as long as the opcode reached is OP_ALT; the
opcode finally reached must be in OP_KET..OP_KETRPOS. The result skips that
opcode and its LINK_SIZE units. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Step over the first alternative, then over every further one. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
843
/* Returns the number of alternatives of the group starting at cc ("no" stands
for "number of"). The result is at least 1: one for the first alternative plus
one for every OP_ALT found by following the GET(cc, 1) links.

cc must point at an opcode in OP_ASSERT..OP_ASSERTBACK_NA or in
OP_ONCE..OP_SCOND, and the chain must end at an opcode in OP_KET..OP_KETRPOS
(both checked by assertion). */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
857
/* Functions which might need modification for every newly supported opcode:
859 next_opcode
860 check_opcode_types
861 set_private_data_ptrs
862 get_framesize
863 init_frame
864 get_recurse_data_length
865 copy_recurse_data
866 compile_matchingpath
867 compile_backtrackingpath
868 */
869
/* Returns a pointer to the position that follows the opcode at cc, or NULL
when the opcode cannot be stepped over.

common  only read when SUPPORT_UNICODE is defined (its utf flag)
cc      points at the opcode to step over

NULL is returned for OP_ANYBYTE when common->utf is set, and for any opcode
that is not listed here (after SLJIT_UNREACHABLE()). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* The step is exactly the OP_lengths table entry. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* The table entry, plus the extra length reported for the last code unit
  covered by that entry when UTF mode is active. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases. The step is one less than the table entry (presumably so
  that the type opcode which follows is visited on its own - confirm). */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Rejected (NULL) in UTF mode; otherwise a single code unit. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

  /* The step is stored in the opcode itself, at offset 1 + 2*LINK_SIZE. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* The step is stored in the opcode itself, at offset 1. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Three code units plus the count held in cc[1]. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1070
/* Pre-scans the compiled pattern from cc up to (not including) ccend.
While stepping over the opcodes it:
  - records feature flags in common (has_set_som, might_be_empty, has_then,
    has_skip_arg, has_skip_in_assert_back, control_head_ptr),
  - reserves storage for iref_ptr, recursive_head_ptr, capture_last_ptr and
    mark_ptr by taking the current common->ovector_start and advancing it,
  - zeroes the optimized_cbracket[] entry of every group number that is an
    argument of OP_REF/OP_REFI, OP_CREF, OP_DNREF/OP_DNREFI/OP_DNCREF or
    OP_CBRAPOS/OP_SCBRAPOS.

Returns FALSE when something rejected here is found (a callout directly after
OP_COND/OP_SCOND, some control verbs inside a non-atomic assertion) or when
next_opcode() returns NULL; returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
int count;
PCRE2_SPTR slot;
/* Furthest end of any OP_ASSERTBACK / non-atomic assertion entered so far.
Starting at cc - 1 keeps every "cc < ..._end" test below false until such an
assertion has actually been seen. */
PCRE2_SPTR assert_back_end = cc - 1;
PCRE2_SPTR assert_na_end = cc - 1;

/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_REFI:
#ifdef SUPPORT_UNICODE
    /* Reserved only once: three words are taken from the ovector area. */
    if (common->iref_ptr == 0)
      {
      common->iref_ptr = common->ovector_start;
      common->ovector_start += 3 * sizeof(sljit_sw);
      }
#endif /* SUPPORT_UNICODE */
    /* Fall through. */
    case OP_REF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Track how far the non-atomic assertion reaches; the verbs handled
    further down are rejected while cc is still below this position. */
    slot = bracketend(cc);
    if (slot > assert_na_end)
      assert_na_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert this opcode. We do
       not intend to support this case. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    /* The opcode carries an index into the name table and an entry count;
    the group number stored at the start of each of those entries is cleared. */
    count = GET2(cc, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
    while (count-- > 0)
      {
      common->optimized_cbracket[GET2(slot, 0)] = 0;
      slot += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_RECURSE:
    /* Set its value only once. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* OP_CALLOUT has a fixed length, OP_CALLOUT_STR stores its own length. */
    cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_ASSERTBACK:
    /* Track how far the lookbehind reaches, for the OP_SKIP checks below. */
    slot = bracketend(cc);
    if (slot > assert_back_end)
      assert_back_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    /* Non-zero marker; get_framesize() only tests this field against zero. */
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    /* Rejected while still inside a non-atomic assertion. */
    if (cc < assert_na_end)
      return FALSE;
    /* Fall through */
    case OP_MARK:
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* Opcode, two more code units, plus the count held in cc[1]. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    if (cc < assert_na_end)
      return FALSE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    if (cc < assert_na_end)
      return FALSE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_COMMIT:
    case OP_ASSERT_ACCEPT:
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

    default:
    /* Everything else is simply stepped over; a NULL result from
    next_opcode() makes the whole scan fail. */
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }
return TRUE;
}
1231
/* Upper bound for the counter used by detect_early_fail(); the function
returns this value to signal that no further iterator may be enhanced. */
#define EARLY_FAIL_ENHANCE_MAX (1 + 3)

/*
Scans the leading items of every alternative of the bracket at cc (which must
be OP_ONCE, OP_BRA or OP_CBRA) and tags star/plus style iterators found there.
A tagged iterator gets ((*private_data_start) << 3) | type_skip / type_fail /
type_fail_range stored in common->private_data_ptrs at the code unit after
its opcode, and one or two sljit_sw sized slots are reserved by advancing
*private_data_start.

depth: nesting level of the bracket; nested brackets are only entered while
  depth < 4.
fast_forward_allowed: whether the first tagged iterator may become
  common->fast_forward_bc_ptr (type_skip). It is cleared when the bracket has
  more than one alternative and as soon as any other consuming item is seen.

start:
  0 - skip / early fail allowed
  1 - only early fail with range allowed
  >1 - (start - 1) early fail is processed

return: current number of iterators enhanced with fast fail
  (EARLY_FAIL_ENHANCE_MAX when an alternative could not be scanned up to its
  OP_ALT / OP_KET, or when the reserved space exceeds SLJIT_MAX_LOCAL_SIZE)
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
   sljit_s32 depth, int start, BOOL fast_forward_allowed)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
BOOL prev_fast_forward_allowed;
int result = 0;
int count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

next_alt = cc + GET(cc, 1);
/* More than one alternative: no fast forward for this bracket. */
if (*next_alt == OP_ALT)
  fast_forward_allowed = FALSE;

/* One iteration per alternative. */
do
  {
  /* Every alternative starts again from the caller supplied state. */
  count = start;
  /* Step over the bracket / OP_ALT header (and the group number of OP_CBRA). */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* "continue" moves on to the next item of the alternative, "break" out of
  the switch reaches the tagging code below the switch. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      fast_forward_allowed = FALSE;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      fast_forward_allowed = FALSE;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      fast_forward_allowed = FALSE;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      /* Tag the iterator unless it repeats OP_ANYNL or OP_EXTUNI. */
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count == 0)
        count = 1;
      fast_forward_allowed = FALSE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 1;
      continue;

      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Star / plus repeat of a single character: always tagged. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* cc is now at the opcode following the class; only a star / plus
      repeat there keeps the class tagged. */
      switch (*cc)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count == 0)
          count = 1;
        /* Fall through */
        default:
        /* Not a star / plus repeat: treat the class as an ordinary item. */
        accelerated_start = NULL;
        fast_forward_allowed = FALSE;
        continue;
        }
      break;

      case OP_ONCE:
      case OP_BRA:
      case OP_CBRA:
      end = cc + GET(cc, 1);

      /* The caller's setting is passed to the nested scan; at this level
      fast forward is off from here on. */
      prev_fast_forward_allowed = fast_forward_allowed;
      fast_forward_allowed = FALSE;
      if (depth >= 4)
        break;

      /* Only enter brackets closed by a plain OP_KET, and capturing ones
      only if their optimized_cbracket[] entry is still set. */
      end = bracketend(cc) - (1 + LINK_SIZE);
      if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);

      /* The nested call marked the inner bracket: mark this one as well
      (detect_repeat() returns FALSE for marked brackets). */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end + (1 + LINK_SIZE);
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* A KET at or beyond next_alt ends this alternative; an earlier one
      is stepped over and scanning continues behind it. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (accelerated_start != NULL)
      {
      if (count == 0)
        {
        /* First tagged iterator: together with the increment further down,
        count becomes 2 after this item. */
        count++;

        if (fast_forward_allowed)
          {
          common->fast_forward_bc_ptr = accelerated_start;
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
          *private_data_start += sizeof(sljit_sw);
          }
        else
          {
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

          if (common->early_fail_start_ptr == 0)
            common->early_fail_start_ptr = *private_data_start;

          *private_data_start += sizeof(sljit_sw);
          common->early_fail_end_ptr = *private_data_start;

          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
            return EARLY_FAIL_ENHANCE_MAX;
          }
        }
      else
        {
        /* Later iterators use the range variant, which reserves two slots. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += 2 * sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;
        }

      /* Cannot be part of a repeat. */
      common->private_data_ptrs[begin - common->start] = 1;
      count++;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        continue;
      }

    break;
    }

  /* Scanning stopped in the middle of the alternative: report the maximum.
  Otherwise keep the largest count seen over all alternatives. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1578
/* Returns how many private data slots (0, 1 or 2) the repeat opcode at cc
asks for. Any opcode other than the class repeats tested below yields 0.
For the range forms a zero maximum gives the same result as the matching
star form; otherwise the result is (max - min), capped at 2. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 span;
PCRE2_UCHAR op = *cc;

if (op == OP_CRSTAR || op == OP_CRPLUS)
  return 2;

if (op == OP_CRMINSTAR || op == OP_CRMINPLUS || op == OP_CRQUERY || op == OP_CRMINQUERY)
  return 1;

if (op != OP_CRRANGE && op != OP_CRMINRANGE)
  return 0;

/* Range repeat: minimum first, then maximum. */
lower = GET2(cc, 1);
span = GET2(cc, 1 + IMM2_SIZE);
if (span == 0)
  return (op == OP_CRRANGE) ? 2 : 1;

span -= lower;
return (span > 2) ? 2 : span;
}
1610
detect_repeat(compiler_common * common,PCRE2_SPTR begin)1611 static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1612 {
1613 PCRE2_SPTR end = bracketend(begin);
1614 PCRE2_SPTR next;
1615 PCRE2_SPTR next_end;
1616 PCRE2_SPTR max_end;
1617 PCRE2_UCHAR type;
1618 sljit_sw length = end - begin;
1619 sljit_s32 min, max, i;
1620
1621 /* Detect fixed iterations first. */
1622 if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1623 return FALSE;
1624
1625 /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1626 * Skip the check of the second part. */
1627 if (PRIVATE_DATA(end - LINK_SIZE) == 0)
1628 return TRUE;
1629
1630 next = end;
1631 min = 1;
1632 while (1)
1633 {
1634 if (*next != *begin)
1635 break;
1636 next_end = bracketend(next);
1637 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1638 break;
1639 next = next_end;
1640 min++;
1641 }
1642
1643 if (min == 2)
1644 return FALSE;
1645
1646 max = 0;
1647 max_end = next;
1648 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1649 {
1650 type = *next;
1651 while (1)
1652 {
1653 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1654 break;
1655 next_end = bracketend(next + 2 + LINK_SIZE);
1656 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1657 break;
1658 next = next_end;
1659 max++;
1660 }
1661
1662 if (next[0] == type && next[1] == *begin && max >= 1)
1663 {
1664 next_end = bracketend(next + 1);
1665 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1666 {
1667 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1668 if (*next_end != OP_KET)
1669 break;
1670
1671 if (i == max)
1672 {
1673 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1674 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1675 /* +2 the original and the last. */
1676 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1677 if (min == 1)
1678 return TRUE;
1679 min--;
1680 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1681 }
1682 }
1683 }
1684 }
1685
1686 if (min >= 3)
1687 {
1688 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1689 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1690 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1691 return TRUE;
1692 }
1693
1694 return FALSE;
1695 }
1696
/* Groups of case labels for switch statements that sort iterator opcodes by
the number of private data slots they use. set_private_data_ptrs() gives one
slot to the _1 groups and two slots to the _2A / _2B groups. */

/* Single character iterators using one slot. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators using two slots. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators using two slots whose opcode is followed by an
IMM2_SIZE argument (set_private_data_ptrs() skips 2 + IMM2_SIZE for them). */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators using one slot. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators using two slots (set_private_data_ptrs() gives
them none when the repeated type is OP_ANYNL or OP_EXTUNI). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators with an IMM2_SIZE argument. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1748
/* Walks the compiled pattern from common->start up to ccend and hands out
private data offsets, written into common->private_data_ptrs at the position
of the opcode that owns them. Allocation starts at *private_data_start and the
first unused offset is written back there on return. The walk stops early once
the running offset exceeds SLJIT_MAX_LOCAL_SIZE.

Per opcode bookkeeping inside the loop:
  space      - number of sljit_sw slots a character iterator asks for
  size       - code units to step over; a negative value means "-size units,
               plus the extra UTF code units of the character just skipped"
  bracketlen - header length of a bracket opcode (0 for everything else) */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Iterators located before this position get no private slot (see the
"space > 0 && cc >= end" test below). NULL means no such region is active. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
         based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry behind the KET was left by detect_repeat(): the KET
    gets a slot and the recorded length is added to cc. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always get one slot. */
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    /* A slot is only assigned when the opcode at the linked position is
    OP_KETRMAX or OP_KETRMIN. */
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    /* The bracket that follows is not tested by detect_repeat(). */
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The slot count depends on the repeat opcode behind the class. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Outside of any restricted region: a bracket closed by a plain OP_KET
    keeps the region empty, any other closing opcode makes the whole
    bracket a restricted region. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1933
/* Computes the size of the frame needed for the code between cc and ccend.

When ccend is NULL, cc must point to a bracket: the range becomes the inside
of that bracket, from the opcode after its header up to its final ket.

Contributions to the length: 2 for the first OP_SET_SOM, 2 for the first
mark-setting opcode, 2 for the first use of capture_last_ptr, and 3 for every
capturing bracket. With recursive set, the SET_SOM and mark contributions are
treated as already accounted for. OP_RECURSE adds whichever of the three
two-word items the pattern uses and that have not been counted yet.

*needs_control_head is set when a mark-setting verb or OP_THEN is found while
common->control_head_ptr is non-zero.

Returns length + 1 when the length is positive (and, for a possessive capture
bracket, larger than the part that bracket contributes by itself).

Returns with a frame_types (always < 0) if no need for frame. */
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
{
int length = 0;
int possessive = 0;
/* Stays FALSE only when every opcode in the range is one of the cases
listed after "default:" in the switch below. */
BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    /* The possessive capture bracket itself: 3, plus 2 when capture_last_ptr
    is in use. Remembered in "possessive" for the final comparison. */
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    /* "default" is deliberately placed before the listed cases: unlisted
    opcodes set stack_restore and then share the stepping code with the
    listed ones, which leave stack_restore untouched. */
    default:
    stack_restore = TRUE;
    /* Fall through. */

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    case OP_CALLOUT:
    case OP_CALLOUT_STR:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
/* Nothing was added on top of the bracket's own contribution (this is also
true when both values are still 0). */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

/* NOTE(review): the extra unit presumably accounts for the terminating zero
stored by init_frame() - confirm. */
if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
2120
/* Emits the machine code that builds a frame on the stack for the pattern
range between cc and ccend (the inside of the bracket at cc when ccend is
NULL). Entries are stored relative to STACK_TOP, starting at STACK(stackpos)
and moving down by one sljit_sw per store:

  - OP_SET_SOM, mark-setting opcodes and capture_last_ptr (each at most once):
    the negated offset of the local variable, then its current value;
  - every capturing bracket: OVECTOR(2 * group number), then the two current
    ovector values of the group;
  - finally a single zero entry.

The opcodes are visited in the same way as in get_framesize(); the closing
assert checks that the stores end exactly at STACK(stacktop). stacktop is
only used by the asserts. */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here on stackpos is an offset produced by STACK(), not an index. */
stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A possessive capture bracket is not stepped over, so the loop below
  handles the bracket itself as its first opcode. */
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Tag (negated offset) first, then the saved value. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Saves each of the three local variables that the pattern uses and
    that has not been saved in this frame yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* Three entries per capture: the (positive) ovector offset as tag,
    followed by both ovector values of the group. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Zero entry closing the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
2243
/* Number of temporary registers cycled through by the delayed_mem_copy_*
helpers. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of memory to memory copies in which every value is
loaded into a temporary register right away, while the matching store is
emitted later, when the same register comes up again (or on finish). */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting code; set by delayed_mem_copy_init(). */
  struct sljit_compiler *compiler;
  /* Destination of the store still pending for each temporary register.
  A store_bases[] value of -1 means that nothing is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Registers holding the values in flight. Both arrays are read (asserted
  on) by delayed_mem_copy_init(), which does not fill them in. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the previous content of tmp_regs[i] is kept while it is in use;
  only used when sljit_get_register_index() is negative for this entry. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the register the next copy will use (round robin). */
  int next_tmp_reg;
} delayed_mem_copy_status;
2254
/* Resets status for a new sequence of delayed copies: binds it to the
compiler of common, restarts the round robin at register 0 and marks every
register as having no pending store. tmp_regs[] and saved_tmp_regs[] are
expected to be filled in already; they are only checked here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1: nothing waits to be stored from this register. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2269
/* Queues a copy of the word at [load_base + load_offset] to
[store_base + store_offset]. The load into the next temporary register is
emitted immediately; the store is remembered and emitted when this register
is reused by a later call or by delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The register still carries an earlier value: write that out first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of this register: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2294
/* Emits the stores which are still pending in the queue. The slots are
visited starting from the one that would be used next, so the stores are
emitted in the same order as their copies were queued. Registers which were
preserved in a virtual register are restored after their store. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining, reg, parked_reg;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* Nothing was queued in this slot. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  parked_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(parked_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, parked_reg, 0);
  }
}
2318
2319 #undef RECURSE_TMP_REG_COUNT
2320
recurse_check_bit(compiler_common * common,sljit_sw bit_index)2321 static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2322 {
2323 uint8_t *byte;
2324 uint8_t mask;
2325
2326 SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2327
2328 bit_index >>= SLJIT_WORD_SHIFT;
2329
2330 mask = 1 << (bit_index & 0x7);
2331 byte = common->recurse_bitset + (bit_index >> 3);
2332
2333 if (*byte & mask)
2334 return FALSE;
2335
2336 *byte |= mask;
2337 return TRUE;
2338 }
2339
/* Computes the number of machine words which copy_recurse_data() transfers
for the bytecode range [cc, ccend).

The count starts at 1: copy_recurse_data() always reserves the first word for
common->recursive_head_ptr. Every other word is counted only once, even if
several opcodes refer to it; common->recurse_bitset is cleared at entry and
recurse_check_bit() is used to detect the first reference.

Outputs:
  *needs_control_head  set when a word is counted for the control head
  *has_quit            set when a PRUNE / SKIP / COMMIT / THEN verb is found
  *has_accept          set when OP_ACCEPT or OP_ASSERT_ACCEPT is found

Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int length = 1;
int size, offset;
PCRE2_SPTR alternative;
BOOL quit_found = FALSE;
BOOL accept_found = FALSE;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL control_head_found = FALSE;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    setsom_found = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* The start of match and the mark words are only counted at the end of
    this function, and only when a quit verb is present. */
    if (common->has_set_som)
      setsom_found = TRUE;
    if (common->mark_ptr != 0)
      setmark_found = TRUE;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    offset = PRIVATE_DATA(cc);
    if (offset != 0)
      {
      if (recurse_check_bit(common, offset))
        length++;
      /* The private data of the next code unit is an extra amount to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always have one private word. */
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    /* Two ovector words belong to each capturing bracket. */
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND: the private word is counted only when the
    bracket is closed by OP_KETRMAX or OP_KETRMIN. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    /* Iterators: one or two private words, present only when the private
    data offset of the opcode is non-zero. */
    CASE_ITERATOR_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The number of words depends on the opcode which follows the class. */
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      setmark_found = TRUE;
    if (common->control_head_ptr != 0)
      control_head_found = TRUE;
    /* Every verb of this group except OP_MARK is a quit verb. */
    if (*cc != OP_MARK)
      quit_found = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    quit_found = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    quit_found = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    quit_found = TRUE;
    control_head_found = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    accept_found = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for the control head, and one word for each of the start of match
and mark values. The latter two are counted only when a quit verb is present. */
if (control_head_found)
  length++;
if (quit_found)
  {
  if (setsom_found)
    length++;
  if (setmark_found)
    length++;
  }

*needs_control_head = control_head_found;
*has_quit = quit_found;
*has_accept = accept_found;
return length;
}
2592
/* Transfer modes of copy_recurse_data(). "Global" refers to the SLJIT_SP
based locals, the other side is the save area addressed through the base
register selected by copy_recurse_data(). The function sorts the words into
private, shared and kept shared groups. */
enum copy_recurse_data_types {
  /* Copy the words of all three groups from the locals to the save area. */
  recurse_copy_from_global,
  /* Copy only the private words from the save area to the locals. */
  recurse_copy_private_to_global,
  /* Copy the shared and kept shared words from the save area to the locals. */
  recurse_copy_shared_to_global,
  /* Copy only the kept shared words from the save area to the locals. */
  recurse_copy_kept_shared_to_global,
  /* Exchange the private and shared words between the locals and the save
  area. The kept shared words are not touched. */
  recurse_swap_global
};
2600
/* Emits the code which transfers the recursion related words of the bytecode
range [cc, ccend) between the SLJIT_SP based locals and a save area.

The save area is addressed through STACK_TOP, or through TMP2 when type is
recurse_swap_global. Its layout must match get_recurse_data_length(): the
first word belongs to common->recursive_head_ptr, then the opcodes are visited
in order and each of them contributes its private, shared and kept shared
words (in this order). Every word is handled only once, which is tracked by
common->recurse_bitset.

Arguments:
  common     compiler data
  cc, ccend  bytecode range to scan
  type       one of copy_recurse_data_types; selects the groups which are
             transferred and the direction. The groups which are not selected
             are still skipped in the save area.
  stackptr   index of the first word of the save area (converted by STACK())
  stacktop   index of the word after the save area (converted by STACK())
  has_quit   the kept shared words (start of match, mark) are present only
             when this is TRUE

The moves are emitted through the delayed copy queue, so a load is separated
from its store by the loads of the following copies. */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, BOOL has_quit)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_check_bit(common, common->control_head_ptr);
#endif

/* from_sp: the locals are the source. base_reg: register which addresses the
save area. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Select the scratch registers of the copy queue. TMP2 cannot be used as a
scratch register when it addresses the save area. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The first word is the recursive head, which is handled as a private word. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  /* Collect the words of the current opcode. */
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (has_quit && recurse_check_bit(common, OVECTOR(0)))
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (has_quit)
      {
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        }
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        }
      }
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0)
      {
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      /* The private data of the next code unit is an extra amount to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    if (common->optimized_cbracket[offset] == 0)
      {
      private_srcw[0] = OVECTOR_PRIV(offset);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;

    /* From here offset is the location of the private ovector word. */
    offset = OVECTOR_PRIV(offset);
    if (recurse_check_bit(common, offset))
      {
      private_srcw[private_count] = offset;
      private_count++;
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    if (PRIVATE_DATA(cc) != 0)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        /* The word must be transferred here as well, because
        get_recurse_data_length() reserves a word for it. Leaving
        private_count at zero would skip the word and shift every word
        which follows it in the save area. */
        if (recurse_check_bit(common, private_srcw[0]))
          private_count = 1;
        break;

        case 2:
        if (recurse_check_bit(common, private_srcw[0]))
          {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
          }
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
      }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (has_quit && recurse_check_bit(common, common->mark_ptr))
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      }
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc++;
    break;

    default:
    /* No recursion related data belongs to this opcode. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    continue;
    }

  /* Transfer the collected words. The groups which are not selected by type
  only advance stackptr, so the layout of the save area is the same for
  every type. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* The whole save area must be consumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

delayed_mem_copy_finish(&status);
}
3031
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)3032 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3033 {
3034 PCRE2_SPTR end = bracketend(cc);
3035 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3036
3037 /* Assert captures then. */
3038 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3039 current_offset = NULL;
3040 /* Conditional block does not. */
3041 if (*cc == OP_COND || *cc == OP_SCOND)
3042 has_alternatives = FALSE;
3043
3044 cc = next_opcode(common, cc);
3045 if (has_alternatives)
3046 current_offset = common->then_offsets + (cc - common->start);
3047
3048 while (cc < end)
3049 {
3050 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3051 cc = set_then_offsets(common, cc, current_offset);
3052 else
3053 {
3054 if (*cc == OP_ALT && has_alternatives)
3055 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
3056 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3057 *current_offset = 1;
3058 cc = next_opcode(common, cc);
3059 }
3060 }
3061
3062 return end;
3063 }
3064
3065 #undef CASE_ITERATOR_PRIVATE_DATA_1
3066 #undef CASE_ITERATOR_PRIVATE_DATA_2A
3067 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3068 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3069 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3070 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3071
is_powerof2(unsigned int value)3072 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3073 {
3074 return (value & (value - 1)) == 0;
3075 }
3076
set_jumps(jump_list * list,struct sljit_label * label)3077 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3078 {
3079 while (list)
3080 {
3081 /* sljit_set_label is clever enough to do nothing
3082 if either the jump or the label is NULL. */
3083 SET_LABEL(list->jump, label);
3084 list = list->next;
3085 }
3086 }
3087
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3088 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3089 {
3090 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3091 if (list_item)
3092 {
3093 list_item->next = *list;
3094 list_item->jump = jump;
3095 *list = list_item;
3096 }
3097 }
3098
/* Records a stub in common->stubs. The stub stores the start jump and a label
emitted at the current position (quit); flush_stubs() generates the code of
the stubs later. Nothing is recorded when the allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();

/* Insert as the new head of the stub list. */
stub->next = common->stubs;
common->stubs = stub;
}
3112
/* Generates the code of the stubs recorded by add_stub(): the start jump of
each stub lands here, a fast call is emitted (and collected in
common->stackalloc), then the code jumps back to the quit label of the stub.
The stub list is emptied at the end. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3127
count_match(compiler_common * common)3128 static SLJIT_INLINE void count_match(compiler_common *common)
3129 {
3130 DEFINE_COMPILER;
3131
3132 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3133 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3134 }
3135
allocate_stack(compiler_common * common,int size)3136 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3137 {
3138 /* May destroy all locals and registers except TMP2. */
3139 DEFINE_COMPILER;
3140
3141 SLJIT_ASSERT(size > 0);
3142 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3143 #ifdef DESTROY_REGISTERS
3144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3145 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3146 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3148 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3149 #endif
3150 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3151 }
3152
free_stack(compiler_common * common,int size)3153 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3154 {
3155 DEFINE_COMPILER;
3156
3157 SLJIT_ASSERT(size > 0);
3158 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3159 }
3160
/* Allocates size bytes of read only data. One extra sljit_uw sized word is
allocated before the returned area; it stores the previous value of
common->read_only_data_head, so the allocations form a linked list.

Returns NULL when the compiler is already in an error state or when the
allocation fails. In the latter case the memory error of the compiler is
set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (!SLJIT_UNLIKELY(chunk == NULL))
  {
  /* Link the new chunk in front of the list. */
  *(void**)chunk = common->read_only_data_head;
  common->read_only_data_head = (void *)chunk;
  return chunk + 1;
  }

sljit_set_compiler_memory_error(compiler);
return NULL;
}
3180
reset_ovector(compiler_common * common,int length)3181 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3182 {
3183 DEFINE_COMPILER;
3184 struct sljit_label *loop;
3185 sljit_s32 i;
3186
3187 /* At this point we can freely use all temporary registers. */
3188 SLJIT_ASSERT(length > 1);
3189 /* TMP1 returns with begin - 1. */
3190 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3191 if (length < 8)
3192 {
3193 for (i = 1; i < length; i++)
3194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3195 }
3196 else
3197 {
3198 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3199 {
3200 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3201 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3202 loop = LABEL();
3203 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3204 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3205 JUMPTO(SLJIT_NOT_ZERO, loop);
3206 }
3207 else
3208 {
3209 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3210 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3211 loop = LABEL();
3212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3213 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3214 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3215 JUMPTO(SLJIT_NOT_ZERO, loop);
3216 }
3217 }
3218 }
3219
reset_early_fail(compiler_common * common)3220 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3221 {
3222 DEFINE_COMPILER;
3223 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3224 sljit_u32 uncleared_size;
3225 sljit_s32 src = SLJIT_IMM;
3226 sljit_s32 i;
3227 struct sljit_label *loop;
3228
3229 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3230
3231 if (size == sizeof(sljit_sw))
3232 {
3233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3234 return;
3235 }
3236
3237 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3238 {
3239 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3240 src = TMP3;
3241 }
3242
3243 if (size <= 6 * sizeof(sljit_sw))
3244 {
3245 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3247 return;
3248 }
3249
3250 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3251
3252 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3253
3254 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3255
3256 loop = LABEL();
3257 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3258 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3259 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3260 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3261 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3262
3263 if (uncleared_size >= sizeof(sljit_sw))
3264 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3265
3266 if (uncleared_size >= 2 * sizeof(sljit_sw))
3267 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3268 }
3269
do_reset_match(compiler_common * common,int length)3270 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3271 {
3272 DEFINE_COMPILER;
3273 struct sljit_label *loop;
3274 int i;
3275
3276 SLJIT_ASSERT(length > 1);
3277 /* OVECTOR(1) contains the "string begin - 1" constant. */
3278 if (length > 2)
3279 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3280 if (length < 8)
3281 {
3282 for (i = 2; i < length; i++)
3283 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3284 }
3285 else
3286 {
3287 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3288 {
3289 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3290 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3291 loop = LABEL();
3292 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3293 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3294 JUMPTO(SLJIT_NOT_ZERO, loop);
3295 }
3296 else
3297 {
3298 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3299 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3300 loop = LABEL();
3301 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3302 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3303 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3304 JUMPTO(SLJIT_NOT_ZERO, loop);
3305 }
3306 }
3307
3308 if (!HAS_VIRTUAL_REGISTERS)
3309 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3310 else
3311 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3312
3313 if (common->mark_ptr != 0)
3314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3315 if (common->control_head_ptr != 0)
3316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3317 if (HAS_VIRTUAL_REGISTERS)
3318 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3319
3320 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3321 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3322 }
3323
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3324 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3325 {
3326 while (current != NULL)
3327 {
3328 switch (current[1])
3329 {
3330 case type_then_trap:
3331 break;
3332
3333 case type_mark:
3334 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3335 return current[3];
3336 break;
3337
3338 default:
3339 SLJIT_UNREACHABLE();
3340 break;
3341 }
3342 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3343 current = (sljit_sw*)current[0];
3344 }
3345 return 0;
3346 }
3347
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code which publishes the result of a successful match:
  - stores the start pointer kept in the common->start_ptr slot into
    jit_arguments.startchar_ptr, and (when marks are in use) the mark slot
    into jit_arguments.mark_ptr;
  - converts jit_arguments.oveccount entries of the local-frame ovector from
    pointers to code unit offsets relative to jit_arguments.begin, and
    writes them to the ovector of jit_arguments.match_data;
  - leaves the return value in SLJIT_RETURN_REG: 1 when topbracket <= 1,
    otherwise a count obtained by scanning the local ovector backwards from
    topbracket (see the loops at the end). */
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the previous content of OVECTOR(1); the scan loops at the end
compare entries against it (presumably the "unset" marker - TODO confirm with
the code that initializes the ovector). OVECTOR(1) itself receives STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

if (HAS_VIRTUAL_REGISTERS)
  {
  /* ARGUMENTS is first copied into SLJIT_R0, and every jit_arguments field
  is accessed through that register in this variant. */
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  /* SLJIT_R1 is the loop counter of the copy loop below. */
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  /* SLJIT_R2 points one PCRE2_SIZE before the output ovector, because the
  copy loop increments it before each store. */
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  /* Same sequence as above, but jit_arguments is addressed directly through
  ARGUMENTS and the mark value travels through SLJIT_R0. */
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* Ask (SLJIT_MEM_SUPP) whether a load with pre-update addressing is available;
the actual instruction is only emitted inside the loop. */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 is the read cursor in the local ovector. With pre-update addressing
it starts one word early. SLJIT_R0 becomes the subject start (begin). */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: one local ovector word per iteration. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the output cursor, then turn the pointer into a byte distance
from the subject start. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Byte distance -> code unit distance. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

/* The store width follows the size of PCRE2_SIZE. */
SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Both variants walk the local ovector backwards in steps of two words,
  decrementing SLJIT_R1 (initially topbracket + 1) for every entry read, and
  stop at the first entry which differs from SLJIT_S2. They only differ in
  whether pre-update addressing is used for the load. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    /* Hence the loop terminates at the latest when it reaches OVECTOR(0). */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    /* Without pre-update the cursor starts directly on the first entry read. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  /* No scan is needed: the return value is the constant 1. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3444
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3445 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3446 {
3447 DEFINE_COMPILER;
3448 sljit_s32 mov_opcode;
3449 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3450
3451 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3452 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3453 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3454
3455 if (arguments_reg != ARGUMENTS)
3456 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3457 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3458 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3459 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3460
3461 /* Store match begin and end. */
3462 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3463 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3464 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3465
3466 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3467
3468 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3469 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3470 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3471 #endif
3472 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3473
3474 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3475 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3476 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3477 #endif
3478 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3479
3480 JUMPTO(SLJIT_JUMP, quit);
3481 }
3482
check_start_used_ptr(compiler_common * common)3483 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3484 {
3485 /* May destroy TMP1. */
3486 DEFINE_COMPILER;
3487 struct sljit_jump *jump;
3488
3489 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3490 {
3491 /* The value of -1 must be kept for start_used_ptr! */
3492 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3493 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3494 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3495 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3497 JUMPHERE(jump);
3498 }
3499 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3500 {
3501 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3503 JUMPHERE(jump);
3504 }
3505 }
3506
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3507 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3508 {
3509 /* Detects if the character has an othercase. */
3510 unsigned int c;
3511
3512 #ifdef SUPPORT_UNICODE
3513 if (common->utf || common->ucp)
3514 {
3515 if (common->utf)
3516 {
3517 GETCHAR(c, cc);
3518 }
3519 else
3520 c = *cc;
3521
3522 if (c > 127)
3523 return c != UCD_OTHERCASE(c);
3524
3525 return common->fcc[c] != c;
3526 }
3527 else
3528 #endif
3529 c = *cc;
3530 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3531 }
3532
char_othercase(compiler_common * common,unsigned int c)3533 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3534 {
3535 /* Returns with the othercase. */
3536 #ifdef SUPPORT_UNICODE
3537 if ((common->utf || common->ucp) && c > 127)
3538 return UCD_OTHERCASE(c);
3539 #endif
3540 return TABLE_GET(c, common->fcc, c);
3541 }
3542
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 if they do not. Otherwise the low 8 bits of the result
contain the differing bit as it appears inside a single byte, and the bits
above them contain the index of that byte within the encoded character. The
caller must only pass characters which have an other case. */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(c, cc);
  }
else
  c = *cc;

if (!(common->utf || common->ucp))
  oc = TABLE_GET(c, common->fcc, c);
else if (c > 127)
  oc = UCD_OTHERCASE(c);
else
  oc = common->fcc[c];
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;

/* Fast path for the English alphabet. */
if (bit == 0x20 && c <= 127)
  return 0x20;

/* c and oc differ, so at least one bit is set. Give up on more than one. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start at the last byte of the sequence, and step towards the first
  byte by six payload bits at a time until the bit is found. */
  for (n = GET_EXTRALEN(*cc); (bit & 0x3f) == 0; bit >>= 6)
    n--;
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference in the low ten bits is reported with byte index 2 or 3. */
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));

  /* Otherwise discard the low ten bits, and report the remaining bit
  with byte index 0 or 1 below. */
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
if (bit < 256)
  return bit;
return (1u << 8) | (bit >> 8);

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3618
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the code which records a partial match. Nothing is emitted in
complete matching mode. In soft partial mode the hit_start slot is cleared,
in hard partial mode a jump to the partial match exit is generated. The
action is guarded by a test on the start_used_ptr slot, except when it is
unconditional in hard mode. The emitted code does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (force || common->allow_empty_partial)
  {
  /* Only soft mode needs a guard here: skip when start_used_ptr is -1. */
  if (soft_mode)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
3648
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits a subject end test. When STR_PTR is below STR_END the generated
code falls through. Otherwise a jump is added to end_reached, except in hard
partial mode when start_used_ptr is below STR_PTR: control is transferred to
the partial match exit in that case. Soft partial mode clears hit_start
under the same condition before jumping to end_reached. The emitted code
does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(not_at_end);
}
3678
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits a subject end test which backtracks when no more input is
available. In the partial matching modes a partial match is recorded first
(soft mode: hit_start is cleared, then backtrack; hard mode: jump to the
partial match exit), unless the test on start_used_ptr rejects it. */
DEFINE_COMPILER;
struct sljit_jump *has_input;
BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  /* Only soft mode needs a guard here: backtrack when start_used_ptr is -1. */
  if (soft_mode)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

JUMPHERE(has_input);
}
3711
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match bookkeeping which is executed when
start_used_ptr is below STR_PTR: soft mode clears the hit_start slot, hard
mode jumps to the partial match exit. Nothing is emitted in any other
mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  else
    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  return;

skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
JUMPHERE(skip);
}
3732
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to "label" which is taken while STR_PTR is below STR_END.
The code emitted after it by process_partial_match() is therefore only
reached when the end of the subject has been hit. */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3740
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.

Arguments:
  max         when max is below the first value which needs more than one
              code unit (128 for UTF-8, 0xd800 for UTF-16 and for the 32 bit
              invalid UTF check), only the first code unit is loaded
  dst, dstw   operand used for saving STR_PTR while a decoder helper runs
  backtracks  when not NULL, and invalid UTF checking is enabled, a jump is
              added to this list for characters decoded as INVALID_UTF_CHAR
              (8 and 16 bit) or rejected by the range checks (32 bit) */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every argument is used in every build configuration. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 are complete characters. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  /* The helper is entered with the first byte in TMP1 and STR_PTR moved
  past it. STR_PTR is saved in dst before, and restored after the call. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  /* TMP2 is below 0x800 exactly for surrogates (0xd800 - 0xdfff). */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Every surrogate is passed to the validating helper, using the same
    save / restore of STR_PTR as the 8 bit code. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    /* The pair is combined inline:
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* Invalid values are those >= 0x110000, and the surrogates
  (0xd800 - 0xdfff). */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list, TMP1 is replaced by INVALID_UTF_CHAR
    using conditional moves. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3824
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

Arguments:
  max         when max is below 128 (UTF-8) or 0xd800 (UTF-16), only the
              code unit before STR_PTR is loaded
  backtracks  jump list for invalid characters when invalid UTF checking
              is enabled; the 8 and 16 bit code accepts NULL, the 32 bit
              code passes it to add_jump() without a check
              NOTE(review): confirm 32 bit callers never pass NULL */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

/* Not every argument is used in every build configuration. */
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 are complete characters; everything else is decoded
  by one of the backward reading helpers. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Only values below 0xd800 bypass the validating helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* The pair is only combined when the previous code unit is a low
    surrogate (0xdc00 - 0xdfff):
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Backtrack on values >= 0x110000, and on surrogates (0xd800 - 0xdfff). */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3892
/* Option bits for the "options" argument of read_char(). */

/* STR_PTR must be moved past the whole character, even when the caller is
not interested in the value of a multi code unit character. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* When invalid UTF checking is active, the character is decoded by the
utfreadnewline_invalid helper instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Combination of the two options above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Do not use the invalid UTF decoders even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
3897
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

The first code unit is always loaded into TMP1 and STR_PTR is always moved
past it. The rest depends on the code unit width and on the range: the
decoder is either emitted inline (only as much as needed for telling
whether the character is inside the range), or a helper is called.

Arguments:
  min, max    the range of character values the caller is interested in
  backtracks  when not NULL, and invalid UTF checking is active, a jump is
              added to this list for invalid characters
  options     combination of the READ_CHAR_* bits */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

/* Not every argument is used in every build configuration. */
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The first byte is enough when only ASCII values are interesting and
  STR_PTR does not need to be moved past the remaining bytes. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Validating decoders: every byte >= 0x80 is passed to a helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  /* Bytes below 0xc0 are not treated as the start of a multi byte
  sequence. In the branches below, the table entry at
  PRIV(utf8_table4)[first byte - 0xc0] is added to STR_PTR when the whole
  character must be skipped (presumably the number of additional bytes -
  the table is defined outside of this function). */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be inside the range: decode inline when
    the first byte is 0xf0 - 0xf7, skip the decoding (jump2) otherwise. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    /* Without READ_CHAR_UPDATE_STR_PTR, STR_PTR is only moved when the
    character has been decoded. */
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be inside the range: decode inline when
    the first byte is 0xe0 - 0xef, skip the decoding (jump2) otherwise. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* Any other range which includes values >= 0x800: use the generic helper. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* Reached only with READ_CHAR_UPDATE_STR_PTR (see the early return
    above): the value is not needed, only STR_PTR is updated. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: the first two bytes are combined inline as if
    the character was a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* The first code unit is enough when values >= 0xd800 are not interesting
  and STR_PTR does not need to be moved past a second code unit. */
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Validating decoders: every surrogate (0xd800 - 0xdfff) is passed to
    a helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  if (max >= 0x10000)
    {
    /* The precise value of characters >= 0x10000 is needed, so the
    surrogate pair is combined inline:
    ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  /* The precise value above 0xffff is not needed here. When the first code
  unit is a high surrogate (TMP2 < 0x400), STR_PTR is moved past the second
  code unit if requested, and, when max >= 0xd800, TMP1 is set to 0x10000,
  which is above max on this path. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
    {
    /* Branch free variant using conditional moves. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
    if (max >= 0xd800)
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
    }
  else
    {
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(jump);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Invalid values are those >= 0x110000, and the surrogates
  (0xd800 - 0xdfff). They either cause a backtrack, or are replaced by
  INVALID_UTF_CHAR when no backtrack list is available. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4091
4092 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4093
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match: this is the case when the second half of the 32 byte bitset (the
bits of characters 128 - 255) is uniform, that is all ones for a negated
class, and all zeroes otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4110
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.
Only valid in UTF mode.

Arguments:
  backtracks  jump list for invalid characters; only used when negated is
              set and invalid UTF checking is enabled
  negated     when set, characters >= 128 are accepted by the caller, so
              STR_PTR must be moved past the whole character; otherwise only
              the first code unit is consumed */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Single byte characters need no further processing. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The decoder helper expects the first code unit in TMP1 (this is how
    read_char and peek_char call it), but TMP1 contains the character type
    at this point, so the first byte is copied back from TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The type of every valid character >= 128 is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Skip the rest of the character: the table entry selected by the first
    byte is added to STR_PTR. TMP1 is already zero. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4145
4146 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4147
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits code that reads the character type into TMP1 and updates STR_PTR.
Does not check STR_END before reading the first code unit. The ctypes table
is only consulted for characters up to 255; larger characters get type zero.

common      the compiler state
backtracks  receives the jumps taken when an invalid UTF sequence is found
              (only used when common->invalid_utf is enabled)
negated     when TRUE, the complete character is consumed (and validated in
              invalid-UTF mode) even if it has no ctypes entry */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only a two byte sequence can encode a character below 256, so the
    second byte is read unconditionally and combined with the first. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    /* The 0x80 marker of the second byte (== 0x2 << 6) compensates for the
    0xc2 subtracted from the first byte. */
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be in the 0x80-0xbf range. The test is done on
    TMP1 (second byte minus 0x80); TMP2 holds the combined character value,
    which is never below 0x80 for a valid sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The utfreadchar_invalid helper decodes the character starting from
    its first byte in TMP1. TMP1 holds a ctypes entry at this point, so the
    first byte must be copied over from TMP2 before the call. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
/* Code points from 0x110000 upwards are rejected. */
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch-free form: STR_PTR is replaced by STR_PTR + 1 when the
      unit just read was a high surrogate (0xd800-0xdbff). */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: units outside 0xd800-0xdfff are complete characters.
  A lone low surrogate, a high surrogate at the end of the subject, or a
  high surrogate not followed by a low surrogate causes a backtrack. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4266
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

common         the compiler state
backtracks     may be NULL; otherwise it receives the jumps taken when an
                 invalid sequence is detected (invalid-UTF mode only)
must_be_valid  when TRUE, the validating (invalid-UTF) code paths are not
                 emitted */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Step back one byte. A byte below 0x80 is a complete character;
    anything else is handed to the utfmoveback_invalid helper. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* NOTE(review): the helper is presumably do_utfmoveback_invalid, which
    returns TMP1 == 0 for an invalid sequence - confirm the binding. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-8: step back while the byte just passed has the 10xxxxxx
  (continuation byte) form. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Units outside the 0xd800-0xdfff surrogate range are complete
    characters; surrogates are handed to the utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  /* Branch-free: TMP1 becomes 1 when the unit is in 0xdc00-0xdfff and 0
  otherwise; it is then scaled to a byte offset and subtracted. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Backtrack (before STR_PTR is changed) when the previous unit is
    0x110000 or above. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: STR_PTR is decreased by one unit only when the
  previous unit is below 0x110000 (flag converted to 0/1 and scaled). */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Fixed width case: a character is exactly one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4353
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character held in TMP1. TMP2 may be destroyed.

common       the compiler state
nltype       NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED
backtracks   list that receives the emitted conditional jump(s)
jumpifmatch  TRUE: the jumps are taken when the character is a newline;
               FALSE: the jump is taken when it is not a newline */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character is a newline if it is CR or NL. */
if (!jumpifmatch)
  {
  /* A CR skips the NL test; anything that is neither CR nor NL jumps. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4386
4387 #ifdef SUPPORT_UNICODE
4388
4389 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

This emits a fast-call helper (fast enter / fast return). The bytes following
the first one are read starting at STR_PTR, and STR_PTR is advanced past them.
No validation is done and STR_END is not checked. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the second byte into the shifted first byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* After the shift, 0x800 is bit 5 of the first byte: clear for 110xxxxx. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
/* 0x3000 is 0xc0 << 6: removes the length marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Merge the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is now bit 4 of the first byte: clear for 1110xxxx. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
/* 0xe0000 is 0xe0 << 12. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
/* 0xf0000 is 0xf0 << 12; the marker is removed before the last shift. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4434
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

This emits a fast-call helper. STR_PTR is advanced past the remaining bytes
of the character. No validation is done and STR_END is not checked. TMP2 is
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 5 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* If they exceed 3, the character is above 255 and has no ctypes entry. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
JUMPHERE(jump);
/* The table entry for the first byte (index biased by 0xc0) is added to
STR_PTR to skip the rest of the character; the type is zero. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4470
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

This emits a fast-call helper. On entry STR_PTR addresses the byte after the
first byte. For an invalid or truncated sequence TMP1 is set to
INVALID_UTF_CHAR. STR_END is respected. TMP2 is destroyed.

Where the CPU supports conditional moves, several range checks replace TMP1
with a poison value instead of branching; the poison is chosen so that a
later check turns it into INVALID_UTF_CHAR. The matching exit_invalid[] slot
is then NULL (the final loop still passes it to sljit_set_label). */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase the first byte so that the valid range 0xc2-0xf4 starts at zero. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Fewer than three bytes follow: use the bounds-checked path further down. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
/* The second byte must be a continuation byte (0x80-0xbf). */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x800 is clear only when the first byte was in 0xc2-0xdf. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: TMP1 is the character; one extra byte was consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Bad third byte: 0x20000 is treated as a three-byte value below and
  fails the final "< 0x800" check there. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x10000 is set when the first byte was 0xf0 or above. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

three_byte_entry = LABEL();

/* At this point TMP1 is the character plus 0x20000. Reject the surrogate
range 0xd800-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Reject overlong encodings (values below 0x800). */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* Bad fourth byte: zero fails the range check below. */
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* TMP1 is the character plus 0xc00000; only 0x10000-0x10ffff is accepted. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Near the end of the subject: every byte is bounds checked before it is
read. STR_PTR is first moved to one byte after its entry value. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
/* Values below 0x30000 re-join the common three-byte validation above;
anything else falls through to the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common exit for all invalid sequences. */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4629
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

This emits a fast-call helper. Only 0x85 and the 0x20xx values produced by a
first byte of 0xe2 followed by 0x80 are actually decoded; for everything else
the following continuation bytes are skipped and INVALID_UTF_CHAR is
returned. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* The first call only probes whether a post-update load is supported;
  if so, the second call emits it. Otherwise load and add separately. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  /* Continue while the byte has the 10xxxxxx form, then step back over
  the first byte that does not. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second byte, then dispatch on the first byte. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline candidate: TMP2 holds the byte that was read last. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read is not a continuation byte: leave it unconsumed. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be a continuation byte; its low six bits are
combined with 0x2000, so any value in 0x2000-0x203f may be returned. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4720
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back.

This emits a fast-call helper. On entry TMP1 is expected to hold the byte at
STR_PTR, and TMP2 the lowest address that may be read (move_back() documents
it as the start of the subject buffer). On success TMP1 is 1 and STR_PTR
addresses the first byte of the character. On failure TMP1 is 0 and STR_PTR
is one code unit above its entry value. Only the lead byte is range checked
here; the decoded value is not validated. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Speculatively move to where the lead byte of a four-byte sequence would
be. The last byte must not be a lead byte itself. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Fewer than three bytes precede the last one: use the checked path. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

/* A byte in 0xc0-0xdf is accepted as the lead of a two-byte sequence. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* Unsigned compare: only a continuation byte (0x80-0xbf), which wrapped
around to the top 0x40 values after the subtraction, may pass. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* A byte in 0xe0-0xef is the lead of a three-byte sequence. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* Rebase to the continuation byte range and require 0x80-0xbf. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The lead byte must be in 0xf0-0xf4. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Close to the start of the buffer: STR_PTR is moved to the candidate lead
byte before each read, and checked against TMP2 first. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Invalid exit used when STR_PTR is two units below its entry value. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Invalid exit used when STR_PTR is three units below its entry value. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4816
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

This emits a fast-call helper that decodes the multi-byte UTF-8 character
ending just before STR_PTR and returns its value in TMP1. The lead byte is
searched for at offsets -2, -3 and (by elimination) -4. No validation and no
start-of-buffer check is done. TMP2 is destroyed.
NOTE(review): the single byte case is never tested here, so it is presumably
handled by the caller - confirm. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Lead byte of a two-byte sequence (0xc0-0xdf) at offset -2? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Lead byte of a three-byte sequence (0xe0-0xef) at offset -3? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four-byte sequence: the byte at -3 is a continuation byte, so it is
rebased by 0x80 and combined with the payload of the lead byte at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three- and four-byte sequences: append the byte at offset -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* All sequences: append the last byte at offset -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4853
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

This emits a fast-call helper that decodes and validates the multi-byte
UTF-8 character ending just before STR_PTR. TMP1 receives the character, or
INVALID_UTF_CHAR for an invalid sequence. TMP2 is destroyed.
NOTE(review): on entry TMP1 appears to hold the byte at STR_PTR - 1 and TMP2
the lowest readable address of the subject - confirm against the callers.

As in the forward decoder, when conditional moves are available some checks
poison TMP1 instead of branching and leave a NULL exit_invalid[] slot. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* The last byte must not be a lead byte. If fewer than four bytes are
available before STR_PTR, the bounds-checked paths further down are used. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Valid lead bytes are 0xc2-0xdf. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
/* That is, the 0x80 marker still present in TMP1 compensates for the 0xc2
subtracted from the lead byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -2 must be a continuation byte; collect its payload. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the surrogate range 0xd800-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  /* The poison value becomes zero after the addition below and is then
  caught by the overlong check. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject overlong encodings (values below 0x800). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -3 must be a continuation byte; collect its payload. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Only 0x10000-0x10ffff is accepted; the unsigned compare also rejects
lead bytes below 0xf0 and values below 0x10000. */
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four bytes are available. TMP2 (start + 3) is lowered by one
to test whether three bytes can be read. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* A four-byte sequence cannot fit. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes are available: only a two-byte sequence can fit,
and only if two bytes can be read at all. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common exit for all invalid sequences. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4985
4986 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4987
4988 #if PCRE2_CODE_UNIT_WIDTH == 16
4989
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the out-of-line helper that finishes decoding a UTF-16 character
when invalid UTF input must be tolerated. On entry TMP1 holds the first code
unit (stated to be >= 0xd800 by the original comment). On return TMP1 holds
the decoded character, or INVALID_UTF_CHAR when the first unit is not a high
surrogate, the input ends, or the second unit is not a low surrogate.
STR_PTR is undefined for invalid characters. */
DEFINE_COMPILER;
struct sljit_jump *first_not_high;
struct sljit_jump *input_exhausted;
struct sljit_jump *second_not_low;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. NOTE(review): it is only shifted by 10
and biased by 0x10000 below, so the caller presumably passes it already
reduced to its payload bits - confirm at the call site. */
first_not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
input_exhausted = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load the second code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit is accepted only in the range 0xdc00 - 0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
second_not_low = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Combine both halves. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for every failure. */
JUMPHERE(first_not_high);
JUMPHERE(input_exhausted);
JUMPHERE(second_not_low);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5021
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the out-of-line UTF-16 helper specialized for newline checks.
On entry TMP1 holds the first code unit (stated to be >= 0xd800 by the
original comment). The exact character value is never computed: on the
success path TMP1 is set to 0x10000 and STR_PTR is advanced by one code unit
only when the next unit is a low surrogate. INVALID_UTF_CHAR is returned in
TMP1 when the input has ended or the first unit is >= 0xdc00. */
DEFINE_COMPILER;
struct sljit_jump *input_exhausted;
struct sljit_jump *first_not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

input_exhausted = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* TMP2 receives the following code unit before the first one is checked. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
first_not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 if the following unit is in 0xdc00 - 0xdfff, 0 otherwise. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
/* Convert the 0/1 flag to a byte distance and skip the low surrogate. */
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for every failure. */
JUMPHERE(input_exhausted);
JUMPHERE(first_not_high);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5053
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the out-of-line UTF-16 helper that moves one character back when
invalid UTF input must be tolerated. It tries to step over the first half of
a surrogate pair: the code unit just before STR_PTR has to be in the range
0xd800 - 0xdbff. On success STR_PTR is decreased by one code unit and TMP1 is
set to 1. Otherwise STR_PTR is increased by one code unit and TMP1 is set to 0.
NOTE(review): the meaning of TMP1 and TMP2 on entry is defined by the caller,
which is not visible here; TMP2 is compared against STR_PTR as a lower bound. */
DEFINE_COMPILER;
struct sljit_jump *entry_rejected;
struct sljit_jump *no_previous_unit;
struct sljit_jump *previous_not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

entry_rejected = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
no_previous_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding unit is accepted only in the range 0xd800 - 0xdbff. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
previous_not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for every failure. */
JUMPHERE(entry_rejected);
JUMPHERE(no_previous_unit);
JUMPHERE(previous_not_high);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5081
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the out-of-line UTF-16 helper that peeks at the character before
STR_PTR when invalid UTF input must be tolerated. STR_PTR is not modified.
On entry TMP1 holds a code unit; values >= 0xe000 are returned unchanged.
Otherwise TMP1 has to be a low surrogate (>= 0xdc00) and the code unit two
positions before STR_PTR has to be a high surrogate; the two are combined
into the character value returned in TMP1. INVALID_UTF_CHAR is returned in
TMP1 on any failure. NOTE(review): TMP2 is used as a lower bound after being
advanced by one code unit; its entry value is defined by the caller. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;
struct sljit_jump *not_low_surrogate;
struct sljit_jump *no_room_for_pair;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
not_low_surrogate = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
no_room_for_pair = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Fetch the candidate high surrogate and merge the two halves. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(single_unit);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for every failure. */
JUMPHERE(not_low_surrogate);
JUMPHERE(no_room_for_pair);
JUMPHERE(not_high_surrogate);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5113
5114 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5115
5116 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5117 #define UCD_BLOCK_MASK 127
5118 #define UCD_BLOCK_SHIFT 7
5119
static void do_getucd(compiler_common *common)
{
/* Emits the helper that searches the UCD record index of the character
passed in TMP1, using the two stage lookup tables. On return TMP2 holds the
16 bit value read from ucd_stage2. TMP1 is overwritten with the index that
was used for the stage 2 table; no field of the record itself is loaded. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_unicode_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *dummy_record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy_record->script == ucp_Unknown && dummy_record->chartype == ucp_Cn && dummy_record->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy_record->caseset == 0 && dummy_record->other_case == 0);
#endif

/* The shifts and masks below depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before the tables are indexed. */
  in_unicode_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_unicode_range);
  }
#endif

/* Stage 1: block number -> 16 bit entry (hence the shift left by 1). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: (stage 1 value * block size) + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5159
static void do_getucdtype(compiler_common *common)
{
/* Emits the helper that searches the UCD record of the character passed in
TMP1 and loads its chartype field. On return TMP1 holds the chartype byte.
TMP2 is left holding the record index multiplied by 4, a by-product of the
address computation. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_unicode_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *dummy_record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy_record->script == ucp_Unknown && dummy_record->chartype == ucp_Cn && dummy_record->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy_record->caseset == 0 && dummy_record->other_case == 0);
#endif

/* The shifts, masks and the "times 12" sequence depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before the tables are indexed. */
  in_unicode_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_unicode_range);
  }
#endif

/* Stage 1: block number -> 16 bit entry (hence the shift left by 1). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: (stage 1 value * block size) + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1):
the first term is added to the base address, the second one comes from the
scaled index of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5206
5207 #endif /* SUPPORT_UNICODE */
5208
mainloop_entry(compiler_common * common)5209 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5210 {
5211 DEFINE_COMPILER;
5212 struct sljit_label *mainloop;
5213 struct sljit_label *newlinelabel = NULL;
5214 struct sljit_jump *start;
5215 struct sljit_jump *end = NULL;
5216 struct sljit_jump *end2 = NULL;
5217 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5218 struct sljit_label *loop;
5219 struct sljit_jump *jump;
5220 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5221 jump_list *newline = NULL;
5222 sljit_u32 overall_options = common->re->overall_options;
5223 BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5224 BOOL newlinecheck = FALSE;
5225 BOOL readuchar = FALSE;
5226
5227 if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5228 && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5229 newlinecheck = TRUE;
5230
5231 SLJIT_ASSERT(common->abort_label == NULL);
5232
5233 if ((overall_options & PCRE2_FIRSTLINE) != 0)
5234 {
5235 /* Search for the end of the first line. */
5236 SLJIT_ASSERT(common->match_end_ptr != 0);
5237 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5238
5239 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5240 {
5241 mainloop = LABEL();
5242 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5243 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5245 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5246 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5247 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5248 JUMPHERE(end);
5249 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5250 }
5251 else
5252 {
5253 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5254 mainloop = LABEL();
5255 /* Continual stores does not cause data dependency. */
5256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5257 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5258 check_newlinechar(common, common->nltype, &newline, TRUE);
5259 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5260 JUMPHERE(end);
5261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5262 set_jumps(newline, LABEL());
5263 }
5264
5265 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5266 }
5267 else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5268 {
5269 /* Check whether offset limit is set and valid. */
5270 SLJIT_ASSERT(common->match_end_ptr != 0);
5271
5272 if (HAS_VIRTUAL_REGISTERS)
5273 {
5274 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5276 }
5277 else
5278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5279
5280 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5281 end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5282 if (HAS_VIRTUAL_REGISTERS)
5283 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5284 else
5285 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5286
5287 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5288 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5289 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5290 if (HAS_VIRTUAL_REGISTERS)
5291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5292
5293 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5294 end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5295 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5296 JUMPHERE(end2);
5297 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5298 add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5299 JUMPHERE(end);
5300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5301 }
5302
5303 start = JUMP(SLJIT_JUMP);
5304
5305 if (newlinecheck)
5306 {
5307 newlinelabel = LABEL();
5308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5309 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5310 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5311 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5312 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5313 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5314 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5315 #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5316 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5317 end2 = JUMP(SLJIT_JUMP);
5318 }
5319
5320 mainloop = LABEL();
5321
5322 /* Increasing the STR_PTR here requires one less jump in the most common case. */
5323 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5324 if (common->utf && !common->invalid_utf) readuchar = TRUE;
5325 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5326 if (newlinecheck) readuchar = TRUE;
5327
5328 if (readuchar)
5329 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5330
5331 if (newlinecheck)
5332 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5333
5334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5335 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5336 #if PCRE2_CODE_UNIT_WIDTH == 8
5337 if (common->invalid_utf)
5338 {
5339 /* Skip continuation code units. */
5340 loop = LABEL();
5341 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5342 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5343 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5344 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5345 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5346 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5347 JUMPHERE(jump);
5348 }
5349 else if (common->utf)
5350 {
5351 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5352 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5353 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5354 JUMPHERE(jump);
5355 }
5356 #elif PCRE2_CODE_UNIT_WIDTH == 16
5357 if (common->invalid_utf)
5358 {
5359 /* Skip continuation code units. */
5360 loop = LABEL();
5361 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5362 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5363 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5364 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5365 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5366 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5367 JUMPHERE(jump);
5368 }
5369 else if (common->utf)
5370 {
5371 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5372
5373 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5374 {
5375 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5376 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5377 CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5378 }
5379 else
5380 {
5381 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5382 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5383 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5385 }
5386 }
5387 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5388 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5389 JUMPHERE(start);
5390
5391 if (newlinecheck)
5392 {
5393 JUMPHERE(end);
5394 JUMPHERE(end2);
5395 }
5396
5397 return mainloop;
5398 }
5399
5400
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5401 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5402 {
5403 sljit_u32 i, count = chars->count;
5404
5405 if (count == 255)
5406 return;
5407
5408 if (count == 0)
5409 {
5410 chars->count = 1;
5411 chars->chars[0] = chr;
5412
5413 if (last)
5414 chars->last_count = 1;
5415 return;
5416 }
5417
5418 for (i = 0; i < count; i++)
5419 if (chars->chars[i] == chr)
5420 return;
5421
5422 if (count >= MAX_DIFF_CHARS)
5423 {
5424 chars->count = 255;
5425 return;
5426 }
5427
5428 chars->chars[count] = chr;
5429 chars->count = count + 1;
5430
5431 if (last)
5432 chars->last_count++;
5433 }
5434
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. For each of the leading
subject positions it collects the code units which may appear there.

  common     compiler data (utf, ucp, fcc and ctypes are read)
  cc         current position in the compiled pattern
  chars      output: one entry per subject position; a count of 255 marks
             a position whose set is not tracked
  max_chars  number of entries which may still be written
  rec_count  work budget shared by all recursive calls; it is decreased
             for every processed item

Returns the number of positions written by this call. Zero is returned as
soon as the budget is exhausted. Optional items and alternatives are handled
by a recursive call on the same chars array; the value returned by that call
becomes the new max_chars of the caller. */
BOOL stop_here, anychar, bitset, caseless;
int len, repeat, saved_len, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bitmap, *bitmap_end, bits;
PCRE2_SPTR alt, saved_cc, other;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Defaults: a literal after which scanning stops. */
  stop_here = TRUE;
  anychar = FALSE;
  bitset = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    stop_here = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* The whole assertion is skipped. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* Only the first occurrence is recorded (stop_here remains TRUE). */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    stop_here = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* First the case when the optional character is absent. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    stop_here = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every further alternative is scanned recursively, then the first
    alternative is processed by this loop. */
    alt = cc + GET(cc, 1);
    while (*alt == OP_ALT)
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alt += GET(alt, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    bitset = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    bitset = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    anychar = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    anychar = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    anychar = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    anychar = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Skips the opcode; the character is skipped after the fall through. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    anychar = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    anychar = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The count is used by the type item processed next. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    anychar = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (anychar)
    {
    /* The next 'repeat' positions are not tracked. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (bitset)
    {
    /* cc still points to the class opcode: the 32 byte bitmap follows it,
    and the optional repeat opcode follows the bitmap. */
    bitmap = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* First the case when the class matches nothing. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class which contains the value 255 is not tracked. */
      if (bitmap[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        bitmap_end = bitmap + 32;
        chr = 0;
        do
          {
          bits = *bitmap++;
          /* chr is the value which belongs to the lowest bit of 'bits'. */
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (bits == 0)
            chr += 8;
          else
            {
            do
              {
              if ((bits & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              bits >>= 1;
              chr++;
              }
            while (bits != 0);
            /* Round up to the first value of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bitmap < bitmap_end);
        /* Rewind, the same bitmap is used for the next position. */
        bitmap = bitmap_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning continues only after a fixed repeat count. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: cc points to its first code unit. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      GETCHAR(chr, cc);
      /* Both cases must have the same encoded length. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record each code unit of the character, 'repeat' times in a row. */
  saved_len = len;
  saved_cc = cc;
  for (;;)
    {
    other = othercase;
    do
      {
      len--;
      consumed++;

      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*other, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      other++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = saved_len;
    cc = saved_cc;
    }

  repeat = 1;
  if (stop_here)
    return consumed;
  }
}
5843
5844 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
jumpto_if_not_utf_char_start(struct sljit_compiler * compiler,sljit_s32 reg,struct sljit_label * label)5845 static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5846 {
5847 #if PCRE2_CODE_UNIT_WIDTH == 8
5848 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5849 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5850 #elif PCRE2_CODE_UNIT_WIDTH == 16
5851 OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5852 CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5853 #else
5854 #error "Unknown code width"
5855 #endif
5856 }
5857 #endif
5858
5859 #include "pcre2_jit_simd_inc.h"
5860
5861 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5862
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5863 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5864 {
5865 sljit_s32 i, j, max_i = 0, max_j = 0;
5866 sljit_u32 max_pri = 0;
5867 PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5868
5869 for (i = max - 1; i >= 1; i--)
5870 {
5871 if (chars[i].last_count > 2)
5872 {
5873 a1 = chars[i].chars[0];
5874 a2 = chars[i].chars[1];
5875 a_pri = chars[i].last_count;
5876
5877 j = i - max_fast_forward_char_pair_offset();
5878 if (j < 0)
5879 j = 0;
5880
5881 while (j < i)
5882 {
5883 b_pri = chars[j].last_count;
5884 if (b_pri > 2 && a_pri + b_pri >= max_pri)
5885 {
5886 b1 = chars[j].chars[0];
5887 b2 = chars[j].chars[1];
5888
5889 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5890 {
5891 max_pri = a_pri + b_pri;
5892 max_i = i;
5893 max_j = j;
5894 }
5895 }
5896 j++;
5897 }
5898 }
5899 }
5900
5901 if (max_pri == 0)
5902 return FALSE;
5903
5904 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5905 return TRUE;
5906 }
5907
5908 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5909
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
/* Emits code which moves STR_PTR forward to the next position where the
code unit 'offset' units ahead is char1 or char2.

  common  compiler data
  char1   first accepted code unit
  char2   second accepted code unit (may be the same as char1)
  offset  distance of the checked unit from the start position; it must
          be zero unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is temporarily lowered to the
stored value plus offset + 1 (if that is smaller) and restored from TMP3
afterwards. If the input runs out, PCRE2_JIT_COMPLETE mode jumps to
common->failed_match, other modes simply continue after the emitted code.
TMP1 and TMP3 are overwritten. */
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *first_matched;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff;
BOOL limited = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limited)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limited)
  {
  /* STR_END = min(STR_END, stored end + offset + 1), original in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  /* The vectorized search replaces the scalar loop below. */
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limited)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

retry = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff = char1 ^ char2;
if (diff == 0)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, retry);
else if (is_powerof2(diff))
  {
  /* The two units differ in a single bit: set it, then compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, retry);
  }
else
  {
  first_matched = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, retry);
  JUMPHERE(first_matched);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The start position itself must be the first unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, retry);
  }
#endif

/* Undo the offset and the increment which followed the load. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (limited)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5995
fast_forward_first_n_chars(compiler_common * common)5996 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5997 {
5998 DEFINE_COMPILER;
5999 struct sljit_label *start;
6000 struct sljit_jump *match;
6001 fast_forward_char_data chars[MAX_N_CHARS];
6002 sljit_s32 offset;
6003 PCRE2_UCHAR mask;
6004 PCRE2_UCHAR *char_set, *char_set_end;
6005 int i, max, from;
6006 int range_right = -1, range_len;
6007 sljit_u8 *update_table = NULL;
6008 BOOL in_range;
6009 sljit_u32 rec_count;
6010
6011 for (i = 0; i < MAX_N_CHARS; i++)
6012 {
6013 chars[i].count = 0;
6014 chars[i].last_count = 0;
6015 }
6016
6017 rec_count = 10000;
6018 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6019
6020 if (max < 1)
6021 return FALSE;
6022
6023 /* Convert last_count to priority. */
6024 for (i = 0; i < max; i++)
6025 {
6026 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6027
6028 if (chars[i].count == 1)
6029 {
6030 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6031 /* Simplifies algorithms later. */
6032 chars[i].chars[1] = chars[i].chars[0];
6033 }
6034 else if (chars[i].count == 2)
6035 {
6036 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6037
6038 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6039 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6040 else
6041 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6042 }
6043 else
6044 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6045 }
6046
6047 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6048 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6049 return TRUE;
6050 #endif
6051
6052 in_range = FALSE;
6053 /* Prevent compiler "uninitialized" warning */
6054 from = 0;
6055 range_len = 4 /* minimum length */ - 1;
6056 for (i = 0; i <= max; i++)
6057 {
6058 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6059 {
6060 range_len = i - from;
6061 range_right = i - 1;
6062 }
6063
6064 if (i < max && chars[i].count < 255)
6065 {
6066 SLJIT_ASSERT(chars[i].count > 0);
6067 if (!in_range)
6068 {
6069 in_range = TRUE;
6070 from = i;
6071 }
6072 }
6073 else
6074 in_range = FALSE;
6075 }
6076
6077 if (range_right >= 0)
6078 {
6079 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6080 if (update_table == NULL)
6081 return TRUE;
6082 memset(update_table, IN_UCHARS(range_len), 256);
6083
6084 for (i = 0; i < range_len; i++)
6085 {
6086 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6087
6088 char_set = chars[range_right - i].chars;
6089 char_set_end = char_set + chars[range_right - i].count;
6090 do
6091 {
6092 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6093 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6094 char_set++;
6095 }
6096 while (char_set < char_set_end);
6097 }
6098 }
6099
6100 offset = -1;
6101 /* Scan forward. */
6102 for (i = 0; i < max; i++)
6103 {
6104 if (range_right == i)
6105 continue;
6106
6107 if (offset == -1)
6108 {
6109 if (chars[i].last_count >= 2)
6110 offset = i;
6111 }
6112 else if (chars[offset].last_count < chars[i].last_count)
6113 offset = i;
6114 }
6115
6116 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6117
6118 if (range_right < 0)
6119 {
6120 if (offset < 0)
6121 return FALSE;
6122 /* Works regardless the value is 1 or 2. */
6123 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6124 return TRUE;
6125 }
6126
6127 SLJIT_ASSERT(range_right != offset);
6128
6129 if (common->match_end_ptr != 0)
6130 {
6131 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6132 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6133 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6134 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6135 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6136 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6137 }
6138 else
6139 {
6140 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6141 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6142 }
6143
6144 SLJIT_ASSERT(range_right >= 0);
6145
6146 if (!HAS_VIRTUAL_REGISTERS)
6147 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6148
6149 start = LABEL();
6150 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6151
6152 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6153 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6154 #else
6155 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6156 #endif
6157
6158 if (!HAS_VIRTUAL_REGISTERS)
6159 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6160 else
6161 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6162
6163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6164 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6165
6166 if (offset >= 0)
6167 {
6168 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6170
6171 if (chars[offset].count == 1)
6172 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6173 else
6174 {
6175 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6176 if (is_powerof2(mask))
6177 {
6178 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6179 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6180 }
6181 else
6182 {
6183 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6184 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6185 JUMPHERE(match);
6186 }
6187 }
6188 }
6189
6190 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6191 if (common->utf && offset != 0)
6192 {
6193 if (offset < 0)
6194 {
6195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6197 }
6198 else
6199 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6200
6201 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6202
6203 if (offset < 0)
6204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6205 }
6206 #endif
6207
6208 if (offset >= 0)
6209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6210
6211 if (common->match_end_ptr != 0)
6212 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6213 else
6214 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6215 return TRUE;
6216 }
6217
fast_forward_first_char(compiler_common * common)6218 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6219 {
6220 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6221 PCRE2_UCHAR oc;
6222
6223 oc = first_char;
6224 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6225 {
6226 oc = TABLE_GET(first_char, common->fcc, first_char);
6227 #if defined SUPPORT_UNICODE
6228 if (first_char > 127 && (common->utf || common->ucp))
6229 oc = UCD_OTHERCASE(first_char);
6230 #endif
6231 }
6232
6233 fast_forward_first_char2(common, first_char, oc, 0);
6234 }
6235
fast_forward_newline(compiler_common * common)6236 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6237 {
6238 DEFINE_COMPILER;
6239 struct sljit_label *loop;
6240 struct sljit_jump *lastchar = NULL;
6241 struct sljit_jump *firstchar;
6242 struct sljit_jump *quit = NULL;
6243 struct sljit_jump *foundcr = NULL;
6244 struct sljit_jump *notfoundnl;
6245 jump_list *newline = NULL;
6246
6247 if (common->match_end_ptr != 0)
6248 {
6249 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6250 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6251 }
6252
6253 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6254 {
6255 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6256 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6257 {
6258 if (HAS_VIRTUAL_REGISTERS)
6259 {
6260 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6261 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6263 }
6264 else
6265 {
6266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6268 }
6269 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6270
6271 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6272 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6273 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6274 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6275 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6276 #endif
6277 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6278
6279 fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6280 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6281 }
6282 else
6283 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6284 {
6285 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6286 if (HAS_VIRTUAL_REGISTERS)
6287 {
6288 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6289 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6290 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6291 }
6292 else
6293 {
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6296 }
6297 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6298
6299 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6300 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6301 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6302 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6303 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6304 #endif
6305 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6306
6307 loop = LABEL();
6308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6309 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6310 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6311 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6312 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6313 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6314
6315 JUMPHERE(quit);
6316 JUMPHERE(lastchar);
6317 }
6318
6319 JUMPHERE(firstchar);
6320
6321 if (common->match_end_ptr != 0)
6322 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6323 return;
6324 }
6325
6326 if (HAS_VIRTUAL_REGISTERS)
6327 {
6328 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6329 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6330 }
6331 else
6332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6333
6334 /* Example: match /^/ to \r\n from offset 1. */
6335 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6336
6337 if (common->nltype == NLTYPE_ANY)
6338 move_back(common, NULL, FALSE);
6339 else
6340 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6341
6342 loop = LABEL();
6343 common->ff_newline_shortcut = loop;
6344
6345 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6346 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6347 {
6348 if (common->nltype == NLTYPE_ANYCRLF)
6349 {
6350 fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6351 if (common->mode != PCRE2_JIT_COMPLETE)
6352 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6353
6354 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6355 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6356 quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6357 }
6358 else
6359 {
6360 fast_forward_char_simd(common, common->newline, common->newline, 0);
6361
6362 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6363 if (common->mode != PCRE2_JIT_COMPLETE)
6364 {
6365 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
6366 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6367 }
6368 }
6369 }
6370 else
6371 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6372 {
6373 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6374 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6375 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6376 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6377 check_newlinechar(common, common->nltype, &newline, FALSE);
6378 set_jumps(newline, loop);
6379 }
6380
6381 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6382 {
6383 if (quit == NULL)
6384 {
6385 quit = JUMP(SLJIT_JUMP);
6386 JUMPHERE(foundcr);
6387 }
6388
6389 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6390 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6391 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6392 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6393 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6394 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6395 #endif
6396 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6397 JUMPHERE(notfoundnl);
6398 JUMPHERE(quit);
6399 }
6400
6401 if (lastchar)
6402 JUMPHERE(lastchar);
6403 JUMPHERE(firstchar);
6404
6405 if (common->match_end_ptr != 0)
6406 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6407 }
6408
6409 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6410
fast_forward_start_bits(compiler_common * common)6411 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6412 {
6413 DEFINE_COMPILER;
6414 const sljit_u8 *start_bits = common->re->start_bitmap;
6415 struct sljit_label *start;
6416 struct sljit_jump *partial_quit;
6417 #if PCRE2_CODE_UNIT_WIDTH != 8
6418 struct sljit_jump *found = NULL;
6419 #endif
6420 jump_list *matches = NULL;
6421
6422 if (common->match_end_ptr != 0)
6423 {
6424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6425 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6426 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6427 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6428 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6429 }
6430
6431 start = LABEL();
6432
6433 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6434 if (common->mode == PCRE2_JIT_COMPLETE)
6435 add_jump(compiler, &common->failed_match, partial_quit);
6436
6437 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6438 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6439
6440 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6441 {
6442 #if PCRE2_CODE_UNIT_WIDTH != 8
6443 if ((start_bits[31] & 0x80) != 0)
6444 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6445 else
6446 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6447 #elif defined SUPPORT_UNICODE
6448 if (common->utf && is_char7_bitset(start_bits, FALSE))
6449 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6450 #endif
6451 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6452 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6453 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6454 if (!HAS_VIRTUAL_REGISTERS)
6455 {
6456 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6457 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6458 }
6459 else
6460 {
6461 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6462 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6463 }
6464 JUMPTO(SLJIT_ZERO, start);
6465 }
6466 else
6467 set_jumps(matches, start);
6468
6469 #if PCRE2_CODE_UNIT_WIDTH != 8
6470 if (found != NULL)
6471 JUMPHERE(found);
6472 #endif
6473
6474 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6475
6476 if (common->mode != PCRE2_JIT_COMPLETE)
6477 JUMPHERE(partial_quit);
6478
6479 if (common->match_end_ptr != 0)
6480 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6481 }
6482
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6483 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6484 {
6485 DEFINE_COMPILER;
6486 struct sljit_label *loop;
6487 struct sljit_jump *toolong;
6488 struct sljit_jump *already_found;
6489 struct sljit_jump *found;
6490 struct sljit_jump *found_oc = NULL;
6491 jump_list *not_found = NULL;
6492 sljit_u32 oc, bit;
6493
6494 SLJIT_ASSERT(common->req_char_ptr != 0);
6495 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6496 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6497 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6498 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6499
6500 if (has_firstchar)
6501 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6502 else
6503 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6504
6505 oc = req_char;
6506 if (caseless)
6507 {
6508 oc = TABLE_GET(req_char, common->fcc, req_char);
6509 #if defined SUPPORT_UNICODE
6510 if (req_char > 127 && (common->utf || common->ucp))
6511 oc = UCD_OTHERCASE(req_char);
6512 #endif
6513 }
6514
6515 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6516 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6517 {
6518 not_found = fast_requested_char_simd(common, req_char, oc);
6519 }
6520 else
6521 #endif
6522 {
6523 loop = LABEL();
6524 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6525
6526 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6527
6528 if (req_char == oc)
6529 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6530 else
6531 {
6532 bit = req_char ^ oc;
6533 if (is_powerof2(bit))
6534 {
6535 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6536 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6537 }
6538 else
6539 {
6540 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6541 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6542 }
6543 }
6544 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6545 JUMPTO(SLJIT_JUMP, loop);
6546
6547 JUMPHERE(found);
6548 if (found_oc)
6549 JUMPHERE(found_oc);
6550 }
6551
6552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6553
6554 JUMPHERE(already_found);
6555 JUMPHERE(toolong);
6556 return not_found;
6557 }
6558
/* Emits the helper routine which reverts saved frame values. It is entered
with a fast call; the return address is kept in RETURN_ADDR.

The routine repeatedly inspects the word just below STACK_TOP (TMP2):
  TMP2 > 0   TMP2 is an offset from the local base. The two words below it on
             the stack are written to the locals at that offset and at
             offset + sizeof(sljit_sw), and three words are popped.
  TMP2 == 0  end of the reverting; fast return.
  TMP2 < 0   -TMP2 is an offset from the local base. The word below it on the
             stack is written to that local, and two words are popped.

TMP1 holds the local base address while the loop runs. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
/* sizeof is converted to sljit_sw before it is negated, so that the negative
offsets do not depend on unsigned wraparound. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(sljit_sw)(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  /* TMP1 is used as a scratch register here, so the local base is loaded
  into it again before the loop is repeated. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* TMP2 <= 0 here, so "not zero" means negative. */
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative offset: restore a single word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6612
/* Emits the helper routine for word boundary tests. It is entered with a fast
call; the return address is saved in LOCALS0.

The routine computes a 0/1 "word character" flag for the character before
STR_PTR (TMP3; it stays 0 when STR_PTR is not greater than arguments->begin)
and for the character at STR_PTR (TMP2; it stays 0 when check_str_end() takes
its jump). It returns with TMP2 = TMP2 ^ TMP3 and the zero flag set from that
XOR.

In invalid UTF mode two extra exits exist:
  - the previous character is invalid (invalid_utf1): the current character
    is read; if it is not INVALID_UTF_CHAR the normal path continues from the
    valid_utf label, otherwise the routine returns with TMP2 = -1;
  - the current character is invalid (invalid_utf2): the routine returns with
    TMP2 = TMP3. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The shifts by 4 below depend on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* The character (TMP1) and STR_PTR are preserved in RETURN_ADDR and
    TMP2 around move_back() / check_start_used_ptr(). */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* Underscore gives 1 directly. Otherwise the type fetched through
  getucdtype is tested against the ranges ucp_Ll..ucp_Lu and
  ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Characters above 255 skip the ctypes lookup, so TMP3 stays 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Get type of the current char, and put it to TMP2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* Re-entry point used by the invalid_utf1 handler below. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Combine the two flags and return to the saved address. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character is invalid. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The current character is invalid. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6773
/* Tries to replace a 256 bit class bitmap by a few range compares on TMP1.

The bitmap is scanned from character 0 to 255 and every position where the
bit value changes is recorded in ranges[]. With nclass an extra boundary at
256 is recorded when the last bit is 0; without nclass it is recorded when the
last bit is 1. The optimization is used only when at most four boundaries are
found.

Arguments:
  common      compiler state
  bits        the 32 byte class bitmap
  nclass      see above
  invert      when TRUE the meaning of the bitmap bits is inverted
  backtracks  receives the jumps which are taken when the character in TMP1
              falls into a range whose (possibly inverted) bit is 0

Returns TRUE when code was emitted, FALSE when the bitmap needs more
boundaries than are supported (nothing is emitted in that case). */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* A whole byte which matches the current bit value is skipped at once. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      /* The bit value changes at i: record a boundary. */
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on, bit is the (possibly inverted) value of the first range. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* A single inner range, or a single character when it has length one. */
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two ranges of equal length whose start values differ in exactly one
  bit: setting that bit in TMP1 maps the first range onto the second, so a
  single range check is enough. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i tracks how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6925
/* Tries to replace a 256 bit class bitmap by a short list of equality tests
combined with conditional moves.

The selected characters (the set bits of the bitmap, or the clear bits when
nclass is TRUE) are collected in char_list[]. A character whose 0x20 bit is
set is merged with an earlier entry equal to that character minus 0x20: the
merged entry is marked with 0x100 and gets the 0x20 bit set, so that both
characters can be tested with one compare after OR-ing 0x20 into TMP1.

The emitted code leaves a non-zero value in TMP2 when TMP1 equals one of the
listed characters and zero otherwise, and adds one jump to *backtracks which
depends on that value and on nclass / invert.

Returns FALSE (nothing emitted) when conditional moves are not available, when
more than MAX_CLASS_CHARS_SIZE entries would be needed, or when no character
is selected. Returns TRUE otherwise. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t byte;
sljit_s32 type;
int i, j, k, len, c;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

len = 0;

for (i = 0; i < 32; i++)
  {
  byte = bits[i];

  if (nclass)
    byte = ~byte;

  j = 0;
  while (byte != 0)
    {
    if (byte & 0x1)
      {
      c = i * 8 + j;

      /* k == len means "no entry to merge with". */
      k = len;

      if ((c & 0x20) != 0)
        {
        for (k = 0; k < len; k++)
          if (char_list[k] == c - 0x20)
            {
            char_list[k] |= 0x120;
            break;
            }
        }

      if (k == len)
        {
        if (len >= MAX_CLASS_CHARS_SIZE)
          return FALSE;

        char_list[len++] = (uint16_t) c;
        }
      }

    byte >>= 1;
    j++;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

i = 0;
/* From here on j counts the merged (0x100) entries. */
j = 0;

/* Character zero cannot be signalled by moving TMP1 into TMP2, so it sets
TMP2 from the zero flag instead. */
if (char_list[0] == 0)
  {
  i++;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* First pass: entries which were not merged. */
while (i < len)
  {
  if ((char_list[i] & 0x100) != 0)
    j++;
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  i++;
  }

/* Second pass: merged entries, tested after the 0x20 bit is set in TMP1. */
if (j != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (i = 0; i < len; i++)
    if ((char_list[i] & 0x100) != 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
      CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
      }
  }

if (invert)
  nclass = !nclass;

type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7026
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Emits an optimized test for the class bitmap when one of the two
strategies applies: the range based one is tried first, the character list
based one only if the former declines. Returns TRUE when code was
generated by either of them. May destroy TMP1. */
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);

return TRUE;
}
7034
static void check_anynewline(compiler_common *common)
{
/* Fast-call helper that tests whether the character in TMP1 is a newline
character: 0x0a-0x0d and 0x85 are always tested; 0x2028 and 0x2029 are
tested as well in 16/32 bit builds and, in Unicode-capable 8 bit builds,
when common->utf is set.

The result is accumulated in TMP2 by OR-ing condition flags, and the last
OR is emitted with SLJIT_SET_Z. TMP1 is modified (it is rebased by 0x0a)
and TMP2 is destroyed. */
DEFINE_COMPILER;
enum
  {
  nl_base = 0x0a,                  /* subtracted from TMP1 first */
  nl_range_len = 0x0d - 0x0a,      /* 0x0a..0x0d relative to nl_base */
  nl_u0085 = 0x85 - 0x0a,
  nl_u2029 = 0x2029 - 0x0a
  };

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, nl_base);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_range_len);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_u0085);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
#endif
  {
  /* Record the 0x85 result, then fold 0x2028 onto 0x2029 by setting bit 0
  (both values are already rebased) so that one comparison covers both. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_u2029);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

/* Merge the result of whichever comparison was emitted last. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7061
static void check_hspace(compiler_common *common)
{
/* Fast-call helper that tests whether the character in TMP1 is a horizontal
space character: 0x09, 0x20 and 0xa0 are always tested; 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 are tested as well in 16/32 bit
builds and, in Unicode-capable 8 bit builds, when common->utf is set.

The result is accumulated in TMP2 by OR-ing condition flags, and the last
OR is emitted with SLJIT_SET_Z. TMP2 is destroyed; TMP1 is rebased by
0x2000 on the extended path. */
DEFINE_COMPILER;
enum
  {
  hs_u0009 = 0x09,
  hs_u0020 = 0x20,
  hs_u00a0 = 0xa0,
  hs_u1680 = 0x1680,
  hs_u180e = 0x180e,
  hs_base = 0x2000,                /* subtracted from TMP1 before the rest */
  hs_range_len = 0x200A - 0x2000,  /* 0x2000..0x200a relative to hs_base */
  hs_u202f = 0x202f - 0x2000,
  hs_u205f = 0x205f - 0x2000,
  hs_u3000 = 0x3000 - 0x2000
  };

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u0009);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u0020);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u00a0);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
#endif
  {
  /* Each OP_FLAGS below records the comparison emitted just before it. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* The remaining values are compared relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, hs_base);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_range_len);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u202f);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u205f);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, hs_u3000);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

/* Merge the result of whichever comparison was emitted last. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7100
static void check_vspace(compiler_common *common)
{
/* Fast-call helper that tests whether the character in TMP1 is a vertical
space character: 0x0a-0x0d and 0x85 are always tested; 0x2028 and 0x2029
are tested as well in 16/32 bit builds and, in Unicode-capable 8 bit
builds, when common->utf is set.

The result is accumulated in TMP2 by OR-ing condition flags, and the last
OR is emitted with SLJIT_SET_Z. TMP1 is modified (it is rebased by 0x0a)
and TMP2 is destroyed. */
DEFINE_COMPILER;
enum
  {
  vs_base = 0x0a,                  /* subtracted from TMP1 first */
  vs_range_len = 0x0d - 0x0a,      /* 0x0a..0x0d relative to vs_base */
  vs_u0085 = 0x85 - 0x0a,
  vs_u2029 = 0x2029 - 0x0a
  };

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, vs_base);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, vs_range_len);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, vs_u0085);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
#endif
  {
  /* Record the 0x85 result, then fold 0x2028 onto 0x2029 by setting bit 0
  (both values are already rebased) so that one comparison covers both. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, vs_u2029);
  }
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */

/* Merge the result of whichever comparison was emitted last. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7128
static void do_casefulcmp(compiler_common *common)
{
/* Emits the fast-call routine that compares two code unit sequences exactly.

The generated code saves its return address in LOCALS0, moves STR_PTR back
by TMP2, and then loops: one code unit is loaded through TMP1 and one
through STR_PTR, both pointers advance, and TMP2 is decreased by
IN_UCHARS(1). The loop ends when the units differ or when TMP2 reaches
zero. The routine returns through TMP1, which is reloaded from LOCALS0.

Three load variants exist; post-update addressing is preferred, then
pre-update addressing, and plain loads followed by explicit pointer
increments are the fallback. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int char1_reg = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
int must_save;
int access_mode;   /* 1: post-update, 2: pre-update, 0: plain loads */

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* When STR_END / STACK_TOP are borrowed as scratch registers, their values
are parked in TMP3 / RETURN_ADDR for the duration of the loop. */
must_save = (char1_reg == STR_END);
if (must_save)
  {
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* The SLJIT_MEM_SUPP calls are only used to choose the load variant. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  access_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  access_mode = 2;
else
  access_mode = 0;

if (access_mode == 2)
  {
  /* Pre-update loads start one code unit before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (access_mode)
  {
  case 1:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Common loop tail: leave on a difference, otherwise count down TMP2. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (access_mode == 2)
  {
  /* Undo the initial bias applied for pre-update addressing. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

if (must_save)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7208
static void do_caselesscmp(compiler_common *common)
{
/* Emits the fast-call routine that compares two code unit sequences after
mapping each code unit through the common->lcc table.

The generated code saves its return address in LOCALS0 and STR_END in
LOCALS1 (STR_END serves as the first character register), moves STR_PTR
back by TMP2, and then loops: one code unit is loaded through TMP1 and one
through STR_PTR, each is replaced by its table entry (in non 8 bit builds
values above 255 skip the lookup), and TMP2 is decreased by IN_UCHARS(1).
The loop ends when the mapped units differ or when TMP2 reaches zero. The
routine returns through TMP1, which is reloaded from LOCALS0. */
DEFINE_COMPILER;
struct sljit_jump *skip;
struct sljit_label *loop;
int char1_reg = STR_END;
int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
int lcc_table = HAS_VIRTUAL_REGISTERS ? STACK_LIMIT : TMP3;
int access_mode;   /* 1: post-update, 2: pre-update, 0: plain loads */
int must_save;

/* The SLJIT_MEM_SUPP calls are only used to choose the load variant. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  access_mode = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  access_mode = 2;
else
  access_mode = 0;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

/* When STACK_TOP / STACK_LIMIT are borrowed, their values are parked in
TMP3 / RETURN_ADDR for the duration of the loop. */
must_save = (char2_reg == STACK_TOP);
if (must_save)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

switch (access_mode)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads start one code unit before the data. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both code units through the table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
skip = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

if (access_mode == 0)
  {
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Loop tail: leave on a difference, otherwise count down TMP2. */
skip = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(skip);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (access_mode == 2)
  {
  /* Undo the initial bias applied for pre-update addressing. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

if (must_save)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7306
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
/* Emits the comparison of one pattern character (one code unit, plus its
extra code units when common->utf is set and HAS_EXTRALEN(*cc) holds)
against the subject data located at STR_PTR - context->length.

context carries the state between consecutive calls: length is the amount
of data still to be compared, sourcereg is the register that receives the
next load (-1 before the first call), and, when unaligned reads are used,
c / oc collect the expected values and the case bits of up to four bytes so
that they can be checked with a single compare. A mismatch jump is added to
backtracks for every emitted compare.

Returns cc advanced past the code units that were processed. */
DEFINE_COMPILER;
unsigned int case_bit = 0;
PCRE2_SPTR case_unit = NULL;
#ifdef SUPPORT_UNICODE
int units_left;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  case_bit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(case_bit);
  /* Split the returned value into the position of the code unit that
  differs between the two cases and the bit that differs. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  case_unit = cc + (case_bit >> 8);
  case_bit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  case_unit = cc + (case_bit >> 9);
  case_bit = ((case_bit & 0x100) != 0) ? ((case_bit & 0xff) << 8) : (case_bit & 0xff);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this sequence: emit the initial load into TMP1. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
units_left = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  units_left += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported: queue the expected code unit and its case
  bit; the compare is emitted once a whole chunk has been collected. */
  if (case_bit == 0 || case_unit != cc)
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc | case_bit;
    context->oc.asuchars[context->ucharptr] = case_bit;
    }
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Emit the load of the next chunk into the current load register, then
    switch to the other register, which holds the chunk loaded earlier. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    context->sourcereg = (context->sourcereg != TMP1) ? TMP1 : TMP2;

    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode: one code unit is
  loaded and compared at a time. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = (context->sourcereg != TMP1) ? TMP1 : TMP2;

  if (case_bit == 0 || case_unit != cc)
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
  else
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, case_bit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | case_bit));
    }

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  units_left--;
  }
while (units_left > 0);
#endif

return cc;
}
7459
7460 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7461
/* Rebases the value held in typereg so that it is relative to (value).
typeoffset records the amount that has already been subtracted from typereg;
a single ADD or SUB is emitted only when the requested offset differs from
the current one, and typeoffset is updated afterwards. In other words the sum
typereg + typeoffset is left unchanged.

Expects typereg and typeoffset to be in scope at the point of use. (value) is
evaluated more than once, so it must not have side effects.

The body is wrapped in do { } while (0) so that the macro behaves as one
statement: under an unbraced "if" the typeoffset update is not executed
unconditionally, and an "else" may follow the invocation. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7471
/* Rebases the character held in TMP1 so that it is relative to (value).
charoffset records the amount that has already been subtracted from TMP1; a
single ADD or SUB is emitted only when the requested offset differs from the
current one, and charoffset is updated afterwards. In other words the sum
TMP1 + charoffset is left unchanged.

Expects charoffset to be in scope at the point of use. (value) is evaluated
more than once, so it must not have side effects.

The body is wrapped in do { } while (0) so that the macro behaves as one
statement: under an unbraced "if" the charoffset update is not executed
unconditionally, and an "else" may follow the invocation. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7481
7482 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7483
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7484 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7485 {
7486 DEFINE_COMPILER;
7487 jump_list *found = NULL;
7488 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7489 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7490 struct sljit_jump *jump = NULL;
7491 PCRE2_SPTR ccbegin;
7492 int compares, invertcmp, numberofcmps;
7493 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7494 BOOL utf = common->utf;
7495 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7496
7497 #ifdef SUPPORT_UNICODE
7498 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
7499 BOOL charsaved = FALSE;
7500 int typereg = TMP1;
7501 const sljit_u32 *other_cases;
7502 sljit_uw typeoffset;
7503 #endif /* SUPPORT_UNICODE */
7504
7505 /* Scanning the necessary info. */
7506 cc++;
7507 ccbegin = cc;
7508 compares = 0;
7509
7510 if (cc[-1] & XCL_MAP)
7511 {
7512 min = 0;
7513 cc += 32 / sizeof(PCRE2_UCHAR);
7514 }
7515
7516 while (*cc != XCL_END)
7517 {
7518 compares++;
7519 if (*cc == XCL_SINGLE)
7520 {
7521 cc ++;
7522 GETCHARINCTEST(c, cc);
7523 if (c > max) max = c;
7524 if (c < min) min = c;
7525 #ifdef SUPPORT_UNICODE
7526 needschar = TRUE;
7527 #endif /* SUPPORT_UNICODE */
7528 }
7529 else if (*cc == XCL_RANGE)
7530 {
7531 cc ++;
7532 GETCHARINCTEST(c, cc);
7533 if (c < min) min = c;
7534 GETCHARINCTEST(c, cc);
7535 if (c > max) max = c;
7536 #ifdef SUPPORT_UNICODE
7537 needschar = TRUE;
7538 #endif /* SUPPORT_UNICODE */
7539 }
7540 #ifdef SUPPORT_UNICODE
7541 else
7542 {
7543 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7544 cc++;
7545 if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7546 {
7547 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7548 while (*other_cases != NOTACHAR)
7549 {
7550 if (*other_cases > max) max = *other_cases;
7551 if (*other_cases < min) min = *other_cases;
7552 other_cases++;
7553 }
7554 }
7555 else
7556 {
7557 max = READ_CHAR_MAX;
7558 min = 0;
7559 }
7560
7561 switch(*cc)
7562 {
7563 case PT_ANY:
7564 /* Any either accepts everything or ignored. */
7565 if (cc[-1] == XCL_PROP)
7566 {
7567 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7568 if (list == backtracks)
7569 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7570 return;
7571 }
7572 break;
7573
7574 case PT_LAMP:
7575 case PT_GC:
7576 case PT_PC:
7577 case PT_ALNUM:
7578 needstype = TRUE;
7579 break;
7580
7581 case PT_SC:
7582 needsscript = TRUE;
7583 break;
7584
7585 case PT_SPACE:
7586 case PT_PXSPACE:
7587 case PT_WORD:
7588 case PT_PXGRAPH:
7589 case PT_PXPRINT:
7590 case PT_PXPUNCT:
7591 needstype = TRUE;
7592 needschar = TRUE;
7593 break;
7594
7595 case PT_CLIST:
7596 case PT_UCNC:
7597 needschar = TRUE;
7598 break;
7599
7600 default:
7601 SLJIT_UNREACHABLE();
7602 break;
7603 }
7604 cc += 2;
7605 }
7606 #endif /* SUPPORT_UNICODE */
7607 }
7608 SLJIT_ASSERT(compares > 0);
7609
7610 /* We are not necessary in utf mode even in 8 bit mode. */
7611 cc = ccbegin;
7612 if ((cc[-1] & XCL_NOT) != 0)
7613 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7614 else
7615 {
7616 #ifdef SUPPORT_UNICODE
7617 read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
7618 #else /* !SUPPORT_UNICODE */
7619 read_char(common, min, max, NULL, 0);
7620 #endif /* SUPPORT_UNICODE */
7621 }
7622
7623 if ((cc[-1] & XCL_HASPROP) == 0)
7624 {
7625 if ((cc[-1] & XCL_MAP) != 0)
7626 {
7627 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7628 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7629 {
7630 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7631 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7632 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7633 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7634 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7635 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7636 }
7637
7638 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7639 JUMPHERE(jump);
7640
7641 cc += 32 / sizeof(PCRE2_UCHAR);
7642 }
7643 else
7644 {
7645 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7646 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7647 }
7648 }
7649 else if ((cc[-1] & XCL_MAP) != 0)
7650 {
7651 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7652 #ifdef SUPPORT_UNICODE
7653 charsaved = TRUE;
7654 #endif /* SUPPORT_UNICODE */
7655 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7656 {
7657 #if PCRE2_CODE_UNIT_WIDTH == 8
7658 jump = NULL;
7659 if (common->utf)
7660 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7661 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7662
7663 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7664 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7665 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7666 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7667 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7668 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7669
7670 #if PCRE2_CODE_UNIT_WIDTH == 8
7671 if (common->utf)
7672 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7673 JUMPHERE(jump);
7674 }
7675
7676 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7677 cc += 32 / sizeof(PCRE2_UCHAR);
7678 }
7679
7680 #ifdef SUPPORT_UNICODE
7681 if (needstype || needsscript)
7682 {
7683 if (needschar && !charsaved)
7684 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7685
7686 #if PCRE2_CODE_UNIT_WIDTH == 32
7687 if (!common->utf)
7688 {
7689 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7691 JUMPHERE(jump);
7692 }
7693 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7694
7695 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7696 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7697 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7699 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7700 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7701 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7702 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7703
7704 /* Before anything else, we deal with scripts. */
7705 if (needsscript)
7706 {
7707 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7708 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7709 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7710
7711 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7712
7713 ccbegin = cc;
7714
7715 while (*cc != XCL_END)
7716 {
7717 if (*cc == XCL_SINGLE)
7718 {
7719 cc ++;
7720 GETCHARINCTEST(c, cc);
7721 }
7722 else if (*cc == XCL_RANGE)
7723 {
7724 cc ++;
7725 GETCHARINCTEST(c, cc);
7726 GETCHARINCTEST(c, cc);
7727 }
7728 else
7729 {
7730 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7731 cc++;
7732 if (*cc == PT_SC)
7733 {
7734 compares--;
7735 invertcmp = (compares == 0 && list != backtracks);
7736 if (cc[-1] == XCL_NOTPROP)
7737 invertcmp ^= 0x1;
7738 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7739 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7740 }
7741 cc += 2;
7742 }
7743 }
7744
7745 cc = ccbegin;
7746
7747 if (needstype)
7748 {
7749 /* TMP2 has already been shifted by 2 */
7750 if (!needschar)
7751 {
7752 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7753 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7754
7755 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7756 }
7757 else
7758 {
7759 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7760 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7761
7762 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7763 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7764 typereg = RETURN_ADDR;
7765 }
7766 }
7767 else if (needschar)
7768 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7769 }
7770 else if (needstype)
7771 {
7772 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7773 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7774
7775 if (!needschar)
7776 {
7777 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7778
7779 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7780 }
7781 else
7782 {
7783 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7784
7785 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7786 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7787 typereg = RETURN_ADDR;
7788 }
7789 }
7790 else if (needschar)
7791 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7792 }
7793 #endif /* SUPPORT_UNICODE */
7794
7795 /* Generating code. */
7796 charoffset = 0;
7797 numberofcmps = 0;
7798 #ifdef SUPPORT_UNICODE
7799 typeoffset = 0;
7800 #endif /* SUPPORT_UNICODE */
7801
7802 while (*cc != XCL_END)
7803 {
7804 compares--;
7805 invertcmp = (compares == 0 && list != backtracks);
7806 jump = NULL;
7807
7808 if (*cc == XCL_SINGLE)
7809 {
7810 cc ++;
7811 GETCHARINCTEST(c, cc);
7812
7813 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7814 {
7815 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7816 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7817 numberofcmps++;
7818 }
7819 else if (numberofcmps > 0)
7820 {
7821 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7822 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7823 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7824 numberofcmps = 0;
7825 }
7826 else
7827 {
7828 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7829 numberofcmps = 0;
7830 }
7831 }
7832 else if (*cc == XCL_RANGE)
7833 {
7834 cc ++;
7835 GETCHARINCTEST(c, cc);
7836 SET_CHAR_OFFSET(c);
7837 GETCHARINCTEST(c, cc);
7838
7839 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7840 {
7841 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7842 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7843 numberofcmps++;
7844 }
7845 else if (numberofcmps > 0)
7846 {
7847 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7848 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7849 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7850 numberofcmps = 0;
7851 }
7852 else
7853 {
7854 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7855 numberofcmps = 0;
7856 }
7857 }
7858 #ifdef SUPPORT_UNICODE
7859 else
7860 {
7861 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7862 if (*cc == XCL_NOTPROP)
7863 invertcmp ^= 0x1;
7864 cc++;
7865 switch(*cc)
7866 {
7867 case PT_ANY:
7868 if (!invertcmp)
7869 jump = JUMP(SLJIT_JUMP);
7870 break;
7871
7872 case PT_LAMP:
7873 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
7874 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7875 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
7876 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7877 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
7878 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7879 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7880 break;
7881
7882 case PT_GC:
7883 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
7884 SET_TYPE_OFFSET(c);
7885 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
7886 break;
7887
7888 case PT_PC:
7889 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
7890 break;
7891
7892 case PT_SC:
7893 compares++;
7894 /* Do nothing. */
7895 break;
7896
7897 case PT_SPACE:
7898 case PT_PXSPACE:
7899 SET_CHAR_OFFSET(9);
7900 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
7901 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7902
7903 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
7904 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7905
7906 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
7907 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7908
7909 SET_TYPE_OFFSET(ucp_Zl);
7910 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
7911 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7912 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7913 break;
7914
7915 case PT_WORD:
7916 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
7917 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7918 /* Fall through. */
7919
7920 case PT_ALNUM:
7921 SET_TYPE_OFFSET(ucp_Ll);
7922 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
7923 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7924 SET_TYPE_OFFSET(ucp_Nd);
7925 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
7926 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7927 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7928 break;
7929
7930 case PT_CLIST:
7931 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7932
7933 /* At least three characters are required.
7934 Otherwise this case would be handled by the normal code path. */
7935 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
7936 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
7937
7938 /* Optimizing character pairs, if their difference is power of 2. */
7939 if (is_powerof2(other_cases[1] ^ other_cases[0]))
7940 {
7941 if (charoffset == 0)
7942 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7943 else
7944 {
7945 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7946 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7947 }
7948 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
7949 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7950 other_cases += 2;
7951 }
7952 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
7953 {
7954 if (charoffset == 0)
7955 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
7956 else
7957 {
7958 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7959 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7960 }
7961 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
7962 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7963
7964 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
7965 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7966
7967 other_cases += 3;
7968 }
7969 else
7970 {
7971 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7972 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7973 }
7974
7975 while (*other_cases != NOTACHAR)
7976 {
7977 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7978 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7979 }
7980 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7981 break;
7982
7983 case PT_UCNC:
7984 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
7985 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7986 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
7987 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7988 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
7989 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7990
7991 SET_CHAR_OFFSET(0xa0);
7992 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
7993 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7994 SET_CHAR_OFFSET(0);
7995 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
7996 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
7997 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7998 break;
7999
8000 case PT_PXGRAPH:
8001 /* C and Z groups are the farthest two groups. */
8002 SET_TYPE_OFFSET(ucp_Ll);
8003 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8004 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8005
8006 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8007
8008 /* In case of ucp_Cf, we overwrite the result. */
8009 SET_CHAR_OFFSET(0x2066);
8010 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8011 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8012
8013 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8014 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8015
8016 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8017 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8018
8019 JUMPHERE(jump);
8020 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8021 break;
8022
8023 case PT_PXPRINT:
8024 /* C and Z groups are the farthest two groups. */
8025 SET_TYPE_OFFSET(ucp_Ll);
8026 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8027 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8028
8029 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
8030 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
8031
8032 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8033
8034 /* In case of ucp_Cf, we overwrite the result. */
8035 SET_CHAR_OFFSET(0x2066);
8036 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8037 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8038
8039 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8040 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8041
8042 JUMPHERE(jump);
8043 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8044 break;
8045
8046 case PT_PXPUNCT:
8047 SET_TYPE_OFFSET(ucp_Sc);
8048 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
8049 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8050
8051 SET_CHAR_OFFSET(0);
8052 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
8053 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8054
8055 SET_TYPE_OFFSET(ucp_Pc);
8056 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
8057 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8058 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8059 break;
8060
8061 default:
8062 SLJIT_UNREACHABLE();
8063 break;
8064 }
8065 cc += 2;
8066 }
8067 #endif /* SUPPORT_UNICODE */
8068
8069 if (jump != NULL)
8070 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8071 }
8072
8073 if (found != NULL)
8074 set_jumps(found, LABEL());
8075 }
8076
8077 #undef SET_TYPE_OFFSET
8078 #undef SET_CHAR_OFFSET
8079
8080 #endif
8081
/* Emits the matching path for the position-only assertion opcodes handled by
the switch below: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY,
OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE.

Arguments:
  common      compiler state; newline settings (nltype, newline, nlmin,
              nlmax), mode, utf/invalid_utf, endonly, alt_circumflex and the
              shared helper jump lists (wordboundary, anynewline) are read
              from it
  type        the opcode to generate code for
  cc          pointer into the compiled pattern; it is only dereferenced for
              OP_REVERSE, where GET(cc, 0) yields the character count
  backtracks  jump list that receives every "assertion failed" jump

Returns:      cc unchanged, except for OP_REVERSE which returns
              cc + LINK_SIZE

Every case returns from inside the switch; falling out of it is treated as
unreachable. */
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  /* Start of subject: fail unless STR_PTR equals the "begin" field of
  jit_arguments. When HAS_VIRTUAL_REGISTERS is true, ARGUMENTS is first copied
  into a temporary and the field is loaded through that temporary; the same
  two-step pattern is used by the other cases that read jit_arguments. */
  case OP_SOD:
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  /* Same test as OP_SOD, but against the "str" field of jit_arguments. */
  case OP_SOM:
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  /* Word boundary tests: a fast call is queued on common->wordboundary and
  its outcome decides the backtrack.
  NOTE(review): the helper itself is not visible here; the code below reads
  its result from TMP2 (invalid UTF mode) or from the zero flag - confirm
  against the helper's definition. */
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* \B fails when TMP2 is non-zero; \b fails when TMP2 is <= 0 as a signed
    value. */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Declare the zero flag as current so the conditional jump below may be
  emitted without a preceding flag-setting operation in this function. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  /* End of subject, or a newline that is the last thing in the subject. */
  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0] is taken when STR_PTR is already at (or past) STR_END; it lands
  after all the newline checks. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units: TMP2 = STR_PTR + 2 units,
    TMP1 = first unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* Exactly two units left: go and compare them (jump[1]). */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 = (STR_PTR + 2 < STR_END) | (TMP1 != first newline unit);
      backtrack when the combined value is non-zero. */
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Only one unit is left and it equals the first newline unit: emit the
      partial-match check, then fail unconditionally.
      NOTE(review): check_partial() is defined elsewhere; presumably it
      records/reports the partial match - confirm. */
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    /* Both units must equal the two bytes stored in common->newline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed single-unit newline: it must be the last unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types. A leading CR is handled first:
         jump[1]  current unit is not CR
         jump[2]  CR with STR_PTR + 2 units beyond STR_END (CR is the last unit)
         jump[3]  CR followed by NL with nothing after it
    CR with more than one unit after it, or CR plus a non-NL last unit,
    backtracks. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the unit must be NL and must be the last one. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* Save STR_PTR in TMP3, read one character (which advances STR_PTR),
      require that it ended exactly at STR_END, then call the shared
      anynewline helper and backtrack when it reports zero. STR_PTR is
      restored afterwards because an assertion consumes nothing. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  /* Strict end of subject: fail while STR_PTR is below STR_END. */
  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  /* Dollar (non-multiline): fail at once when PCRE2_NOTEOL is set in the
  options field of jit_arguments. Otherwise behave like OP_EODN, or like a
  plain end-of-subject test when common->endonly is set. */
  case OP_DOLL:
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (!common->endonly)
    /* Reuse the OP_EODN code generator; its return value (cc) is ignored. */
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  /* Dollar (multiline): succeeds at the end of the subject (unless
  PCRE2_NOTEOL is set) or immediately before a newline. */
  case OP_DOLLM:
  /* jump[1]: not at the end yet, go to the newline test. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  /* jump[0]: at the end with NOTEOL clear, skip the newline test. */
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      /* Two or more units available: compare them (jump[1]). Otherwise a
      single unit is left; if it equals the first newline unit the
      partial-match check is emitted, and the assertion fails either way. */
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Look at the next character without consuming it and test it against
    the configured newline type. */
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  /* Circumflex (non-multiline): fail when STR_PTR is beyond the "begin"
  field, or when PCRE2_NOTBOL is set in the options field. */
  case OP_CIRC:
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  return cc;

  /* Circumflex (multiline): succeeds at "begin" (unless PCRE2_NOTBOL is set)
  or immediately after a newline. TMP2 holds "begin" for the later bounds
  check in the two-unit newline branch. */
  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  /* Reached only when STR_PTR is not beyond "begin": NOTBOL set means
  failure, otherwise jump[0] skips the newline test. */
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Unless common->alt_circumflex is set, the assertion fails at the very
  end of the subject. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: make sure two units exist between "begin"
    (TMP2) and STR_PTR, then compare the units at offsets -2 and -1. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Look at the previous character and test it against the configured
    newline type. */
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  /* Move STR_PTR back by the character count stored at cc; nothing is
  emitted for a count of zero. TMP2 holds "begin" as the lower bound. */
  case OP_REVERSE:
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* UTF mode: emit a run-time loop with TMP3 as the countdown. Each
    iteration fails if STR_PTR has reached "begin", otherwise steps back one
    character via move_back(). */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Fixed-width units: subtract the whole distance at once and fail if the
    result lies before "begin". */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
/* No other opcode is expected by this function. */
SLJIT_UNREACHABLE();
return cc;
}
8356
8357 #ifdef SUPPORT_UNICODE
8358
8359 #if PCRE2_CODE_UNIT_WIDTH != 32
8360
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8361 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8362 {
8363 PCRE2_SPTR start_subject = args->begin;
8364 PCRE2_SPTR end_subject = args->end;
8365 int lgb, rgb, ricount;
8366 PCRE2_SPTR prevcc, endcc, bptr;
8367 BOOL first = TRUE;
8368 uint32_t c;
8369
8370 prevcc = cc;
8371 endcc = NULL;
8372 do
8373 {
8374 GETCHARINC(c, cc);
8375 rgb = UCD_GRAPHBREAK(c);
8376
8377 if (first)
8378 {
8379 lgb = rgb;
8380 endcc = cc;
8381 first = FALSE;
8382 continue;
8383 }
8384
8385 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8386 break;
8387
8388 /* Not breaking between Regional Indicators is allowed only if there
8389 are an even number of preceding RIs. */
8390
8391 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8392 {
8393 ricount = 0;
8394 bptr = prevcc;
8395
8396 /* bptr is pointing to the left-hand character */
8397 while (bptr > start_subject)
8398 {
8399 bptr--;
8400 BACKCHAR(bptr);
8401 GETCHAR(c, bptr);
8402
8403 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8404 break;
8405
8406 ricount++;
8407 }
8408
8409 if ((ricount & 1) != 0) break; /* Grapheme break required */
8410 }
8411
8412 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8413 allows any number of them before a following Extended_Pictographic. */
8414
8415 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8416 lgb != ucp_gbExtended_Pictographic)
8417 lgb = rgb;
8418
8419 prevcc = endcc;
8420 endcc = cc;
8421 }
8422 while (cc < end_subject);
8423
8424 return endcc;
8425 }
8426
8427 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8428
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8429 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8430 {
8431 PCRE2_SPTR start_subject = args->begin;
8432 PCRE2_SPTR end_subject = args->end;
8433 int lgb, rgb, ricount;
8434 PCRE2_SPTR prevcc, endcc, bptr;
8435 BOOL first = TRUE;
8436 uint32_t c;
8437
8438 prevcc = cc;
8439 endcc = NULL;
8440 do
8441 {
8442 GETCHARINC_INVALID(c, cc, end_subject, break);
8443 rgb = UCD_GRAPHBREAK(c);
8444
8445 if (first)
8446 {
8447 lgb = rgb;
8448 endcc = cc;
8449 first = FALSE;
8450 continue;
8451 }
8452
8453 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8454 break;
8455
8456 /* Not breaking between Regional Indicators is allowed only if there
8457 are an even number of preceding RIs. */
8458
8459 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8460 {
8461 ricount = 0;
8462 bptr = prevcc;
8463
8464 /* bptr is pointing to the left-hand character */
8465 while (bptr > start_subject)
8466 {
8467 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8468
8469 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8470 break;
8471
8472 ricount++;
8473 }
8474
8475 if ((ricount & 1) != 0)
8476 break; /* Grapheme break required */
8477 }
8478
8479 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8480 allows any number of them before a following Extended_Pictographic. */
8481
8482 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8483 lgb != ucp_gbExtended_Pictographic)
8484 lgb = rgb;
8485
8486 prevcc = endcc;
8487 endcc = cc;
8488 }
8489 while (cc < end_subject);
8490
8491 return endcc;
8492 }
8493
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8494 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8495 {
8496 PCRE2_SPTR start_subject = args->begin;
8497 PCRE2_SPTR end_subject = args->end;
8498 int lgb, rgb, ricount;
8499 PCRE2_SPTR bptr;
8500 uint32_t c;
8501
8502 /* Patch by PH */
8503 /* GETCHARINC(c, cc); */
8504 c = *cc++;
8505
8506 #if PCRE2_CODE_UNIT_WIDTH == 32
8507 if (c >= 0x110000)
8508 return NULL;
8509 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8510 lgb = UCD_GRAPHBREAK(c);
8511
8512 while (cc < end_subject)
8513 {
8514 c = *cc;
8515 #if PCRE2_CODE_UNIT_WIDTH == 32
8516 if (c >= 0x110000)
8517 break;
8518 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8519 rgb = UCD_GRAPHBREAK(c);
8520
8521 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8522 break;
8523
8524 /* Not breaking between Regional Indicators is allowed only if there
8525 are an even number of preceding RIs. */
8526
8527 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8528 {
8529 ricount = 0;
8530 bptr = cc - 1;
8531
8532 /* bptr is pointing to the left-hand character */
8533 while (bptr > start_subject)
8534 {
8535 bptr--;
8536 c = *bptr;
8537 #if PCRE2_CODE_UNIT_WIDTH == 32
8538 if (c >= 0x110000)
8539 break;
8540 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8541
8542 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8543
8544 ricount++;
8545 }
8546
8547 if ((ricount & 1) != 0)
8548 break; /* Grapheme break required */
8549 }
8550
8551 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8552 allows any number of them before a following Extended_Pictographic. */
8553
8554 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8555 lgb != ucp_gbExtended_Pictographic)
8556 lgb = rgb;
8557
8558 cc++;
8559 }
8560
8561 return cc;
8562 }
8563
8564 #endif /* SUPPORT_UNICODE */
8565
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8566 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8567 {
8568 DEFINE_COMPILER;
8569 int length;
8570 unsigned int c, oc, bit;
8571 compare_context context;
8572 struct sljit_jump *jump[3];
8573 jump_list *end_list;
8574 #ifdef SUPPORT_UNICODE
8575 PCRE2_UCHAR propdata[5];
8576 #endif /* SUPPORT_UNICODE */
8577
8578 switch(type)
8579 {
8580 case OP_NOT_DIGIT:
8581 case OP_DIGIT:
8582 /* Digits are usually 0-9, so it is worth to optimize them. */
8583 if (check_str_ptr)
8584 detect_partial_match(common, backtracks);
8585 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8586 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8587 read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8588 else
8589 #endif
8590 read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8591 /* Flip the starting bit in the negative case. */
8592 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
8593 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8594 return cc;
8595
8596 case OP_NOT_WHITESPACE:
8597 case OP_WHITESPACE:
8598 if (check_str_ptr)
8599 detect_partial_match(common, backtracks);
8600 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8601 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8602 read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8603 else
8604 #endif
8605 read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8606 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
8607 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8608 return cc;
8609
8610 case OP_NOT_WORDCHAR:
8611 case OP_WORDCHAR:
8612 if (check_str_ptr)
8613 detect_partial_match(common, backtracks);
8614 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8615 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8616 read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8617 else
8618 #endif
8619 read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8620 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
8621 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8622 return cc;
8623
8624 case OP_ANY:
8625 if (check_str_ptr)
8626 detect_partial_match(common, backtracks);
8627 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8628 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8629 {
8630 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8631 end_list = NULL;
8632 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8633 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8634 else
8635 check_str_end(common, &end_list);
8636
8637 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8638 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8639 set_jumps(end_list, LABEL());
8640 JUMPHERE(jump[0]);
8641 }
8642 else
8643 check_newlinechar(common, common->nltype, backtracks, TRUE);
8644 return cc;
8645
8646 case OP_ALLANY:
8647 if (check_str_ptr)
8648 detect_partial_match(common, backtracks);
8649 #ifdef SUPPORT_UNICODE
8650 if (common->utf)
8651 {
8652 if (common->invalid_utf)
8653 {
8654 read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8655 return cc;
8656 }
8657
8658 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8659 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8660 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8661 #if PCRE2_CODE_UNIT_WIDTH == 8
8662 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8663 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8665 #elif PCRE2_CODE_UNIT_WIDTH == 16
8666 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8667 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8668 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
8669 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8670 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8671 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8672 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8673 JUMPHERE(jump[0]);
8674 return cc;
8675 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8676 }
8677 #endif /* SUPPORT_UNICODE */
8678 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8679 return cc;
8680
8681 case OP_ANYBYTE:
8682 if (check_str_ptr)
8683 detect_partial_match(common, backtracks);
8684 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8685 return cc;
8686
8687 #ifdef SUPPORT_UNICODE
8688 case OP_NOTPROP:
8689 case OP_PROP:
8690 propdata[0] = XCL_HASPROP;
8691 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8692 propdata[2] = cc[0];
8693 propdata[3] = cc[1];
8694 propdata[4] = XCL_END;
8695 if (check_str_ptr)
8696 detect_partial_match(common, backtracks);
8697 compile_xclass_matchingpath(common, propdata, backtracks);
8698 return cc + 2;
8699 #endif
8700
8701 case OP_ANYNL:
8702 if (check_str_ptr)
8703 detect_partial_match(common, backtracks);
8704 read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8705 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8706 /* We don't need to handle soft partial matching case. */
8707 end_list = NULL;
8708 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8709 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8710 else
8711 check_str_end(common, &end_list);
8712 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8713 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8714 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8715 jump[2] = JUMP(SLJIT_JUMP);
8716 JUMPHERE(jump[0]);
8717 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8718 set_jumps(end_list, LABEL());
8719 JUMPHERE(jump[1]);
8720 JUMPHERE(jump[2]);
8721 return cc;
8722
8723 case OP_NOT_HSPACE:
8724 case OP_HSPACE:
8725 if (check_str_ptr)
8726 detect_partial_match(common, backtracks);
8727
8728 if (type == OP_NOT_HSPACE)
8729 read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8730 else
8731 read_char(common, 0x9, 0x3000, NULL, 0);
8732
8733 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8734 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8735 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8736 return cc;
8737
8738 case OP_NOT_VSPACE:
8739 case OP_VSPACE:
8740 if (check_str_ptr)
8741 detect_partial_match(common, backtracks);
8742
8743 if (type == OP_NOT_VSPACE)
8744 read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8745 else
8746 read_char(common, 0xa, 0x2029, NULL, 0);
8747
8748 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8749 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8750 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8751 return cc;
8752
8753 #ifdef SUPPORT_UNICODE
8754 case OP_EXTUNI:
8755 if (check_str_ptr)
8756 detect_partial_match(common, backtracks);
8757
8758 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8759 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8760
8761 #if PCRE2_CODE_UNIT_WIDTH != 32
8762 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8763 common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8764 if (common->invalid_utf)
8765 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8766 #else
8767 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8768 common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8769 if (!common->utf || common->invalid_utf)
8770 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8771 #endif
8772
8773 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8774
8775 if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8776 {
8777 jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
8779 check_partial(common, TRUE);
8780 JUMPHERE(jump[0]);
8781 }
8782 return cc;
8783 #endif
8784
8785 case OP_CHAR:
8786 case OP_CHARI:
8787 length = 1;
8788 #ifdef SUPPORT_UNICODE
8789 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8790 #endif
8791
8792 if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8793 detect_partial_match(common, backtracks);
8794
8795 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8796 {
8797 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8798 if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8799 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8800
8801 context.length = IN_UCHARS(length);
8802 context.sourcereg = -1;
8803 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8804 context.ucharptr = 0;
8805 #endif
8806 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8807 }
8808
8809 #ifdef SUPPORT_UNICODE
8810 if (common->utf)
8811 {
8812 GETCHAR(c, cc);
8813 }
8814 else
8815 #endif
8816 c = *cc;
8817
8818 SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8819
8820 if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8821 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8822
8823 oc = char_othercase(common, c);
8824 read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8825
8826 SLJIT_ASSERT(!is_powerof2(c ^ oc));
8827
8828 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8829 {
8830 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
8831 CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8832 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8833 }
8834 else
8835 {
8836 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8837 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8838 JUMPHERE(jump[0]);
8839 }
8840 return cc + length;
8841
8842 case OP_NOT:
8843 case OP_NOTI:
8844 if (check_str_ptr)
8845 detect_partial_match(common, backtracks);
8846
8847 length = 1;
8848 #ifdef SUPPORT_UNICODE
8849 if (common->utf)
8850 {
8851 #if PCRE2_CODE_UNIT_WIDTH == 8
8852 c = *cc;
8853 if (c < 128 && !common->invalid_utf)
8854 {
8855 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8856 if (type == OP_NOT || !char_has_othercase(common, cc))
8857 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8858 else
8859 {
8860 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
8861 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
8862 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
8863 }
8864 /* Skip the variable-length character. */
8865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8866 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8867 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8869 JUMPHERE(jump[0]);
8870 return cc + 1;
8871 }
8872 else
8873 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8874 {
8875 GETCHARLEN(c, cc, length);
8876 }
8877 }
8878 else
8879 #endif /* SUPPORT_UNICODE */
8880 c = *cc;
8881
8882 if (type == OP_NOT || !char_has_othercase(common, cc))
8883 {
8884 read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
8885 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8886 }
8887 else
8888 {
8889 oc = char_othercase(common, c);
8890 read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
8891 bit = c ^ oc;
8892 if (is_powerof2(bit))
8893 {
8894 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
8895 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
8896 }
8897 else
8898 {
8899 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8900 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8901 }
8902 }
8903 return cc + length;
8904
8905 case OP_CLASS:
8906 case OP_NCLASS:
8907 if (check_str_ptr)
8908 detect_partial_match(common, backtracks);
8909
8910 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8911 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
8912 if (type == OP_NCLASS)
8913 read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
8914 else
8915 read_char(common, 0, bit, NULL, 0);
8916 #else
8917 if (type == OP_NCLASS)
8918 read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
8919 else
8920 read_char(common, 0, 255, NULL, 0);
8921 #endif
8922
8923 if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
8924 return cc + 32 / sizeof(PCRE2_UCHAR);
8925
8926 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8927 jump[0] = NULL;
8928 if (common->utf)
8929 {
8930 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
8931 if (type == OP_CLASS)
8932 {
8933 add_jump(compiler, backtracks, jump[0]);
8934 jump[0] = NULL;
8935 }
8936 }
8937 #elif PCRE2_CODE_UNIT_WIDTH != 8
8938 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
8939 if (type == OP_CLASS)
8940 {
8941 add_jump(compiler, backtracks, jump[0]);
8942 jump[0] = NULL;
8943 }
8944 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
8945
8946 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
8947 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
8948 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
8949 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
8950 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
8951 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8952
8953 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
8954 if (jump[0] != NULL)
8955 JUMPHERE(jump[0]);
8956 #endif
8957 return cc + 32 / sizeof(PCRE2_UCHAR);
8958
8959 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8960 case OP_XCLASS:
8961 if (check_str_ptr)
8962 detect_partial_match(common, backtracks);
8963 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
8964 return cc + GET(cc, 0) - 1;
8965 #endif
8966 }
8967 SLJIT_UNREACHABLE();
8968 return cc;
8969 }
8970
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8971 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8972 {
8973 /* This function consumes at least one input character. */
8974 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8975 DEFINE_COMPILER;
8976 PCRE2_SPTR ccbegin = cc;
8977 compare_context context;
8978 int size;
8979
8980 context.length = 0;
8981 do
8982 {
8983 if (cc >= ccend)
8984 break;
8985
8986 if (*cc == OP_CHAR)
8987 {
8988 size = 1;
8989 #ifdef SUPPORT_UNICODE
8990 if (common->utf && HAS_EXTRALEN(cc[1]))
8991 size += GET_EXTRALEN(cc[1]);
8992 #endif
8993 }
8994 else if (*cc == OP_CHARI)
8995 {
8996 size = 1;
8997 #ifdef SUPPORT_UNICODE
8998 if (common->utf)
8999 {
9000 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9001 size = 0;
9002 else if (HAS_EXTRALEN(cc[1]))
9003 size += GET_EXTRALEN(cc[1]);
9004 }
9005 else
9006 #endif
9007 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9008 size = 0;
9009 }
9010 else
9011 size = 0;
9012
9013 cc += 1 + size;
9014 context.length += IN_UCHARS(size);
9015 }
9016 while (size > 0 && context.length <= 128);
9017
9018 cc = ccbegin;
9019 if (context.length > 0)
9020 {
9021 /* We have a fixed-length byte sequence. */
9022 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9023 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9024
9025 context.sourcereg = -1;
9026 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9027 context.ucharptr = 0;
9028 #endif
9029 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9030 return cc;
9031 }
9032
9033 /* A non-fixed length character will be checked if length == 0. */
9034 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9035 }
9036
9037 /* Forward definitions. */
9038 static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9039 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9040
/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory() and pushes it onto the parent's backtrack list.

The macro relies on three names being in scope at the point of use:
`compiler`, `parent` and `backtrack` (the latter receives the new record).
The record's `cc` is set to `ccstart`. If the compiler reports an error after
the allocation, the enclosing function returns `error` immediately, so this
macro contains a hidden return statement. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9053
/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function returns without a value. Requires
`compiler`, `parent` and `backtrack` to be in scope at the point of use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9066
/* Views the in-scope `backtrack` pointer as a pointer to the given, more
specific backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9068
/* Emits the search for the capture group to use for a duplicate-name back
reference (OP_DNREF / OP_DNREFI).

The name table entries listed by the opcode are visited in order. For each
entry the address of its ovector pair is loaded into TMP2; the search stops at
the first entry whose start value differs from OVECTOR(1). When the last entry
also equals OVECTOR(1), a jump is added to `backtracks`, unless `backtracks`
is NULL or common->unset_backref is set.

On exit the OVECTOR offset is in TMP2. TMP1 holds the OVECTOR(1) value. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovec_index;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* Every entry except the last one: leave the search as soon as the group's
start value is different from OVECTOR(1). */
for (; remaining > 1; remaining--)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last entry is selected unconditionally; only the optional failure
check is emitted for it. */
ovec_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(found, LABEL());
}
9098
/* Emits code that matches the subject at STR_PTR against the text captured by
a back reference (OP_REF, OP_REFI, OP_DNREF or OP_DNREFI).

For OP_REF / OP_REFI the captured range is read through OVECTOR(). For the
duplicate-name opcodes the address of the selected ovector pair must already
be in TMP2 when the emitted code runs (compile_dnref_search() leaves it there).

Arguments:
  common      compiler state
  cc          points to the back reference opcode
  backtracks  jump list used when the reference does not match
  withchecks  when TRUE, the emitted code tests for an empty capture and, for
              numbered references, for an unset group (the latter is skipped
              when common->unset_backref is set)
  emptyfail   only meaningful with withchecks: when TRUE an empty capture
              jumps to backtracks, otherwise it matches without consuming
              any subject text
*/
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
/* Comparison routine used by the code unit based path. Both OP_REF and
OP_DNREF are caseful references, so both must use the caseful routine; only
OP_REFI and OP_DNREFI may use the caseless one. */
jump_list **compare_calls = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
/* Registers borrowed by the UTF caseless loop. Their previous contents are
saved to the iref_ptr slots and restored on every exit from the loop. */
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the captured text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* Character based caseless comparison for UTF mode.
NOTE(review): this path is only entered for OP_REFI, for which `ref` is always
TRUE, so the `else` branch of the next `if (ref)` is not reachable at present.
OP_DNREFI in UTF mode uses the code unit based path below; confirm whether
that is intended before widening this condition, since this path also needs
common->iref_ptr to be set up (see the assertion). */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the captured text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Save the borrowed registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* `jump` is taken when the whole captured text has been consumed (success);
  `partial` is taken when the subject runs out first. */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = ucd_records + TMP2 * 12 (computed as TMP2 * 4 + TMP2 * 8). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Subject character + other_case delta equals the captured character? */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset value means there is nothing else to try. Otherwise the
  caseless set starting at ucd_caseless_sets[caseset] is scanned: equal
  continues the main loop, a smaller value continues the scan, and any
  other value falls through to no_match. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. In complete mode running out of subject is a plain
  failure as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching exit: restore the registers, then check_partial(). */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the captured text (end - start); the zero flag is set
  for an empty capture. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  /* The comparison routine leaves a non-zero TMP2 on mismatch. */
  add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* The reference is longer than the remaining subject: compare only the
    part that is available. */
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Resolve the empty-capture jump created when withchecks is set. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9263
/* Compiles the matching path of a back reference that is followed by a repeat
item (OP_CRSTAR, OP_CRPLUS, OP_CRQUERY, OP_CRRANGE or their minimizing forms).

A ref_iterator_backtrack record is pushed onto `parent`. The repeat limits are
decoded into `min` and `max`; a `max` of zero stands for "no upper limit" in
the code below. Greedy repeats are handled in the first half of the function
and return early; minimizing repeats are handled in the second half.

Returns the pattern pointer after the repeat item, or NULL when the backtrack
record cannot be allocated. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* For numbered references the ovector offset is known at compile time. The
duplicate-name opcodes are one IMM2 longer, so cc is advanced to make the
repeat opcode sit at the same relative position in both cases. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The low bit of the repeat opcode selects the minimizing variant; this
relies on OP_CRSTAR being even. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Greedy repeat. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* The selected ovector address is kept in POSSESSIVE1 so that it can be
      reloaded into TMP2 at the start of every iteration. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one match is required, so an unset group (start equal to
      OVECTOR(1)) is a failure here. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is used as the iteration counter when a count is needed. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  /* Start of one iteration. */
  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: iterate again without saving a position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: save STR_PTR on the stack and iterate again. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Stack layout: STACK(0) is written with zero or STR_PTR,
STACK(1) is the iteration counter, and for duplicate-name references
STACK(2) holds the selected ovector address. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero iterations are tried first: skip the iteration code below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* Start of one iteration; also recorded as the matchingpath label of the
backtrack record. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the iteration and repeat until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
9470
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9471 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9472 {
9473 DEFINE_COMPILER;
9474 backtrack_common *backtrack;
9475 recurse_entry *entry = common->entries;
9476 recurse_entry *prev = NULL;
9477 sljit_sw start = GET(cc, 1);
9478 PCRE2_SPTR start_cc;
9479 BOOL needs_control_head;
9480
9481 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9482
9483 /* Inlining simple patterns. */
9484 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9485 {
9486 start_cc = common->start + start;
9487 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9488 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9489 return cc + 1 + LINK_SIZE;
9490 }
9491
9492 while (entry != NULL)
9493 {
9494 if (entry->start == start)
9495 break;
9496 prev = entry;
9497 entry = entry->next;
9498 }
9499
9500 if (entry == NULL)
9501 {
9502 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9503 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9504 return NULL;
9505 entry->next = NULL;
9506 entry->entry_label = NULL;
9507 entry->backtrack_label = NULL;
9508 entry->entry_calls = NULL;
9509 entry->backtrack_calls = NULL;
9510 entry->start = start;
9511
9512 if (prev != NULL)
9513 prev->next = entry;
9514 else
9515 common->entries = entry;
9516 }
9517
9518 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9519
9520 if (entry->entry_label == NULL)
9521 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9522 else
9523 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9524 /* Leave if the match is failed. */
9525 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9526 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9527 return cc + 1 + LINK_SIZE;
9528 }
9529
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9530 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9531 {
9532 PCRE2_SPTR begin;
9533 PCRE2_SIZE *ovector;
9534 sljit_u32 oveccount, capture_top;
9535
9536 if (arguments->callout == NULL)
9537 return 0;
9538
9539 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9540
9541 begin = arguments->begin;
9542 ovector = (PCRE2_SIZE*)(callout_block + 1);
9543 oveccount = callout_block->capture_top;
9544
9545 SLJIT_ASSERT(oveccount >= 1);
9546
9547 callout_block->version = 2;
9548 callout_block->callout_flags = 0;
9549
9550 /* Offsets in subject. */
9551 callout_block->subject_length = arguments->end - arguments->begin;
9552 callout_block->start_match = jit_ovector[0] - begin;
9553 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9554 callout_block->subject = begin;
9555
9556 /* Convert and copy the JIT offset vector to the ovector array. */
9557 callout_block->capture_top = 1;
9558 callout_block->offset_vector = ovector;
9559
9560 ovector[0] = PCRE2_UNSET;
9561 ovector[1] = PCRE2_UNSET;
9562 ovector += 2;
9563 jit_ovector += 2;
9564 capture_top = 1;
9565
9566 /* Convert pointers to sizes. */
9567 while (--oveccount != 0)
9568 {
9569 capture_top++;
9570
9571 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9572 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9573
9574 if (ovector[0] != PCRE2_UNSET)
9575 callout_block->capture_top = capture_top;
9576
9577 ovector += 2;
9578 jit_ovector += 2;
9579 }
9580
9581 return (arguments->callout)(callout_block, arguments->callout_data);
9582 }
9583
/* Byte offset of the field `arg` inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9586
/* Compiles the matching path of a callout item (OP_CALLOUT or the string
form, whose length is stored in the item itself).

The generated code reserves room on the backtrack stack for a
pcre2_callout_block followed by an offset array, fills in the fields that are
known at compile time, calls do_callout(), releases the room again and then
acts on the return value: a positive value backtracks, a negative value
aborts the match, and zero continues.

Returns the pattern pointer after the callout item, or NULL when the
backtrack record cannot be allocated. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_s32 mov_opcode;
unsigned int callout_length = (*cc == OP_CALLOUT)
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
/* Bytes needed for the offset array: one pair per capture group plus one. */
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Total size (block + offset array) in sljit_sw units, rounded up. */
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

allocate_stack(common, callout_arg_size);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* The callout number is only present in the numeric form. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
/* capture_top carries the number of ovector pairs into do_callout(). */
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* These pointer sized fields temporarily store internal variables. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

/* TMP2 is reused here: capture_last has already been stored above. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
/* PCRE2_SIZE fields are written with a move of matching width. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

/* String callout fields: all zero for the numeric form. */
if (*cc == OP_CALLOUT)
  {
  value1 = 0;
  value2 = 0;
  value3 = 0;
  }
else
  {
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
/* mark is TMP2 (loaded above) when marks are in use, otherwise zero. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
/* SLJIT_R1 = callout block, SLJIT_R2 = start of the JIT ovector. */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);

/* Check return value. */
/* One 32 bit compare against zero sets both the zero and the signed-greater
flags: positive -> backtrack, non-zero (hence negative) -> abort. */
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
if (common->abort_label == NULL)
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
return cc + callout_length;
}
9660
9661 #undef CALLOUT_ARG_SIZE
9662 #undef CALLOUT_ARG_OFFSET
9663
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9664 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9665 {
9666 while (TRUE)
9667 {
9668 switch (*cc)
9669 {
9670 case OP_CALLOUT_STR:
9671 cc += GET(cc, 1 + 2*LINK_SIZE);
9672 break;
9673
9674 case OP_NOT_WORD_BOUNDARY:
9675 case OP_WORD_BOUNDARY:
9676 case OP_CIRC:
9677 case OP_CIRCM:
9678 case OP_DOLL:
9679 case OP_DOLLM:
9680 case OP_CALLOUT:
9681 case OP_ALT:
9682 cc += PRIV(OP_lengths)[*cc];
9683 break;
9684
9685 case OP_KET:
9686 return FALSE;
9687
9688 default:
9689 return TRUE;
9690 }
9691 }
9692 }
9693
/* Emits the matching-path code for an assertion bracket (an opcode in the
OP_ASSERT .. OP_ASSERTBACK_NOT range, see the SLJIT_ASSERT below), optionally
preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common       compiler state. The quit / accept lists and labels, then_trap,
               local_quit_available, in_positive_assertion and
               positive_assertion_quit fields are saved on entry and put back
               before every return.
  cc           points at the assertion opcode, or at the OP_BRAZERO /
               OP_BRAMINZERO item in front of it
  backtrack    receives framesize and private_data_ptr; its matchingpath
               label is set for OP_BRAZERO and jumped to for OP_BRAMINZERO
  conditional  when TRUE the failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.topbacktracks; asserted to be FALSE when a
               BRAZERO / BRAMINZERO prefix is present

Returns:       cc + 1 + LINK_SIZE measured from the item that terminates the
               last alternative, or NULL when sljit reports a compiler error
               after an alternative (or its backtracking path) was compiled. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
/* Where the "assertion failed" jumps are collected. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* Where the "an alternative matched" jumps are collected (set below). */
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

/* Remember and step over an optional BRAZERO / BRAMINZERO prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For OP_ASSERT / OP_ASSERTBACK a match is gathered in the local tmp list;
for the negated forms a match goes straight to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
/* Advance cc by the link value stored at offset 1; the loop below tests
*cc against OP_ALT to decide whether another alternative follows. */
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  /* A popped value of zero bypasses the assertion; brajump is resolved
  by the JUMPHERE(brajump) calls near the end of this function. */
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is initialised in this branch. extrasize counts the slots
  pushed here: one for STR_PTR (dropped for a plain OP_BRA assertion when
  assert_needs_str_ptr_saving() returns FALSE) and one more when the
  control head has to be saved. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    /* Clear the control head and keep its previous value in STACK(1). */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* framesize + extrasize slots are allocated. STACK(0) receives STR_PTR.
  The previous private_data_ptr value (TMP1) goes to STACK(1), or to
  STACK(2) when the previous control head (TMP2) occupies STACK(1). The
  private_data_ptr local is set to STACK_TOP plus the allocated size. */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* From the second alternative on, reload STR_PTR from the slot where
  it was stored (only when such a slot was allocated). */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: put the saved state back and give up. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Jumps collected in common->accept while compiling this alternative
  land here. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      /* Non-conditional negated assertion: the frame is processed through
      the common->revertframes fast call before the stack is adjusted. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      /* Push one slot holding 0. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* This alternative matched: leave through the "found" list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: put the saved state back and give up. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Failures of this alternative continue here, i.e. with the next
  alternative or with the code after the loop. */
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  /* The normal fall-through path skips this block; only the jumps in
  positive_assertion_quit enter it to recompute STACK_TOP. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Reload the control head from STACK(1), where it was stored above. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    /* TMP1 = the previous private_data_ptr value stored in the last
    extra slot; it is written back after the stack is released. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* Failure exit: added to the failure target, except for OP_BRAZERO
  where the jump is bound to backtrack->matchingpath further down. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        /* Top slot: STR_PTR for OP_BRAZERO, 0 otherwise. */
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the success path and the failure jump continue from here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      /* Leave a single slot holding 0 on the top of the stack. */
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a BRAZERO / BRAMINZERO prefix the "found" jumps are bound
    right here and the list is emptied. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Put back the state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10120
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10121 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10122 {
10123 DEFINE_COMPILER;
10124 int stacksize;
10125
10126 if (framesize < 0)
10127 {
10128 if (framesize == no_frame)
10129 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10130 else
10131 {
10132 stacksize = needs_control_head ? 1 : 0;
10133 if (ket != OP_KET || has_alternatives)
10134 stacksize++;
10135
10136 if (stacksize > 0)
10137 free_stack(common, stacksize);
10138 }
10139
10140 if (needs_control_head)
10141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10142
10143 /* TMP2 which is set here used by OP_KETRMAX below. */
10144 if (ket == OP_KETRMAX)
10145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10146 else if (ket == OP_KETRMIN)
10147 {
10148 /* Move the STR_PTR to the private_data_ptr. */
10149 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10150 }
10151 }
10152 else
10153 {
10154 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10155 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10156 if (needs_control_head)
10157 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10158
10159 if (ket == OP_KETRMAX)
10160 {
10161 /* TMP2 which is set here used by OP_KETRMAX below. */
10162 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10163 }
10164 }
10165 if (needs_control_head)
10166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10167 }
10168
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10169 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10170 {
10171 DEFINE_COMPILER;
10172
10173 if (common->capture_last_ptr != 0)
10174 {
10175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10178 stacksize++;
10179 }
10180 if (common->optimized_cbracket[offset >> 1] == 0)
10181 {
10182 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10183 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10189 stacksize += 2;
10190 }
10191 return stacksize;
10192 }
10193
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10194 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10195 {
10196 if (PRIV(script_run)(ptr, endptr, FALSE))
10197 return endptr;
10198 return NULL;
10199 }
10200
10201 #ifdef SUPPORT_UNICODE
10202
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10203 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10204 {
10205 if (PRIV(script_run)(ptr, endptr, TRUE))
10206 return endptr;
10207 return NULL;
10208 }
10209
10210 #endif /* SUPPORT_UNICODE */
10211
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10212 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10213 {
10214 DEFINE_COMPILER;
10215
10216 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10217
10218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10219 #ifdef SUPPORT_UNICODE
10220 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10221 common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10222 #else
10223 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10224 #endif
10225
10226 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10227 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10228 }
10229
10230 /*
10231 Handling bracketed expressions is probably the most complex part.
10232
10233 Stack layout naming characters:
10234 S - Push the current STR_PTR
10235 0 - Push a 0 (NULL)
10236 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10237 before the next alternative. Not pushed if there are no alternatives.
10238 M - Any values pushed by the current alternative. Can be empty, or anything.
10239 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10240 L - Push the previous local (pointed by localptr) to the stack
  ()  - optional values stored on the stack
  ()* - optional, can be stored multiple times
10243
10244 The following list shows the regular expression templates, their PCRE byte codes
10245 and stack layout supported by pcre-sljit.
10246
10247 (?:) OP_BRA | OP_KET A M
10248 () OP_CBRA | OP_KET C M
10249 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10250 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10251 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10252 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10253 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10254 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10255 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10256 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10257 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10258 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10259 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10260 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10261 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10262 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10263 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10264 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10265 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10266 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10267 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10268 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10269
10270
10271 Stack layout naming characters:
10272 A - Push the alternative index (starting from 0) on the stack.
      Not pushed if there are no alternatives.
10274 M - Any values pushed by the current alternative. Can be empty, or anything.
10275
10276 The next list shows the possible content of a bracket:
10277 (|) OP_*BRA | OP_ALT ... M A
10278 (?()|) OP_*COND | OP_ALT M A
10279 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10280 Or nothing, if trace is unnecessary
10281 */
10282
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10283 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10284 {
10285 DEFINE_COMPILER;
10286 backtrack_common *backtrack;
10287 PCRE2_UCHAR opcode;
10288 int private_data_ptr = 0;
10289 int offset = 0;
10290 int i, stacksize;
10291 int repeat_ptr = 0, repeat_length = 0;
10292 int repeat_type = 0, repeat_count = 0;
10293 PCRE2_SPTR ccbegin;
10294 PCRE2_SPTR matchingpath;
10295 PCRE2_SPTR slot;
10296 PCRE2_UCHAR bra = OP_BRA;
10297 PCRE2_UCHAR ket;
10298 assert_backtrack *assert;
10299 BOOL has_alternatives;
10300 BOOL needs_control_head = FALSE;
10301 struct sljit_jump *jump;
10302 struct sljit_jump *skip;
10303 struct sljit_label *rmax_label = NULL;
10304 struct sljit_jump *braminzero = NULL;
10305
10306 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10307
10308 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10309 {
10310 bra = *cc;
10311 cc++;
10312 opcode = *cc;
10313 }
10314
10315 opcode = *cc;
10316 ccbegin = cc;
10317 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10318 ket = *matchingpath;
10319 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10320 {
10321 repeat_ptr = PRIVATE_DATA(matchingpath);
10322 repeat_length = PRIVATE_DATA(matchingpath + 1);
10323 repeat_type = PRIVATE_DATA(matchingpath + 2);
10324 repeat_count = PRIVATE_DATA(matchingpath + 3);
10325 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10326 if (repeat_type == OP_UPTO)
10327 ket = OP_KETRMAX;
10328 if (repeat_type == OP_MINUPTO)
10329 ket = OP_KETRMIN;
10330 }
10331
10332 matchingpath = ccbegin + 1 + LINK_SIZE;
10333 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10334 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10335 cc += GET(cc, 1);
10336
10337 has_alternatives = *cc == OP_ALT;
10338 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10339 {
10340 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10341 compile_time_checks_must_be_grouped_together);
10342 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10343 }
10344
10345 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10346 opcode = OP_SCOND;
10347
10348 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10349 {
10350 /* Capturing brackets has a pre-allocated space. */
10351 offset = GET2(ccbegin, 1 + LINK_SIZE);
10352 if (common->optimized_cbracket[offset] == 0)
10353 {
10354 private_data_ptr = OVECTOR_PRIV(offset);
10355 offset <<= 1;
10356 }
10357 else
10358 {
10359 offset <<= 1;
10360 private_data_ptr = OVECTOR(offset);
10361 }
10362 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10363 matchingpath += IMM2_SIZE;
10364 }
10365 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10366 {
10367 /* Other brackets simply allocate the next entry. */
10368 private_data_ptr = PRIVATE_DATA(ccbegin);
10369 SLJIT_ASSERT(private_data_ptr != 0);
10370 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10371 if (opcode == OP_ONCE)
10372 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10373 }
10374
10375 /* Instructions before the first alternative. */
10376 stacksize = 0;
10377 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10378 stacksize++;
10379 if (bra == OP_BRAZERO)
10380 stacksize++;
10381
10382 if (stacksize > 0)
10383 allocate_stack(common, stacksize);
10384
10385 stacksize = 0;
10386 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10387 {
10388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10389 stacksize++;
10390 }
10391
10392 if (bra == OP_BRAZERO)
10393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10394
10395 if (bra == OP_BRAMINZERO)
10396 {
10397 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10398 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10399 if (ket != OP_KETRMIN)
10400 {
10401 free_stack(common, 1);
10402 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10403 }
10404 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10405 {
10406 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10407 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10408 /* Nothing stored during the first run. */
10409 skip = JUMP(SLJIT_JUMP);
10410 JUMPHERE(jump);
10411 /* Checking zero-length iteration. */
10412 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10413 {
10414 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10415 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10416 }
10417 else
10418 {
10419 /* Except when the whole stack frame must be saved. */
10420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10421 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10422 }
10423 JUMPHERE(skip);
10424 }
10425 else
10426 {
10427 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10428 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10429 JUMPHERE(jump);
10430 }
10431 }
10432
10433 if (repeat_type != 0)
10434 {
10435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10436 if (repeat_type == OP_EXACT)
10437 rmax_label = LABEL();
10438 }
10439
10440 if (ket == OP_KETRMIN)
10441 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10442
10443 if (ket == OP_KETRMAX)
10444 {
10445 rmax_label = LABEL();
10446 if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10447 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10448 }
10449
10450 /* Handling capturing brackets and alternatives. */
10451 if (opcode == OP_ONCE)
10452 {
10453 stacksize = 0;
10454 if (needs_control_head)
10455 {
10456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10457 stacksize++;
10458 }
10459
10460 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10461 {
10462 /* Neither capturing brackets nor recursions are found in the block. */
10463 if (ket == OP_KETRMIN)
10464 {
10465 stacksize += 2;
10466 if (!needs_control_head)
10467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10468 }
10469 else
10470 {
10471 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10473 if (ket == OP_KETRMAX || has_alternatives)
10474 stacksize++;
10475 }
10476
10477 if (stacksize > 0)
10478 allocate_stack(common, stacksize);
10479
10480 stacksize = 0;
10481 if (needs_control_head)
10482 {
10483 stacksize++;
10484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10485 }
10486
10487 if (ket == OP_KETRMIN)
10488 {
10489 if (needs_control_head)
10490 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10492 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10493 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10495 }
10496 else if (ket == OP_KETRMAX || has_alternatives)
10497 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10498 }
10499 else
10500 {
10501 if (ket != OP_KET || has_alternatives)
10502 stacksize++;
10503
10504 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10505 allocate_stack(common, stacksize);
10506
10507 if (needs_control_head)
10508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10509
10510 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10511 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10512
10513 stacksize = needs_control_head ? 1 : 0;
10514 if (ket != OP_KET || has_alternatives)
10515 {
10516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10517 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10518 stacksize++;
10519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10520 }
10521 else
10522 {
10523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10525 }
10526 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10527 }
10528 }
10529 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10530 {
10531 /* Saving the previous values. */
10532 if (common->optimized_cbracket[offset >> 1] != 0)
10533 {
10534 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10535 allocate_stack(common, 2);
10536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10539 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10540 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10541 }
10542 else
10543 {
10544 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10545 allocate_stack(common, 1);
10546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10547 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10548 }
10549 }
10550 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10551 {
10552 /* Saving the previous value. */
10553 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10554 allocate_stack(common, 1);
10555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10557 }
10558 else if (has_alternatives)
10559 {
10560 /* Pushing the starting string pointer. */
10561 allocate_stack(common, 1);
10562 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10563 }
10564
10565 /* Generating code for the first alternative. */
10566 if (opcode == OP_COND || opcode == OP_SCOND)
10567 {
10568 if (*matchingpath == OP_CREF)
10569 {
10570 SLJIT_ASSERT(has_alternatives);
10571 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10572 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10573 matchingpath += 1 + IMM2_SIZE;
10574 }
10575 else if (*matchingpath == OP_DNCREF)
10576 {
10577 SLJIT_ASSERT(has_alternatives);
10578
10579 i = GET2(matchingpath, 1 + IMM2_SIZE);
10580 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10581 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10583 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10584 slot += common->name_entry_size;
10585 i--;
10586 while (i-- > 0)
10587 {
10588 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10589 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10590 slot += common->name_entry_size;
10591 }
10592 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10593 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10594 matchingpath += 1 + 2 * IMM2_SIZE;
10595 }
10596 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10597 {
10598 /* Never has other case. */
10599 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10600 SLJIT_ASSERT(!has_alternatives);
10601
10602 if (*matchingpath == OP_TRUE)
10603 {
10604 stacksize = 1;
10605 matchingpath++;
10606 }
10607 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10608 stacksize = 0;
10609 else if (*matchingpath == OP_RREF)
10610 {
10611 stacksize = GET2(matchingpath, 1);
10612 if (common->currententry == NULL)
10613 stacksize = 0;
10614 else if (stacksize == RREF_ANY)
10615 stacksize = 1;
10616 else if (common->currententry->start == 0)
10617 stacksize = stacksize == 0;
10618 else
10619 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10620
10621 if (stacksize != 0)
10622 matchingpath += 1 + IMM2_SIZE;
10623 }
10624 else
10625 {
10626 if (common->currententry == NULL || common->currententry->start == 0)
10627 stacksize = 0;
10628 else
10629 {
10630 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10631 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10632 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10633 while (stacksize > 0)
10634 {
10635 if ((int)GET2(slot, 0) == i)
10636 break;
10637 slot += common->name_entry_size;
10638 stacksize--;
10639 }
10640 }
10641
10642 if (stacksize != 0)
10643 matchingpath += 1 + 2 * IMM2_SIZE;
10644 }
10645
10646 /* The stacksize == 0 is a common "else" case. */
10647 if (stacksize == 0)
10648 {
10649 if (*cc == OP_ALT)
10650 {
10651 matchingpath = cc + 1 + LINK_SIZE;
10652 cc += GET(cc, 1);
10653 }
10654 else
10655 matchingpath = cc;
10656 }
10657 }
10658 else
10659 {
10660 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10661 /* Similar code as PUSH_BACKTRACK macro. */
10662 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10663 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10664 return NULL;
10665 memset(assert, 0, sizeof(assert_backtrack));
10666 assert->common.cc = matchingpath;
10667 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10668 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10669 }
10670 }
10671
10672 compile_matchingpath(common, matchingpath, cc, backtrack);
10673 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10674 return NULL;
10675
10676 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10677 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10678
10679 if (opcode == OP_ONCE)
10680 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10681
10682 if (opcode == OP_SCRIPT_RUN)
10683 match_script_run_common(common, private_data_ptr, backtrack);
10684
10685 stacksize = 0;
10686 if (repeat_type == OP_MINUPTO)
10687 {
10688 /* We need to preserve the counter. TMP2 will be used below. */
10689 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10690 stacksize++;
10691 }
10692 if (ket != OP_KET || bra != OP_BRA)
10693 stacksize++;
10694 if (offset != 0)
10695 {
10696 if (common->capture_last_ptr != 0)
10697 stacksize++;
10698 if (common->optimized_cbracket[offset >> 1] == 0)
10699 stacksize += 2;
10700 }
10701 if (has_alternatives && opcode != OP_ONCE)
10702 stacksize++;
10703
10704 if (stacksize > 0)
10705 allocate_stack(common, stacksize);
10706
10707 stacksize = 0;
10708 if (repeat_type == OP_MINUPTO)
10709 {
10710 /* TMP2 was set above. */
10711 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10712 stacksize++;
10713 }
10714
10715 if (ket != OP_KET || bra != OP_BRA)
10716 {
10717 if (ket != OP_KET)
10718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10719 else
10720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10721 stacksize++;
10722 }
10723
10724 if (offset != 0)
10725 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10726
10727 /* Skip and count the other alternatives. */
10728 i = 1;
10729 while (*cc == OP_ALT)
10730 {
10731 cc += GET(cc, 1);
10732 i++;
10733 }
10734
10735 if (has_alternatives)
10736 {
10737 if (opcode != OP_ONCE)
10738 {
10739 if (i <= 3)
10740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10741 else
10742 BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10743 }
10744 if (ket != OP_KETRMAX)
10745 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10746 }
10747
10748 /* Must be after the matchingpath label. */
10749 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10750 {
10751 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10752 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10753 }
10754
10755 if (ket == OP_KETRMAX)
10756 {
10757 if (repeat_type != 0)
10758 {
10759 if (has_alternatives)
10760 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10761 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10762 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10763 /* Drop STR_PTR for greedy plus quantifier. */
10764 if (opcode != OP_ONCE)
10765 free_stack(common, 1);
10766 }
10767 else if (opcode < OP_BRA || opcode >= OP_SBRA)
10768 {
10769 if (has_alternatives)
10770 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10771
10772 /* Checking zero-length iteration. */
10773 if (opcode != OP_ONCE)
10774 {
10775 /* This case includes opcodes such as OP_SCRIPT_RUN. */
10776 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10777 /* Drop STR_PTR for greedy plus quantifier. */
10778 if (bra != OP_BRAZERO)
10779 free_stack(common, 1);
10780 }
10781 else
10782 /* TMP2 must contain the starting STR_PTR. */
10783 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10784 }
10785 else
10786 JUMPTO(SLJIT_JUMP, rmax_label);
10787 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10788 }
10789
10790 if (repeat_type == OP_EXACT)
10791 {
10792 count_match(common);
10793 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10794 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10795 }
10796 else if (repeat_type == OP_UPTO)
10797 {
10798 /* We need to preserve the counter. */
10799 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10800 allocate_stack(common, 1);
10801 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10802 }
10803
10804 if (bra == OP_BRAZERO)
10805 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10806
10807 if (bra == OP_BRAMINZERO)
10808 {
10809 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10810 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10811 if (braminzero != NULL)
10812 {
10813 JUMPHERE(braminzero);
10814 /* We need to release the end pointer to perform the
10815 backtrack for the zero-length iteration. When
10816 framesize is < 0, OP_ONCE will do the release itself. */
10817 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10818 {
10819 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10820 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10821 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10822 }
10823 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10824 free_stack(common, 1);
10825 }
10826 /* Continue to the normal backtrack. */
10827 }
10828
10829 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10830 count_match(common);
10831
10832 cc += 1 + LINK_SIZE;
10833
10834 if (opcode == OP_ONCE)
10835 {
10836 /* We temporarily encode the needs_control_head in the lowest bit.
10837 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10838 the same value for small signed numbers (including negative numbers). */
10839 BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10840 }
10841 return cc + repeat_length;
10842 }
10843
/* Generates the matching path of a possessively repeated group: OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO,
and closed by OP_KETRPOS.

Arguments:
  common   the JIT compiler state
  cc       points to the group opcode (or to the leading OP_BRAPOSZERO)
  parent   the backtrack record this group is pushed onto

Returns the byte code position following OP_KETRPOS, or NULL when sljit
reports a compiler error.

Generated code, in outline:
  1. A block of stacksize machine words is allocated. It holds the saved
     values (the previous OVECTOR pair and, if tracked, capture_last for
     capturing groups; the current STR_PTR otherwise), the control head when
     needs_control_head is set, a frame when framesize >= 0, and - unless
     zero iterations are allowed - a flag word which is initialized to 1.
  2. Every alternative is compiled in turn. When an alternative matches, the
     end position is recorded, the flag word is cleared and control jumps
     back to the first alternative. The backtracking path of an alternative
     is emitted right after it, and restores STR_PTR to the last recorded
     position before falling into the next alternative.
  3. After the last alternative the flag word is tested: if it is still
     non-zero no iteration has matched, and the group backtracks.

For OP_SBRAPOS / OP_SCBRAPOS an iteration which ends where it started leaves
the loop through the emptymatch jump list. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int capture_priv_ptr = 0;
BOOL needs_control_head;
BOOL checks_empty;
int framesize;
int stacksize;
int offset = 0;
BOOL zero_allowed = FALSE;
PCRE2_SPTR alt_begin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop_start = NULL;
struct jump_list *empty_match = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero_allowed = TRUE;
  cc++;
  }

opcode = *cc;
checks_empty = (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS);
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  alt_begin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  capture_priv_ptr = OVECTOR_PRIV(offset);
  /* From here on offset is the index of the OVECTOR pair. */
  offset <<= 1;
  alt_begin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame: only the saved values, the control head and the flag. */
  stacksize = 1;
  if (offset != 0)
    stacksize = (common->capture_last_ptr != 0) ? 3 : 2;
  if (needs_control_head)
    stacksize++;
  if (!zero_allowed)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (offset != 0)
    {
    /* Save the previous capture pair (and capture_last). The loads and
    stores are interleaved, and TMP2 is reused for the control head. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

    stack = 2;
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag word is the last word of the block. When a control head is
  saved it is placed just before the flag, at index 'stack'. */
  if (!zero_allowed)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? stack + 1 : stack), SLJIT_IMM, 1);
  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
  }
else
  {
  /* A frame is needed: the flag and the control head come first, followed
  by the saved STR_PTR (non-capturing groups only), the previous value of
  the private data slot, and the frame itself. */
  stacksize = framesize + 1;
  if (!zero_allowed)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero_allowed)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back to the index of the control head (only meaningful when
  needs_control_head is set). */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), capture_priv_ptr, STR_PTR, 0);

loop_start = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, alt_begin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched. Restore STACK_TOP to the block allocated
  above, and record the new end position. For the groups which check
  empty iterations TMP1 receives the start of this iteration. */
  if (framesize == no_frame)
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (offset != 0)
    {
    if (framesize >= 0)
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), capture_priv_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), capture_priv_ptr, STR_PTR, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    }
  else if (framesize < 0)
    {
    if (opcode == OP_SBRAPOS)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
    if (opcode == OP_SBRAPOS)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
    }

  /* Even if the match is empty, we need to reset the control head. */
  if (needs_control_head)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

  if (checks_empty)
    add_jump(compiler, &empty_match, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

  /* At least one iteration has matched: clear the flag word. It is the
  last word of the block without a frame, and the first one with a frame. */
  if (!zero_allowed)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(framesize < 0 ? stacksize - 1 : 0), SLJIT_IMM, 0);

  JUMPTO(SLJIT_JUMP, loop_start);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: go back to the last recorded position. */
  if (offset != 0)
    {
    /* Last alternative: TMP2 is needed by the flag test after the loop. */
    if (framesize >= 0 && *cc == OP_KETRPOS)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), capture_priv_ptr);
    }
  else if (framesize < 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
    }

  if (*cc == OP_KETRPOS)
    break;
  alt_begin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero_allowed)
  {
  /* A flag word which is still non-zero means that no iteration matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(empty_match, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11126
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11127 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11128 {
11129 int class_len;
11130
11131 *opcode = *cc;
11132 *exact = 0;
11133
11134 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11135 {
11136 cc++;
11137 *type = OP_CHAR;
11138 }
11139 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11140 {
11141 cc++;
11142 *type = OP_CHARI;
11143 *opcode -= OP_STARI - OP_STAR;
11144 }
11145 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11146 {
11147 cc++;
11148 *type = OP_NOT;
11149 *opcode -= OP_NOTSTAR - OP_STAR;
11150 }
11151 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11152 {
11153 cc++;
11154 *type = OP_NOTI;
11155 *opcode -= OP_NOTSTARI - OP_STAR;
11156 }
11157 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11158 {
11159 cc++;
11160 *opcode -= OP_TYPESTAR - OP_STAR;
11161 *type = OP_END;
11162 }
11163 else
11164 {
11165 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11166 *type = *opcode;
11167 cc++;
11168 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11169 *opcode = cc[class_len - 1];
11170
11171 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11172 {
11173 *opcode -= OP_CRSTAR - OP_STAR;
11174 *end = cc + class_len;
11175
11176 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11177 {
11178 *exact = 1;
11179 *opcode -= OP_PLUS - OP_STAR;
11180 }
11181 }
11182 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11183 {
11184 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11185 *end = cc + class_len;
11186
11187 if (*opcode == OP_POSPLUS)
11188 {
11189 *exact = 1;
11190 *opcode = OP_POSSTAR;
11191 }
11192 }
11193 else
11194 {
11195 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11196 *max = GET2(cc, (class_len + IMM2_SIZE));
11197 *exact = GET2(cc, class_len);
11198
11199 if (*max == 0)
11200 {
11201 if (*opcode == OP_CRPOSRANGE)
11202 *opcode = OP_POSSTAR;
11203 else
11204 *opcode -= OP_CRRANGE - OP_STAR;
11205 }
11206 else
11207 {
11208 *max -= *exact;
11209 if (*max == 0)
11210 *opcode = OP_EXACT;
11211 else if (*max == 1)
11212 {
11213 if (*opcode == OP_CRPOSRANGE)
11214 *opcode = OP_POSQUERY;
11215 else
11216 *opcode -= OP_CRRANGE - OP_QUERY;
11217 }
11218 else
11219 {
11220 if (*opcode == OP_CRPOSRANGE)
11221 *opcode = OP_POSUPTO;
11222 else
11223 *opcode -= OP_CRRANGE - OP_UPTO;
11224 }
11225 }
11226 *end = cc + class_len + 2 * IMM2_SIZE;
11227 }
11228 return cc;
11229 }
11230
11231 switch(*opcode)
11232 {
11233 case OP_EXACT:
11234 *exact = GET2(cc, 0);
11235 cc += IMM2_SIZE;
11236 break;
11237
11238 case OP_PLUS:
11239 case OP_MINPLUS:
11240 *exact = 1;
11241 *opcode -= OP_PLUS - OP_STAR;
11242 break;
11243
11244 case OP_POSPLUS:
11245 *exact = 1;
11246 *opcode = OP_POSSTAR;
11247 break;
11248
11249 case OP_UPTO:
11250 case OP_MINUPTO:
11251 case OP_POSUPTO:
11252 *max = GET2(cc, 0);
11253 cc += IMM2_SIZE;
11254 break;
11255 }
11256
11257 if (*type == OP_END)
11258 {
11259 *type = *cc;
11260 *end = next_opcode(common, cc);
11261 cc++;
11262 return cc;
11263 }
11264
11265 *end = cc + 1;
11266 #ifdef SUPPORT_UNICODE
11267 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11268 #endif
11269 return cc;
11270 }
11271
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11272 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11273 {
11274 DEFINE_COMPILER;
11275 backtrack_common *backtrack;
11276 PCRE2_UCHAR opcode;
11277 PCRE2_UCHAR type;
11278 sljit_u32 max = 0, exact;
11279 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11280 sljit_s32 early_fail_type;
11281 BOOL charpos_enabled;
11282 PCRE2_UCHAR charpos_char;
11283 unsigned int charpos_othercasebit;
11284 PCRE2_SPTR end;
11285 jump_list *no_match = NULL;
11286 jump_list *no_char1_match = NULL;
11287 struct sljit_jump *jump = NULL;
11288 struct sljit_label *label;
11289 int private_data_ptr = PRIVATE_DATA(cc);
11290 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11291 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11292 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11293 int tmp_base, tmp_offset;
11294 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11295 BOOL use_tmp;
11296 #endif
11297
11298 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11299
11300 early_fail_type = (early_fail_ptr & 0x7);
11301 early_fail_ptr >>= 3;
11302
11303 /* During recursion, these optimizations are disabled. */
11304 if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11305 {
11306 early_fail_ptr = 0;
11307 early_fail_type = type_skip;
11308 }
11309
11310 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11311 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11312
11313 if (early_fail_type == type_fail)
11314 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11315
11316 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11317
11318 if (type != OP_EXTUNI)
11319 {
11320 tmp_base = TMP3;
11321 tmp_offset = 0;
11322 }
11323 else
11324 {
11325 tmp_base = SLJIT_MEM1(SLJIT_SP);
11326 tmp_offset = POSSESSIVE0;
11327 }
11328
11329 /* Handle fixed part first. */
11330 if (exact > 1)
11331 {
11332 SLJIT_ASSERT(early_fail_ptr == 0);
11333
11334 if (common->mode == PCRE2_JIT_COMPLETE
11335 #ifdef SUPPORT_UNICODE
11336 && !common->utf
11337 #endif
11338 && type != OP_ANYNL && type != OP_EXTUNI)
11339 {
11340 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11341 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11342 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11343 label = LABEL();
11344 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11345 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11346 JUMPTO(SLJIT_NOT_ZERO, label);
11347 }
11348 else
11349 {
11350 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11351 label = LABEL();
11352 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11353 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11354 JUMPTO(SLJIT_NOT_ZERO, label);
11355 }
11356 }
11357 else if (exact == 1)
11358 {
11359 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11360
11361 if (early_fail_type == type_fail_range)
11362 {
11363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11364 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11365 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11366 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11367 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11368
11369 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11370 }
11371 }
11372
11373 switch(opcode)
11374 {
11375 case OP_STAR:
11376 case OP_UPTO:
11377 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11378
11379 if (type == OP_ANYNL || type == OP_EXTUNI)
11380 {
11381 SLJIT_ASSERT(private_data_ptr == 0);
11382 SLJIT_ASSERT(early_fail_ptr == 0);
11383
11384 allocate_stack(common, 2);
11385 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11386 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11387
11388 if (opcode == OP_UPTO)
11389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11390
11391 label = LABEL();
11392 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11393 if (opcode == OP_UPTO)
11394 {
11395 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11396 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11397 jump = JUMP(SLJIT_ZERO);
11398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11399 }
11400
11401 /* We cannot use TMP3 because of allocate_stack. */
11402 allocate_stack(common, 1);
11403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11404 JUMPTO(SLJIT_JUMP, label);
11405 if (jump != NULL)
11406 JUMPHERE(jump);
11407 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11408 break;
11409 }
11410 #ifdef SUPPORT_UNICODE
11411 else if (type == OP_ALLANY && !common->invalid_utf)
11412 #else
11413 else if (type == OP_ALLANY)
11414 #endif
11415 {
11416 if (opcode == OP_STAR)
11417 {
11418 if (private_data_ptr == 0)
11419 allocate_stack(common, 2);
11420
11421 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11422 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11423
11424 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11425 process_partial_match(common);
11426
11427 if (early_fail_ptr != 0)
11428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11429 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11430 break;
11431 }
11432 #ifdef SUPPORT_UNICODE
11433 else if (!common->utf)
11434 #else
11435 else
11436 #endif
11437 {
11438 if (private_data_ptr == 0)
11439 allocate_stack(common, 2);
11440
11441 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11442 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11443
11444 if (common->mode == PCRE2_JIT_COMPLETE)
11445 {
11446 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11447 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11448 }
11449 else
11450 {
11451 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11452 process_partial_match(common);
11453 JUMPHERE(jump);
11454 }
11455
11456 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11457
11458 if (early_fail_ptr != 0)
11459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11460 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11461 break;
11462 }
11463 }
11464
11465 charpos_enabled = FALSE;
11466 charpos_char = 0;
11467 charpos_othercasebit = 0;
11468
11469 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11470 {
11471 #ifdef SUPPORT_UNICODE
11472 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11473 #else
11474 charpos_enabled = TRUE;
11475 #endif
11476 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11477 {
11478 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11479 if (charpos_othercasebit == 0)
11480 charpos_enabled = FALSE;
11481 }
11482
11483 if (charpos_enabled)
11484 {
11485 charpos_char = end[1];
11486 /* Consume the OP_CHAR opcode. */
11487 end += 2;
11488 #if PCRE2_CODE_UNIT_WIDTH == 8
11489 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11490 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11491 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11492 if ((charpos_othercasebit & 0x100) != 0)
11493 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11494 #endif
11495 if (charpos_othercasebit != 0)
11496 charpos_char |= charpos_othercasebit;
11497
11498 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11499 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11500 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11501 }
11502 }
11503
11504 if (charpos_enabled)
11505 {
11506 if (opcode == OP_UPTO)
11507 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11508
11509 /* Search the first instance of charpos_char. */
11510 jump = JUMP(SLJIT_JUMP);
11511 label = LABEL();
11512 if (opcode == OP_UPTO)
11513 {
11514 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11515 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11516 }
11517 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11518 if (early_fail_ptr != 0)
11519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11520 JUMPHERE(jump);
11521
11522 detect_partial_match(common, &backtrack->topbacktracks);
11523 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11524 if (charpos_othercasebit != 0)
11525 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11526 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11527
11528 if (private_data_ptr == 0)
11529 allocate_stack(common, 2);
11530 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11531 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11532
11533 if (opcode == OP_UPTO)
11534 {
11535 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11536 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11537 }
11538
11539 /* Search the last instance of charpos_char. */
11540 label = LABEL();
11541 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11542 if (early_fail_ptr != 0)
11543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11544 detect_partial_match(common, &no_match);
11545 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11546 if (charpos_othercasebit != 0)
11547 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11548
11549 if (opcode == OP_STAR)
11550 {
11551 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11552 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11553 JUMPTO(SLJIT_JUMP, label);
11554 }
11555 else
11556 {
11557 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11558 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11559 JUMPHERE(jump);
11560 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11561 JUMPTO(SLJIT_NOT_ZERO, label);
11562 }
11563
11564 set_jumps(no_match, LABEL());
11565 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11566 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11567 }
11568 else
11569 {
11570 if (private_data_ptr == 0)
11571 allocate_stack(common, 2);
11572
11573 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11574 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11575 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11576 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11577
11578 if (common->utf)
11579 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11580 #endif
11581 if (opcode == OP_UPTO)
11582 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11583
11584 detect_partial_match(common, &no_match);
11585 label = LABEL();
11586 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11587 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11588 if (common->utf)
11589 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11590 #endif
11591
11592 if (opcode == OP_UPTO)
11593 {
11594 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11595 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11596 }
11597
11598 detect_partial_match_to(common, label);
11599 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11600
11601 set_jumps(no_char1_match, LABEL());
11602 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11603 if (common->utf)
11604 {
11605 set_jumps(no_match, LABEL());
11606 if (use_tmp)
11607 {
11608 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11609 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11610 }
11611 else
11612 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11613 }
11614 else
11615 #endif
11616 {
11617 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11618 set_jumps(no_match, LABEL());
11619 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11620 }
11621
11622 if (early_fail_ptr != 0)
11623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11624 }
11625
11626 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11627 break;
11628
11629 case OP_MINSTAR:
11630 if (private_data_ptr == 0)
11631 allocate_stack(common, 1);
11632 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11633 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11634 if (early_fail_ptr != 0)
11635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11636 break;
11637
11638 case OP_MINUPTO:
11639 SLJIT_ASSERT(early_fail_ptr == 0);
11640 if (private_data_ptr == 0)
11641 allocate_stack(common, 2);
11642 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11643 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11644 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11645 break;
11646
11647 case OP_QUERY:
11648 case OP_MINQUERY:
11649 SLJIT_ASSERT(early_fail_ptr == 0);
11650 if (private_data_ptr == 0)
11651 allocate_stack(common, 1);
11652 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11653 if (opcode == OP_QUERY)
11654 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11655 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11656 break;
11657
11658 case OP_EXACT:
11659 break;
11660
11661 case OP_POSSTAR:
11662 #if defined SUPPORT_UNICODE
11663 if (type == OP_ALLANY && !common->invalid_utf)
11664 #else
11665 if (type == OP_ALLANY)
11666 #endif
11667 {
11668 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11669 process_partial_match(common);
11670 if (early_fail_ptr != 0)
11671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11672 break;
11673 }
11674
11675 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11676 if (common->utf)
11677 {
11678 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11679 detect_partial_match(common, &no_match);
11680 label = LABEL();
11681 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11682 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11683 detect_partial_match_to(common, label);
11684
11685 set_jumps(no_match, LABEL());
11686 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11687 if (early_fail_ptr != 0)
11688 {
11689 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11691 else
11692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11693 }
11694 break;
11695 }
11696 #endif
11697
11698 detect_partial_match(common, &no_match);
11699 label = LABEL();
11700 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11701 detect_partial_match_to(common, label);
11702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11703
11704 set_jumps(no_char1_match, LABEL());
11705 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11706 set_jumps(no_match, LABEL());
11707 if (early_fail_ptr != 0)
11708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11709 break;
11710
11711 case OP_POSUPTO:
11712 SLJIT_ASSERT(early_fail_ptr == 0);
11713 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11714 if (common->utf)
11715 {
11716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11717 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11718
11719 detect_partial_match(common, &no_match);
11720 label = LABEL();
11721 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11723 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11724 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11725 detect_partial_match_to(common, label);
11726
11727 set_jumps(no_match, LABEL());
11728 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11729 break;
11730 }
11731 #endif
11732
11733 if (type == OP_ALLANY)
11734 {
11735 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11736
11737 if (common->mode == PCRE2_JIT_COMPLETE)
11738 {
11739 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11740 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11741 }
11742 else
11743 {
11744 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11745 process_partial_match(common);
11746 JUMPHERE(jump);
11747 }
11748 break;
11749 }
11750
11751 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11752
11753 detect_partial_match(common, &no_match);
11754 label = LABEL();
11755 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11756 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758 detect_partial_match_to(common, label);
11759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11760
11761 set_jumps(no_char1_match, LABEL());
11762 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11763 set_jumps(no_match, LABEL());
11764 break;
11765
11766 case OP_POSQUERY:
11767 SLJIT_ASSERT(early_fail_ptr == 0);
11768 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11769 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11770 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11771 set_jumps(no_match, LABEL());
11772 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11773 break;
11774
11775 default:
11776 SLJIT_UNREACHABLE();
11777 break;
11778 }
11779
11780 count_match(common);
11781 return end;
11782 }
11783
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11784 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11785 {
11786 DEFINE_COMPILER;
11787 backtrack_common *backtrack;
11788
11789 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11790
11791 if (*cc == OP_FAIL)
11792 {
11793 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11794 return cc + 1;
11795 }
11796
11797 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11798 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11799
11800 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11801 {
11802 /* No need to check notempty conditions. */
11803 if (common->accept_label == NULL)
11804 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11805 else
11806 JUMPTO(SLJIT_JUMP, common->accept_label);
11807 return cc + 1;
11808 }
11809
11810 if (common->accept_label == NULL)
11811 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11812 else
11813 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11814
11815 if (HAS_VIRTUAL_REGISTERS)
11816 {
11817 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11818 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11819 }
11820 else
11821 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11822
11823 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11824 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11825 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11826 if (common->accept_label == NULL)
11827 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11828 else
11829 JUMPTO(SLJIT_ZERO, common->accept_label);
11830
11831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11832 if (common->accept_label == NULL)
11833 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11834 else
11835 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11836 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11837 return cc + 1;
11838 }
11839
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11840 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11841 {
11842 DEFINE_COMPILER;
11843 int offset = GET2(cc, 1);
11844 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11845
11846 /* Data will be discarded anyway... */
11847 if (common->currententry != NULL)
11848 return cc + 1 + IMM2_SIZE;
11849
11850 if (!optimized_cbracket)
11851 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11852 offset <<= 1;
11853 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11854 if (!optimized_cbracket)
11855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11856 return cc + 1 + IMM2_SIZE;
11857 }
11858
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11859 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11860 {
11861 DEFINE_COMPILER;
11862 backtrack_common *backtrack;
11863 PCRE2_UCHAR opcode = *cc;
11864 PCRE2_SPTR ccend = cc + 1;
11865
11866 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11867 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11868 ccend += 2 + cc[1];
11869
11870 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11871
11872 if (opcode == OP_SKIP)
11873 {
11874 allocate_stack(common, 1);
11875 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11876 return ccend;
11877 }
11878
11879 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11880 {
11881 if (HAS_VIRTUAL_REGISTERS)
11882 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11883 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11885 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11886 }
11887
11888 return ccend;
11889 }
11890
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
records created in compile_then_trap_matchingpath() and at the end of
compile_matchingpath() point their common.cc field at this buffer. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11892
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11893 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11894 {
11895 DEFINE_COMPILER;
11896 backtrack_common *backtrack;
11897 BOOL needs_control_head;
11898 int size;
11899
11900 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11901 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11902 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11903 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11904 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11905
11906 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11907 size = 3 + (size < 0 ? 0 : size);
11908
11909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11910 allocate_stack(common, size);
11911 if (size > 3)
11912 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11913 else
11914 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11918
11919 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11920 if (size >= 0)
11921 init_frame(common, cc, ccend, size - 1, 0);
11922 }
11923
/* Emits the matching-path code for the opcodes in the range cc..ccend.

Each opcode is dispatched to the routine that compiles it; those routines
return the address of the next opcode, which becomes the new cc. The function
returns early, without further processing, when cc becomes NULL.

Simple items that need no backtrack record of their own receive a jump list
chosen at the point of the call: parent->top->nextbacktracks when parent->top
is not NULL, otherwise parent->topbacktracks.

When common->has_then is set and then_offsets[] is non-zero for the starting
offset, a then-trap is installed before the loop and a second then_trap
backtrack record is pushed after it; common->then_trap is restored to its
previous value at that point.

Arguments:
  common   the compiler state
  cc       first opcode to compile
  ccend    end of the range; must point to OP_END or to an opcode between
           OP_ALT and OP_KETRPOS (asserted)
  parent   the backtrack record that owns the items compiled here
*/
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Assertions handled by compile_simple_assertion_matchingpath(). */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Unrepeated single-item matches. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* The old OVECTOR(0) value is saved in a new stack slot and OVECTOR(0) is
    replaced by the current subject position. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* In PCRE2_JIT_COMPLETE mode literal characters go through
    compile_charn_matchingpath(); in the other modes they are compiled one at
    a time. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == PCRE2_JIT_COMPLETE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* A class is compiled as an iterator when the code unit at index
    1 + 32 / sizeof(PCRE2_UCHAR) is a repeat opcode (OP_CRSTAR to
    OP_CRPOSRANGE). */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same test for an extended class, using the opcode found at offset
    GET(cc, 1) from cc. */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    /* Back references: repeated ones use the ref iterator, the others are
    compiled in place and skipped by hand. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* As above for OP_DNREF / OP_DNREFI, which carry two IMM2 operands; the
    unrepeated form calls compile_dnref_search() first. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* cc is advanced to the position returned by bracketend(). The generated
    code saves STR_PTR on the stack; when the opcode located LINK_SIZE + 1
    units before that position is OP_KETRMIN, two slots are used and the
    first one is set to 0. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO is followed either by a bracket or, when the next opcode is
    not greater than OP_ASSERTBACK_NOT, by an assertion. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The previous mark value is saved on the stack and the address cc + 2 is
    stored both in the local mark slot and in jit_arguments.mark_ptr. When
    common->has_skip_arg is set, five stack slots are used instead of one and
    a type_mark record (previous control head, type, cc + 2, STR_PTR) becomes
    the new control head. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* No code is emitted; cc moves to the position returned by
    bracketend(). */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A NULL result from one of the routines above stops the compilation of
  this range. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12239
/* Retire the helper macros used by the matching-path routines above. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for the given record and returns from the
enclosing function (which must return void) when the sljit compiler reports an
error. The names `common` and `compiler` must be in scope at the point of
use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named `current` to a pointer to the given record
type. */
#define CURRENT_AS(type) ((type *)current)
12254
/* Emits the backtracking-path code for a repeated single item.

The iterator's saved words are addressed through (base, offset0) and
(base, offset1). When PRIVATE_DATA(cc) is zero they are the two top slots of
the backtrack stack, STACK(0) and STACK(1) relative to STACK_TOP; otherwise
they are two consecutive words at private_data_ptr relative to SLJIT_SP. Stack
slots are released only in the first case.

The repeat kind and item type are obtained from get_iterator_parameters().
OP_EXACT, OP_POSSTAR, OP_POSQUERY and OP_POSUPTO emit nothing of their own.
In every case the jumps queued on current->topbacktracks are bound to the
label emitted at the very end.

Arguments:
  common    the compiler state
  current   the backtrack record of the iterator
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *fail_jumps = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base;
int offset0;
int offset1;

if (private_data_ptr == 0)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* One saved position is popped per backtrack; a non-zero value resumes
    the matching path. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->u.charpos.enabled)
    {
    /* Step back from the position in offset0 towards the limit in offset1
    (kept in TMP2), resuming the matching path as soon as the code unit
    before STR_PTR equals the recorded character. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    label = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
      }
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    move_back(common, NULL, TRUE);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
    }
  else
    {
    /* Give back one item and resume, unless the position in offset0 is not
    above the one in offset1. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    move_back(common, NULL, TRUE);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }

  JUMPHERE(jump);
  if (private_data_ptr == 0)
    {
    free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Try to match one more item from the saved position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &fail_jumps, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(fail_jumps, LABEL());
  if (private_data_ptr == 0)
    {
    free_stack(common, 1);
    }
  break;

  case OP_MINUPTO:
  /* offset1 holds a counter that is decremented first; reaching zero ends
  the iteration, otherwise one more item is tried. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &fail_jumps, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &fail_jumps, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(fail_jumps, LABEL());
  if (private_data_ptr == 0)
    {
    free_stack(common, 2);
    }
  break;

  case OP_QUERY:
  /* Reload the saved position and clear the slot; a non-zero position
  resumes the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  /* Entry point for the jumps queued on u.backtracks. */
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    {
    free_stack(common, 1);
    }
  break;

  case OP_MINQUERY:
  /* Reload the saved position and clear the slot; with a non-zero position
  the item is tried once before resuming. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &fail_jumps, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(fail_jumps, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    {
    free_stack(common, 1);
    }
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
12382
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12383 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12384 {
12385 DEFINE_COMPILER;
12386 PCRE2_SPTR cc = current->cc;
12387 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12388 PCRE2_UCHAR type;
12389
12390 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12391
12392 if ((type & 0x1) == 0)
12393 {
12394 /* Maximize case. */
12395 set_jumps(current->topbacktracks, LABEL());
12396 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12397 free_stack(common, 1);
12398 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12399 return;
12400 }
12401
12402 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12403 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12404 set_jumps(current->topbacktracks, LABEL());
12405 free_stack(common, ref ? 2 : 3);
12406 }
12407
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12408 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12409 {
12410 DEFINE_COMPILER;
12411 recurse_entry *entry;
12412
12413 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12414 {
12415 entry = CURRENT_AS(recurse_backtrack)->entry;
12416 if (entry->backtrack_label == NULL)
12417 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12418 else
12419 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12420 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12421 }
12422 else
12423 compile_backtrackingpath(common, current->top);
12424
12425 set_jumps(current->topbacktracks, LABEL());
12426 }
12427
/* Generates the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.

Arguments:
  common     the JIT compiler state
  current    the backtrack record of the assertion (an assert_backtrack)
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
const int framesize = CURRENT_AS(assert_backtrack)->framesize;
const int private_data_ptr = CURRENT_AS(assert_backtrack)->private_data_ptr;
BOOL zero_prefixed;
struct sljit_jump *skip_retry = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
zero_prefixed = (*cc == OP_BRAZERO);

if (zero_prefixed)
  {
  /* Step over the prefix and fetch the value saved in the top stack slot. */
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (framesize < 0)
  {
  /* No frame was saved, so there is nothing to revert. */
  set_jumps(current->topbacktracks, LABEL());
  if (!zero_prefixed)
    return;

  /* Clear the slot, then resume the matching path when the fetched value
  is non-zero; otherwise release the slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  free_stack(common, 1);
  return;
  }

if (zero_prefixed)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    /* Negative assertion: same sequence as the frameless case above. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
    free_stack(common, 1);
    return;
    }

  free_stack(common, 1);
  skip_retry = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Positive assertion with a saved frame: reload STACK_TOP from the private
  data slot, call the frame reverting routine, then restore the previous
  private data value and step over the frame. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  }

set_jumps(current->topbacktracks, LABEL());

if (!zero_prefixed)
  return;

/* Re-create the slot released above (the space is known to be available),
clear it and retry the matching path. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
JUMPHERE(skip_retry);
}
12496
/* Generates the backtracking path of a bracket group. compile_backtrackingpath
dispatches here for OP_BRA, OP_CBRA, OP_SBRA, OP_SCBRA, OP_COND, OP_SCOND,
OP_ONCE, OP_ASSERT_NA, OP_ASSERTBACK_NA and OP_SCRIPT_RUN, and for OP_BRAZERO
when it is followed by a non-assertion bracket.

Outline of the emitted code:
  1. pop the repeat / zero-match / capture / alternative-selector values from
     the backtrack stack (presumably stored there by the matching path);
  2. run the backtracking path of the alternative recorded in current->top;
  3. for every remaining OP_ALT, compile its matching path followed by its own
     backtracking path;
  4. restore the saved private data or capture values and handle the repeat
     (KETRMAX, KETRMIN, bounded repeat) and zero-match cases.

Arguments:
  common     the JIT compiler state
  current    the backtrack record of the bracket (a bracket_backtrack)

Note: for OP_ONCE the u.framesize field of the record is modified in place:
its lowest bit is extracted as needs_control_head and the value is shifted
right by one. The function returns early, leaving the code incomplete, when
the compiler reports an error. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int opcode, stacksize, alt_count, alt_max;
int offset = 0;
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
PCRE2_SPTR cc = current->cc;
PCRE2_SPTR ccbegin;
PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label = NULL;

/* Remember an optional zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
/* ccbegin temporarily addresses the opcode found 1 + LINK_SIZE code units
before the position returned by bracketend(); it is read as the closing KET. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
ket = *ccbegin;
/* A plain OP_KET with non-zero private data carries a counted repeat: the
counter location, the repeat type and the repeat count. OP_UPTO is then
handled like OP_KETRMAX and OP_MINUPTO like OP_KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  {
  repeat_ptr = PRIVATE_DATA(ccbegin);
  repeat_type = PRIVATE_DATA(ccbegin + 2);
  repeat_count = PRIVATE_DATA(ccbegin + 3);
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }
/* From here on ccbegin is the bracket opcode itself and cc is the opcode
reached through the link stored after it (an OP_ALT or the closing KET). */
ccbegin = cc;
cc += GET(cc, 1);
has_alternatives = *cc == OP_ALT;
/* A conditional group has a second branch to backtrack into when its
condition is an assertion or when a condfailed jump list was recorded. */
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* For capturing brackets offset is twice the 2-byte value stored after the
link; it is used below as the OVECTOR index of the group. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* A repeated OP_COND is treated as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;

/* Decoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
  {
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  }

/* Counted repeat (OP_UPTO / OP_MINUPTO): pop the saved counter and write it
back to its private data slot, incremented by one for OP_UPTO. */
if (ket != OP_KET && repeat_type != 0)
  {
  /* TMP1 is used in OP_KETRMIN below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  if (repeat_type == OP_UPTO)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (bra == OP_BRAZERO)
    {
    /* Pop the top slot; a zero value skips to the brazero target that is
    resolved at the end of this function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (ket == OP_KETRMIN)
  {
  if (bra != OP_BRAMINZERO)
    {
    /* Minimizing repeat: first try one more iteration from the saved STR_PTR. */
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    if (repeat_type != 0)
      {
      /* TMP1 was set a few lines above. */
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
      {
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
      else
        {
        /* With a saved frame the reference value is read relative to the
        address held in the private data slot. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
        }
      /* Drop STR_PTR for non-greedy plus quantifier. */
      if (opcode != OP_ONCE)
        free_stack(common, 1);
      }
    else
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
    }
  /* Target of the OP_KETRMIN loop emitted at the end of this function. */
  rmin_label = LABEL();
  if (repeat_type != 0)
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* Non-repeated group with OP_BRAZERO: a non-zero popped value skips to the
  brazero target that is resolved at the end of this function. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }
else if (repeat_type == OP_EXACT)
  {
  /* Exact repeat: restart the counter at 1; the loop test is emitted at the
  end of this function. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  exact_label = LABEL();
  }

/* Capturing bracket: restore the values saved on the stack. */
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    {
    /* Three slots: the capture_last value followed by the OVECTOR pair. */
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    free_stack(common, 3);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
    }
  else if (common->optimized_cbracket[offset >> 1] == 0)
    {
    /* Two slots: the OVECTOR pair only. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  }

if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  {
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reload STACK_TOP from the private data slot, call the frame reverting
    routine and step over the frame. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
    }
  /* Jump over the code emitted below; the target is set in the final OP_ONCE
  clean-up section. */
  once = JUMP(SLJIT_JUMP);
  }
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  if (has_alternatives)
    {
    /* Always exactly one alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);

    alt_max = 2;
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
    }
  }
else if (has_alternatives)
  {
  /* Pop the alternative selector. With more than three alternatives it is a
  code address that is jumped to directly; otherwise it is a small index that
  is compared against 0 here and against 1 inside the loop below. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);

  if (alt_max > 3)
    {
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);

    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  }

/* Backtracking path of the alternative recorded in current->top. The macro
is also used inside the loop below. */
COMPILE_BACKTRACKINGPATH(current->top);
if (current->topbacktracks)
  set_jumps(current->topbacktracks, LABEL());

if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  {
  /* Conditional block always has at most one alternative. */
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
    {
    SLJIT_ASSERT(has_alternatives);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* A positive assertion condition with a saved frame is reverted here; the
    negative forms are reverted after the alternative loop below. */
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    /* Skip the second branch; the failed-condition jumps enter it here. */
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
    }
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
    {
    SLJIT_ASSERT(has_alternatives);
    cond = JUMP(SLJIT_JUMP);
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
    }
  else
    SLJIT_ASSERT(!has_alternatives);
  }

if (has_alternatives)
  {
  alt_count = 1;
  do
    {
    /* The record is reused for each alternative, so clear its lists first. */
    current->top = NULL;
    current->topbacktracks = NULL;
    current->nextbacktracks = NULL;
    /* Conditional blocks always have an additional alternative, even if it is empty. */
    if (*cc == OP_ALT)
      {
      ccprev = cc + 1 + LINK_SIZE;
      cc += GET(cc, 1);
      /* Reload the subject position saved for the group before trying the
      next alternative (conditional groups do not reload it here). */
      if (opcode != OP_COND && opcode != OP_SCOND)
        {
        if (opcode != OP_ONCE)
          {
          if (private_data_ptr != 0)
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
          else
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
          }
        else
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
        }
      compile_matchingpath(common, ccprev, cc, current);
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
        return;

      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (opcode == OP_SCRIPT_RUN)
        match_script_run_common(common, private_data_ptr, current);
      }

    /* Instructions after the current alternative is successfully matched. */
    /* There is a similar code in compile_bracket_matchingpath. */
    if (opcode == OP_ONCE)
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

    /* First pass: count the stack slots needed for this alternative. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* We need to preserve the counter. TMP2 will be used below. */
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
      stacksize++;
      }
    if (ket != OP_KET || bra != OP_BRA)
      stacksize++;
    if (offset != 0)
      {
      if (common->capture_last_ptr != 0)
        stacksize++;
      if (common->optimized_cbracket[offset >> 1] == 0)
        stacksize += 2;
      }
    if (opcode != OP_ONCE)
      stacksize++;

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    /* Second pass: fill the slots; stacksize is now the next slot index. */
    stacksize = 0;
    if (repeat_type == OP_MINUPTO)
      {
      /* TMP2 was set above. */
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
      stacksize++;
      }

    if (ket != OP_KET || bra != OP_BRA)
      {
      /* Repeated groups store STR_PTR; a non-repeated group that has a zero
      prefix stores 0 instead. */
      if (ket != OP_KET)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      else
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
      stacksize++;
      }

    if (offset != 0)
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

    /* Store the selector of this alternative: its index when there are at
    most three alternatives, otherwise the address of the label set below. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
      else
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
      }

    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
      {
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      }

    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);

    /* Entry point of the backtracking path of this alternative. */
    if (opcode != OP_ONCE)
      {
      if (alt_max <= 3)
        {
        JUMPHERE(next_alt);
        alt_count++;
        if (alt_count < alt_max)
          {
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
          }
        }
      else
        {
        sljit_set_put_label(put_label, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      }

    COMPILE_BACKTRACKINGPATH(current->top);
    if (current->topbacktracks)
      set_jumps(current->topbacktracks, LABEL());
    SLJIT_ASSERT(!current->nextbacktracks);
    }
  while (*cc == OP_ALT);

  if (cond != NULL)
    {
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
    /* Revert the frame of a negative assertion condition. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
      }
    JUMPHERE(cond);
    }

  /* Free the STR_PTR. */
  if (private_data_ptr == 0)
    free_stack(common, 1);
  }

/* Every alternative has failed: restore the state saved for the group. */
if (offset != 0)
  {
  /* Using both tmp register is better for instruction scheduling. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Restore the previous private data value from the top stack slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  }
else if (opcode == OP_ONCE)
  {
  /* Re-read the opcode after the first branch to see whether the group has
  alternatives, then compute how many slots have to be released. */
  cc = ccbegin + GET(ccbegin, 1);
  stacksize = needs_control_head ? 1 : 0;

  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    {
    /* Reset head and drop saved frame. */
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
    }
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
    {
    /* The STR_PTR must be released. */
    stacksize++;
    }

  if (stacksize > 0)
    free_stack(common, stacksize);

  /* Target of the jump emitted before the backtracking path of the group. */
  JUMPHERE(once);
  /* Restore previous private_data_ptr */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  else if (ket == OP_KETRMIN)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* See the comment below. */
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  }

/* Finally emit the repeat handling. */
if (repeat_type == OP_EXACT)
  {
  /* Increment the counter and loop back while it does not exceed the count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  }
else if (ket == OP_KETRMAX)
  {
  /* A non-zero saved STR_PTR means an earlier iteration can be backtracked. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (bra != OP_BRAZERO)
    free_stack(common, 1);

  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  if (bra == OP_BRAZERO)
    {
    /* Otherwise continue on the zero-iteration matching path with the
    STR_PTR taken from the next slot. */
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
    JUMPHERE(brazero);
    free_stack(common, 1);
    }
  }
else if (ket == OP_KETRMIN)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  /* OP_ONCE removes everything in case of a backtrack, so we don't
  need to explicitly release the STR_PTR. An extra release here would
  interfere with the free_stack(2) above. */
  if (opcode != OP_ONCE)
    free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  if (opcode == OP_ONCE)
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  /* Non-repeated group with OP_BRAZERO: continue on the zero-iteration
  matching path with the saved STR_PTR. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}
12963
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12964 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12965 {
12966 DEFINE_COMPILER;
12967 int offset;
12968 struct sljit_jump *jump;
12969
12970 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12971 {
12972 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12973 {
12974 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12975 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12978 if (common->capture_last_ptr != 0)
12979 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12980 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12981 if (common->capture_last_ptr != 0)
12982 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12983 }
12984 set_jumps(current->topbacktracks, LABEL());
12985 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12986 return;
12987 }
12988
12989 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12990 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12991 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12992
12993 if (current->topbacktracks)
12994 {
12995 jump = JUMP(SLJIT_JUMP);
12996 set_jumps(current->topbacktracks, LABEL());
12997 /* Drop the stack frame. */
12998 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12999 JUMPHERE(jump);
13000 }
13001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13002 }
13003
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)13004 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13005 {
13006 assert_backtrack backtrack;
13007
13008 current->top = NULL;
13009 current->topbacktracks = NULL;
13010 current->nextbacktracks = NULL;
13011 if (current->cc[1] > OP_ASSERTBACK_NOT)
13012 {
13013 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13014 compile_bracket_matchingpath(common, current->cc, current);
13015 compile_bracket_backtrackingpath(common, current->top);
13016 }
13017 else
13018 {
13019 memset(&backtrack, 0, sizeof(backtrack));
13020 backtrack.common.cc = current->cc;
13021 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13022 /* Manual call of compile_assert_matchingpath. */
13023 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13024 }
13025 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13026 }
13027
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)13028 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13029 {
13030 DEFINE_COMPILER;
13031 PCRE2_UCHAR opcode = *current->cc;
13032 struct sljit_label *loop;
13033 struct sljit_jump *jump;
13034
13035 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13036 {
13037 if (common->then_trap != NULL)
13038 {
13039 SLJIT_ASSERT(common->control_head_ptr != 0);
13040
13041 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13043 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13044 jump = JUMP(SLJIT_JUMP);
13045
13046 loop = LABEL();
13047 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13048 JUMPHERE(jump);
13049 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13050 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13051 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13052 return;
13053 }
13054 else if (!common->local_quit_available && common->in_positive_assertion)
13055 {
13056 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13057 return;
13058 }
13059 }
13060
13061 if (common->local_quit_available)
13062 {
13063 /* Abort match with a fail. */
13064 if (common->quit_label == NULL)
13065 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13066 else
13067 JUMPTO(SLJIT_JUMP, common->quit_label);
13068 return;
13069 }
13070
13071 if (opcode == OP_SKIP_ARG)
13072 {
13073 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13074 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13075 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13076 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13077
13078 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13079 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13080 return;
13081 }
13082
13083 if (opcode == OP_SKIP)
13084 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13085 else
13086 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13087 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13088 }
13089
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13090 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13091 {
13092 DEFINE_COMPILER;
13093 struct sljit_jump *jump;
13094 int size;
13095
13096 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13097 {
13098 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13099 return;
13100 }
13101
13102 size = CURRENT_AS(then_trap_backtrack)->framesize;
13103 size = 3 + (size < 0 ? 0 : size);
13104
13105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13106 free_stack(common, size);
13107 jump = JUMP(SLJIT_JUMP);
13108
13109 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13110 /* STACK_TOP is set by THEN. */
13111 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13112 {
13113 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13114 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13115 }
13116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13117 free_stack(common, 3);
13118
13119 JUMPHERE(jump);
13120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13121 }
13122
/* Generates the backtracking paths of a whole list of backtrack records.

The list is walked from current through the prev links. For each record the
pending nextbacktracks jumps are bound to the current position, then the code
for the opcode stored at the record's cc is emitted, mostly by a dedicated
helper. The then_trap pointer of common is saved on entry and restored on exit.

Arguments:
  common     the JIT compiler state
  current    the first backtrack record to process (may be NULL)
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    /* Opcodes that are handled inline. */

    case OP_SET_SOM:
    /* Restore the previous value of OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    case OP_MARK:
    /* Restore the previous mark and, when a skip argument is in use, the
    previous control head as well. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Leave through the quit path, returning "no match" when no local
    quit is available. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Nothing to undo, only bind the failed backtracks. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    /* Single character, character type and class iterators. */

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    /* Back references and recursion. */

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    /* Assertions and bracket groups. */

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix tells an assertion from a bracket. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    /* Backtracking control verbs. */

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13313
compile_recurse(compiler_common * common)13314 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13315 {
13316 DEFINE_COMPILER;
13317 PCRE2_SPTR cc = common->start + common->currententry->start;
13318 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13319 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13320 BOOL needs_control_head;
13321 BOOL has_quit;
13322 BOOL has_accept;
13323 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13324 int alt_count, alt_max, local_size;
13325 backtrack_common altbacktrack;
13326 jump_list *match = NULL;
13327 struct sljit_jump *next_alt = NULL;
13328 struct sljit_jump *accept_exit = NULL;
13329 struct sljit_label *quit;
13330 struct sljit_put_label *put_label = NULL;
13331
13332 /* Recurse captures then. */
13333 common->then_trap = NULL;
13334
13335 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13336
13337 alt_max = no_alternatives(cc);
13338 alt_count = 0;
13339
13340 /* Matching path. */
13341 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13342 common->currententry->entry_label = LABEL();
13343 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13344
13345 sljit_emit_fast_enter(compiler, TMP2, 0);
13346 count_match(common);
13347
13348 local_size = (alt_max > 1) ? 2 : 1;
13349
13350 /* (Reversed) stack layout:
13351 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13352
13353 allocate_stack(common, private_data_size + local_size);
13354 /* Save return address. */
13355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13356
13357 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13358
13359 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
13360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13361
13362 if (needs_control_head)
13363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13364
13365 if (alt_max > 1)
13366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13367
13368 memset(&altbacktrack, 0, sizeof(backtrack_common));
13369 common->quit_label = NULL;
13370 common->accept_label = NULL;
13371 common->quit = NULL;
13372 common->accept = NULL;
13373 altbacktrack.cc = ccbegin;
13374 cc += GET(cc, 1);
13375 while (1)
13376 {
13377 altbacktrack.top = NULL;
13378 altbacktrack.topbacktracks = NULL;
13379
13380 if (altbacktrack.cc != ccbegin)
13381 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13382
13383 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13384 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13385 return;
13386
13387 allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13388 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13389
13390 if (alt_max > 1 || has_accept)
13391 {
13392 if (alt_max > 3)
13393 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13394 else
13395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13396 }
13397
13398 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13399
13400 if (alt_count == 0)
13401 {
13402 /* Backtracking path entry. */
13403 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13404 common->currententry->backtrack_label = LABEL();
13405 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13406
13407 sljit_emit_fast_enter(compiler, TMP1, 0);
13408
13409 if (has_accept)
13410 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13411
13412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13413 /* Save return address. */
13414 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13415
13416 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13417
13418 if (alt_max > 1)
13419 {
13420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13421 free_stack(common, 2);
13422
13423 if (alt_max > 3)
13424 {
13425 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13426 sljit_set_put_label(put_label, LABEL());
13427 sljit_emit_op0(compiler, SLJIT_ENDBR);
13428 }
13429 else
13430 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13431 }
13432 else
13433 free_stack(common, has_accept ? 2 : 1);
13434 }
13435 else if (alt_max > 3)
13436 {
13437 sljit_set_put_label(put_label, LABEL());
13438 sljit_emit_op0(compiler, SLJIT_ENDBR);
13439 }
13440 else
13441 {
13442 JUMPHERE(next_alt);
13443 if (alt_count + 1 < alt_max)
13444 {
13445 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13446 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13447 }
13448 }
13449
13450 alt_count++;
13451
13452 compile_backtrackingpath(common, altbacktrack.top);
13453 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13454 return;
13455 set_jumps(altbacktrack.topbacktracks, LABEL());
13456
13457 if (*cc != OP_ALT)
13458 break;
13459
13460 altbacktrack.cc = cc + 1 + LINK_SIZE;
13461 cc += GET(cc, 1);
13462 }
13463
13464 /* No alternative is matched. */
13465
13466 quit = LABEL();
13467
13468 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13469
13470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13471 free_stack(common, private_data_size + local_size);
13472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13473 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13474
13475 if (common->quit != NULL)
13476 {
13477 SLJIT_ASSERT(has_quit);
13478
13479 set_jumps(common->quit, LABEL());
13480 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13481 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13482 JUMPTO(SLJIT_JUMP, quit);
13483 }
13484
13485 if (has_accept)
13486 {
13487 JUMPHERE(accept_exit);
13488 free_stack(common, 2);
13489
13490 /* Save return address. */
13491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13492
13493 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13494
13495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13496 free_stack(common, private_data_size + local_size);
13497 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13498 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13499 }
13500
13501 if (common->accept != NULL)
13502 {
13503 SLJIT_ASSERT(has_accept);
13504
13505 set_jumps(common->accept, LABEL());
13506
13507 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13508 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13509
13510 allocate_stack(common, 2);
13511 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13512 }
13513
13514 set_jumps(match, LABEL());
13515
13516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13517
13518 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13519
13520 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13521 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13522 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13523 }
13524
/* Retire the CURRENT_AS and COMPILE_BACKTRACKINGPATH helper macros here. */
#undef CURRENT_AS
#undef COMPILE_BACKTRACKINGPATH

/* Configuration bits that may be combined with the mode value passed to
jit_compile(); jit_compile() masks them out of the mode after reading them. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13530
jit_compile(pcre2_code * code,sljit_u32 mode)13531 static int jit_compile(pcre2_code *code, sljit_u32 mode)
13532 {
13533 pcre2_real_code *re = (pcre2_real_code *)code;
13534 struct sljit_compiler *compiler;
13535 backtrack_common rootbacktrack;
13536 compiler_common common_data;
13537 compiler_common *common = &common_data;
13538 const sljit_u8 *tables = re->tables;
13539 void *allocator_data = &re->memctl;
13540 int private_data_size;
13541 PCRE2_SPTR ccend;
13542 executable_functions *functions;
13543 void *executable_func;
13544 sljit_uw executable_size;
13545 sljit_uw total_length;
13546 struct sljit_label *mainloop_label = NULL;
13547 struct sljit_label *continue_match_label;
13548 struct sljit_label *empty_match_found_label = NULL;
13549 struct sljit_label *empty_match_backtrack_label = NULL;
13550 struct sljit_label *reset_match_label;
13551 struct sljit_label *quit_label;
13552 struct sljit_jump *jump;
13553 struct sljit_jump *minlength_check_failed = NULL;
13554 struct sljit_jump *empty_match = NULL;
13555 struct sljit_jump *end_anchor_failed = NULL;
13556 jump_list *reqcu_not_found = NULL;
13557
13558 SLJIT_ASSERT(tables);
13559
13560 #if HAS_VIRTUAL_REGISTERS == 1
13561 SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13562 #elif HAS_VIRTUAL_REGISTERS == 0
13563 SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13564 #else
13565 #error "Invalid value for HAS_VIRTUAL_REGISTERS"
13566 #endif
13567
13568 memset(&rootbacktrack, 0, sizeof(backtrack_common));
13569 memset(common, 0, sizeof(compiler_common));
13570 common->re = re;
13571 common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13572 rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13573
13574 #ifdef SUPPORT_UNICODE
13575 common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13576 #endif /* SUPPORT_UNICODE */
13577 mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13578
13579 common->start = rootbacktrack.cc;
13580 common->read_only_data_head = NULL;
13581 common->fcc = tables + fcc_offset;
13582 common->lcc = (sljit_sw)(tables + lcc_offset);
13583 common->mode = mode;
13584 common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13585 common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13586 common->nltype = NLTYPE_FIXED;
13587 switch(re->newline_convention)
13588 {
13589 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13590 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13591 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13592 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13593 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13594 case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13595 default: return PCRE2_ERROR_INTERNAL;
13596 }
13597 common->nlmax = READ_CHAR_MAX;
13598 common->nlmin = 0;
13599 if (re->bsr_convention == PCRE2_BSR_UNICODE)
13600 common->bsr_nltype = NLTYPE_ANY;
13601 else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13602 common->bsr_nltype = NLTYPE_ANYCRLF;
13603 else
13604 {
13605 #ifdef BSR_ANYCRLF
13606 common->bsr_nltype = NLTYPE_ANYCRLF;
13607 #else
13608 common->bsr_nltype = NLTYPE_ANY;
13609 #endif
13610 }
13611 common->bsr_nlmax = READ_CHAR_MAX;
13612 common->bsr_nlmin = 0;
13613 common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13614 common->ctypes = (sljit_sw)(tables + ctypes_offset);
13615 common->name_count = re->name_count;
13616 common->name_entry_size = re->name_entry_size;
13617 common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13618 common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13619 #ifdef SUPPORT_UNICODE
13620 /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13621 common->utf = (re->overall_options & PCRE2_UTF) != 0;
13622 common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13623 if (common->utf)
13624 {
13625 if (common->nltype == NLTYPE_ANY)
13626 common->nlmax = 0x2029;
13627 else if (common->nltype == NLTYPE_ANYCRLF)
13628 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13629 else
13630 {
13631 /* We only care about the first newline character. */
13632 common->nlmax = common->newline & 0xff;
13633 }
13634
13635 if (common->nltype == NLTYPE_FIXED)
13636 common->nlmin = common->newline & 0xff;
13637 else
13638 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13639
13640 if (common->bsr_nltype == NLTYPE_ANY)
13641 common->bsr_nlmax = 0x2029;
13642 else
13643 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13644 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13645 }
13646 else
13647 common->invalid_utf = FALSE;
13648 #endif /* SUPPORT_UNICODE */
13649 ccend = bracketend(common->start);
13650
13651 /* Calculate the local space size on the stack. */
13652 common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13653 common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13654 if (!common->optimized_cbracket)
13655 return PCRE2_ERROR_NOMEMORY;
13656 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13657 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13658 #else
13659 memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13660 #endif
13661
13662 SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13663 #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13664 common->capture_last_ptr = common->ovector_start;
13665 common->ovector_start += sizeof(sljit_sw);
13666 #endif
13667 if (!check_opcode_types(common, common->start, ccend))
13668 {
13669 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13670 return PCRE2_ERROR_NOMEMORY;
13671 }
13672
13673 /* Checking flags and updating ovector_start. */
13674 if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13675 {
13676 common->req_char_ptr = common->ovector_start;
13677 common->ovector_start += sizeof(sljit_sw);
13678 }
13679 if (mode != PCRE2_JIT_COMPLETE)
13680 {
13681 common->start_used_ptr = common->ovector_start;
13682 common->ovector_start += sizeof(sljit_sw);
13683 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13684 {
13685 common->hit_start = common->ovector_start;
13686 common->ovector_start += sizeof(sljit_sw);
13687 }
13688 }
13689 if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13690 {
13691 common->match_end_ptr = common->ovector_start;
13692 common->ovector_start += sizeof(sljit_sw);
13693 }
13694 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13695 common->control_head_ptr = 1;
13696 #endif
13697 if (common->control_head_ptr != 0)
13698 {
13699 common->control_head_ptr = common->ovector_start;
13700 common->ovector_start += sizeof(sljit_sw);
13701 }
13702 if (common->has_set_som)
13703 {
13704 /* Saving the real start pointer is necessary. */
13705 common->start_ptr = common->ovector_start;
13706 common->ovector_start += sizeof(sljit_sw);
13707 }
13708
13709 /* Aligning ovector to even number of sljit words. */
13710 if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13711 common->ovector_start += sizeof(sljit_sw);
13712
13713 if (common->start_ptr == 0)
13714 common->start_ptr = OVECTOR(0);
13715
13716 /* Capturing brackets cannot be optimized if callouts are allowed. */
13717 if (common->capture_last_ptr != 0)
13718 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13719
13720 SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13721 common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13722
13723 total_length = ccend - common->start;
13724 common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13725 if (!common->private_data_ptrs)
13726 {
13727 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13728 return PCRE2_ERROR_NOMEMORY;
13729 }
13730 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13731
13732 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13733
13734 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13735 detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13736
13737 set_private_data_ptrs(common, &private_data_size, ccend);
13738
13739 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13740
13741 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13742 {
13743 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13744 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13745 return PCRE2_ERROR_NOMEMORY;
13746 }
13747
13748 if (common->has_then)
13749 {
13750 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13751 memset(common->then_offsets, 0, total_length);
13752 set_then_offsets(common, common->start, NULL);
13753 }
13754
13755 compiler = sljit_create_compiler(allocator_data, NULL);
13756 if (!compiler)
13757 {
13758 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13759 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13760 return PCRE2_ERROR_NOMEMORY;
13761 }
13762 common->compiler = compiler;
13763
13764 /* Main pcre_jit_exec entry. */
13765 SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13766 sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13767
13768 /* Register init. */
13769 reset_ovector(common, (re->top_bracket + 1) * 2);
13770 if (common->req_char_ptr != 0)
13771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13772
13773 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13775 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13776 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13777 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13778 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13779 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13780 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13781 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13783
13784 if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13785 reset_early_fail(common);
13786
13787 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13788 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13789 if (common->mark_ptr != 0)
13790 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13791 if (common->control_head_ptr != 0)
13792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13793
13794 /* Main part of the matching */
13795 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13796 {
13797 mainloop_label = mainloop_entry(common);
13798 continue_match_label = LABEL();
13799 /* Forward search if possible. */
13800 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13801 {
13802 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13803 ;
13804 else if ((re->flags & PCRE2_FIRSTSET) != 0)
13805 fast_forward_first_char(common);
13806 else if ((re->flags & PCRE2_STARTLINE) != 0)
13807 fast_forward_newline(common);
13808 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13809 fast_forward_start_bits(common);
13810 }
13811 }
13812 else
13813 continue_match_label = LABEL();
13814
13815 if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13816 {
13817 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13818 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13819 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13820 }
13821 if (common->req_char_ptr != 0)
13822 reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13823
13824 /* Store the current STR_PTR in OVECTOR(0). */
13825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13826 /* Copy the limit of allowed recursions. */
13827 OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13828 if (common->capture_last_ptr != 0)
13829 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13830 if (common->fast_forward_bc_ptr != NULL)
13831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13832
13833 if (common->start_ptr != OVECTOR(0))
13834 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13835
13836 /* Copy the beginning of the string. */
13837 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13838 {
13839 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13841 JUMPHERE(jump);
13842 }
13843 else if (mode == PCRE2_JIT_PARTIAL_HARD)
13844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13845
13846 compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13847 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13848 {
13849 sljit_free_compiler(compiler);
13850 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13851 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13852 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13853 return PCRE2_ERROR_NOMEMORY;
13854 }
13855
13856 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13857 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13858
13859 if (common->might_be_empty)
13860 {
13861 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13862 empty_match_found_label = LABEL();
13863 }
13864
13865 common->accept_label = LABEL();
13866 if (common->accept != NULL)
13867 set_jumps(common->accept, common->accept_label);
13868
13869 /* This means we have a match. Update the ovector. */
13870 copy_ovector(common, re->top_bracket + 1);
13871 common->quit_label = common->abort_label = LABEL();
13872 if (common->quit != NULL)
13873 set_jumps(common->quit, common->quit_label);
13874 if (common->abort != NULL)
13875 set_jumps(common->abort, common->abort_label);
13876 if (minlength_check_failed != NULL)
13877 SET_LABEL(minlength_check_failed, common->abort_label);
13878
13879 sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13880 sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13881
13882 if (common->failed_match != NULL)
13883 {
13884 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13885 set_jumps(common->failed_match, LABEL());
13886 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13887 JUMPTO(SLJIT_JUMP, common->abort_label);
13888 }
13889
13890 if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13891 JUMPHERE(end_anchor_failed);
13892
13893 if (mode != PCRE2_JIT_COMPLETE)
13894 {
13895 common->partialmatchlabel = LABEL();
13896 set_jumps(common->partialmatch, common->partialmatchlabel);
13897 return_with_partial_match(common, common->quit_label);
13898 }
13899
13900 if (common->might_be_empty)
13901 empty_match_backtrack_label = LABEL();
13902 compile_backtrackingpath(common, rootbacktrack.top);
13903 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13904 {
13905 sljit_free_compiler(compiler);
13906 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13907 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13908 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13909 return PCRE2_ERROR_NOMEMORY;
13910 }
13911
13912 SLJIT_ASSERT(rootbacktrack.prev == NULL);
13913 reset_match_label = LABEL();
13914
13915 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13916 {
13917 /* Update hit_start only in the first time. */
13918 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13919 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13922 JUMPHERE(jump);
13923 }
13924
13925 /* Check we have remaining characters. */
13926 if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13927 {
13928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13929 }
13930
13931 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13932 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13933
13934 if ((re->overall_options & PCRE2_ANCHORED) == 0)
13935 {
13936 if (common->ff_newline_shortcut != NULL)
13937 {
13938 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13939 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13940 {
13941 if (common->match_end_ptr != 0)
13942 {
13943 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13944 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13945 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13946 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13947 }
13948 else
13949 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13950 }
13951 }
13952 else
13953 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13954 }
13955
13956 /* No more remaining characters. */
13957 if (reqcu_not_found != NULL)
13958 set_jumps(reqcu_not_found, LABEL());
13959
13960 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13961 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13962
13963 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13964 JUMPTO(SLJIT_JUMP, common->quit_label);
13965
13966 flush_stubs(common);
13967
13968 if (common->might_be_empty)
13969 {
13970 JUMPHERE(empty_match);
13971 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13972 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13973 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13974 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13975 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13976 JUMPTO(SLJIT_ZERO, empty_match_found_label);
13977 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13978 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13979 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13980 }
13981
13982 common->fast_forward_bc_ptr = NULL;
13983 common->early_fail_start_ptr = 0;
13984 common->early_fail_end_ptr = 0;
13985 common->currententry = common->entries;
13986 common->local_quit_available = TRUE;
13987 quit_label = common->quit_label;
13988 if (common->currententry != NULL)
13989 {
13990 /* A free bit for each private data. */
13991 common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
13992 SLJIT_ASSERT(common->recurse_bitset_size > 0);
13993 common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
13994
13995 if (common->recurse_bitset != NULL)
13996 {
13997 do
13998 {
13999 /* Might add new entries. */
14000 compile_recurse(common);
14001 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14002 break;
14003 flush_stubs(common);
14004 common->currententry = common->currententry->next;
14005 }
14006 while (common->currententry != NULL);
14007
14008 SLJIT_FREE(common->recurse_bitset, allocator_data);
14009 }
14010
14011 if (common->currententry != NULL)
14012 {
14013 /* The common->recurse_bitset has been freed. */
14014 SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14015
14016 sljit_free_compiler(compiler);
14017 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14018 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14019 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14020 return PCRE2_ERROR_NOMEMORY;
14021 }
14022 }
14023 common->local_quit_available = FALSE;
14024 common->quit_label = quit_label;
14025
14026 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14027 /* This is a (really) rare case. */
14028 set_jumps(common->stackalloc, LABEL());
14029 /* RETURN_ADDR is not a saved register. */
14030 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14031
14032 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14033
14034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14035 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14036 OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14037 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14038 OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14039
14040 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
14041
14042 jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14043 OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14044 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14046 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14047 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14048
14049 /* Allocation failed. */
14050 JUMPHERE(jump);
14051 /* We break the return address cache here, but this is a really rare case. */
14052 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14053 JUMPTO(SLJIT_JUMP, common->quit_label);
14054
14055 /* Call limit reached. */
14056 set_jumps(common->calllimit, LABEL());
14057 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14058 JUMPTO(SLJIT_JUMP, common->quit_label);
14059
14060 if (common->revertframes != NULL)
14061 {
14062 set_jumps(common->revertframes, LABEL());
14063 do_revertframes(common);
14064 }
14065 if (common->wordboundary != NULL)
14066 {
14067 set_jumps(common->wordboundary, LABEL());
14068 check_wordboundary(common);
14069 }
14070 if (common->anynewline != NULL)
14071 {
14072 set_jumps(common->anynewline, LABEL());
14073 check_anynewline(common);
14074 }
14075 if (common->hspace != NULL)
14076 {
14077 set_jumps(common->hspace, LABEL());
14078 check_hspace(common);
14079 }
14080 if (common->vspace != NULL)
14081 {
14082 set_jumps(common->vspace, LABEL());
14083 check_vspace(common);
14084 }
14085 if (common->casefulcmp != NULL)
14086 {
14087 set_jumps(common->casefulcmp, LABEL());
14088 do_casefulcmp(common);
14089 }
14090 if (common->caselesscmp != NULL)
14091 {
14092 set_jumps(common->caselesscmp, LABEL());
14093 do_caselesscmp(common);
14094 }
14095 if (common->reset_match != NULL)
14096 {
14097 set_jumps(common->reset_match, LABEL());
14098 do_reset_match(common, (re->top_bracket + 1) * 2);
14099 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14100 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14101 JUMPTO(SLJIT_JUMP, reset_match_label);
14102 }
14103 #ifdef SUPPORT_UNICODE
14104 #if PCRE2_CODE_UNIT_WIDTH == 8
14105 if (common->utfreadchar != NULL)
14106 {
14107 set_jumps(common->utfreadchar, LABEL());
14108 do_utfreadchar(common);
14109 }
14110 if (common->utfreadtype8 != NULL)
14111 {
14112 set_jumps(common->utfreadtype8, LABEL());
14113 do_utfreadtype8(common);
14114 }
14115 if (common->utfpeakcharback != NULL)
14116 {
14117 set_jumps(common->utfpeakcharback, LABEL());
14118 do_utfpeakcharback(common);
14119 }
14120 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14121 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14122 if (common->utfreadchar_invalid != NULL)
14123 {
14124 set_jumps(common->utfreadchar_invalid, LABEL());
14125 do_utfreadchar_invalid(common);
14126 }
14127 if (common->utfreadnewline_invalid != NULL)
14128 {
14129 set_jumps(common->utfreadnewline_invalid, LABEL());
14130 do_utfreadnewline_invalid(common);
14131 }
14132 if (common->utfmoveback_invalid)
14133 {
14134 set_jumps(common->utfmoveback_invalid, LABEL());
14135 do_utfmoveback_invalid(common);
14136 }
14137 if (common->utfpeakcharback_invalid)
14138 {
14139 set_jumps(common->utfpeakcharback_invalid, LABEL());
14140 do_utfpeakcharback_invalid(common);
14141 }
14142 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14143 if (common->getucd != NULL)
14144 {
14145 set_jumps(common->getucd, LABEL());
14146 do_getucd(common);
14147 }
14148 if (common->getucdtype != NULL)
14149 {
14150 set_jumps(common->getucdtype, LABEL());
14151 do_getucdtype(common);
14152 }
14153 #endif /* SUPPORT_UNICODE */
14154
14155 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14156 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14157
14158 executable_func = sljit_generate_code(compiler);
14159 executable_size = sljit_get_generated_code_size(compiler);
14160 sljit_free_compiler(compiler);
14161
14162 if (executable_func == NULL)
14163 {
14164 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14165 return PCRE2_ERROR_NOMEMORY;
14166 }
14167
14168 /* Reuse the function descriptor if possible. */
14169 if (re->executable_jit != NULL)
14170 functions = (executable_functions *)re->executable_jit;
14171 else
14172 {
14173 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14174 if (functions == NULL)
14175 {
14176 /* This case is highly unlikely since we just recently
14177 freed a lot of memory. Not impossible though. */
14178 sljit_free_code(executable_func, NULL);
14179 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180 return PCRE2_ERROR_NOMEMORY;
14181 }
14182 memset(functions, 0, sizeof(executable_functions));
14183 functions->top_bracket = re->top_bracket + 1;
14184 functions->limit_match = re->limit_match;
14185 re->executable_jit = functions;
14186 }
14187
14188 /* Turn mode into an index. */
14189 if (mode == PCRE2_JIT_COMPLETE)
14190 mode = 0;
14191 else
14192 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14193
14194 SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14195 functions->executable_funcs[mode] = executable_func;
14196 functions->read_only_data_heads[mode] = common->read_only_data_head;
14197 functions->executable_sizes[mode] = executable_size;
14198 return 0;
14199 }
14200
14201 #endif
14202
14203 /*************************************************
14204 * JIT compile a Regular Expression *
14205 *************************************************/
14206
/* This function uses JIT to convert a previously-compiled pattern into machine
14208 code.
14209
14210 Arguments:
14211 code a compiled pattern
14212 options JIT option bits
14213
14214 Returns: 0: success or (*NOJIT) was used
14215 <0: an error code
14216 */
14217
/* Mask of every option bit that pcre2_jit_compile() accepts. Any bit outside
this mask makes that function return PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  ( PCRE2_JIT_COMPLETE     \
  | PCRE2_JIT_PARTIAL_SOFT \
  | PCRE2_JIT_PARTIAL_HARD \
  | PCRE2_JIT_INVALID_UTF  )
14220
14221 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14222 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14223 {
14224 pcre2_real_code *re = (pcre2_real_code *)code;
14225 #ifdef SUPPORT_JIT
14226 executable_functions *functions;
14227 static int executable_allocator_is_working = 0;
14228 #endif
14229
14230 if (code == NULL)
14231 return PCRE2_ERROR_NULL;
14232
14233 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14234 return PCRE2_ERROR_JIT_BADOPTION;
14235
14236 /* Support for invalid UTF was first introduced in JIT, with the option
14237 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14238 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14239 preferred feature, with the earlier option deprecated. However, for backward
14240 compatibility, if the earlier option is set, it forces the new option so that
14241 if JIT matching falls back to the interpreter, there is still support for
14242 invalid UTF. However, if this function has already been successfully called
14243 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14244 non-invalid-supporting JIT code was compiled), give an error.
14245
14246 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14247 actions are needed:
14248
14249 1. Remove the definition from pcre2.h.in and from the list in
14250 PUBLIC_JIT_COMPILE_OPTIONS above.
14251
14252 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14253
14254 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14255
14256 4. Delete the following short block of code. The setting of "re" and
14257 "functions" can be moved into the JIT-only block below, but if that is
14258 done, (void)re and (void)functions will be needed in the non-JIT case, to
14259 avoid compiler warnings.
14260 */
14261
14262 #ifdef SUPPORT_JIT
14263 functions = (executable_functions *)re->executable_jit;
14264 #endif
14265
14266 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14267 {
14268 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14269 {
14270 #ifdef SUPPORT_JIT
14271 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14272 #endif
14273 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14274 }
14275 }
14276
14277 /* The above tests are run with and without JIT support. This means that
14278 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14279 interpreter support) even in the absence of JIT. But now, if there is no JIT
14280 support, give an error return. */
14281
14282 #ifndef SUPPORT_JIT
14283 return PCRE2_ERROR_JIT_BADOPTION;
14284 #else /* SUPPORT_JIT */
14285
14286 /* There is JIT support. Do the necessary. */
14287
14288 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14289
14290 if (executable_allocator_is_working == 0)
14291 {
14292 /* Checks whether the executable allocator is working. This check
14293 might run multiple times in multi-threaded environments, but the
14294 result should not be affected by it. */
14295 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14296
14297 executable_allocator_is_working = -1;
14298
14299 if (ptr != NULL)
14300 {
14301 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14302 executable_allocator_is_working = 1;
14303 }
14304 }
14305
14306 if (executable_allocator_is_working < 0)
14307 return PCRE2_ERROR_NOMEMORY;
14308
14309 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14310 options |= PCRE2_JIT_INVALID_UTF;
14311
14312 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14313 || functions->executable_funcs[0] == NULL)) {
14314 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14315 int result = jit_compile(code, options & ~excluded_options);
14316 if (result != 0)
14317 return result;
14318 }
14319
14320 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14321 || functions->executable_funcs[1] == NULL)) {
14322 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14323 int result = jit_compile(code, options & ~excluded_options);
14324 if (result != 0)
14325 return result;
14326 }
14327
14328 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14329 || functions->executable_funcs[2] == NULL)) {
14330 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14331 int result = jit_compile(code, options & ~excluded_options);
14332 if (result != 0)
14333 return result;
14334 }
14335
14336 return 0;
14337
14338 #endif /* SUPPORT_JIT */
14339 }
14340
14341 /* JIT compiler uses an all-in-one approach. This improves security,
14342 since the code generator functions are not exported. */
14343
14344 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14345
14346 #include "pcre2_jit_match.c"
14347 #include "pcre2_jit_misc.c"
14348
14349 /* End of pcre2_jit_compile.c */
14350