1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2019 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45
46 #include "pcre2_internal.h"
47
48 #ifdef SUPPORT_JIT
49
50 /* All-in-one: Since we use the JIT compiler only from here,
51 we just include it. This way we don't need to touch the build
52 system files. */
53
/* Configuration macros for the sljit sources that are included further down
in this file (sljit/sljitLir.c). They must be defined before that include. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG mirrors PCRE2_DEBUG: 1 when PCRE2_DEBUG is defined, else 0. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Route sljit's allocation macros to the two wrappers defined just below,
which forward to the callbacks of a pcre2_memctl block. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
pcre2_jit_malloc(size_t size,void * allocator_data)67 static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68 {
69 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70 return allocator->malloc(size, allocator->memory_data);
71 }
72
pcre2_jit_free(void * ptr,void * allocator_data)73 static void pcre2_jit_free(void *ptr, void *allocator_data)
74 {
75 pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76 allocator->free(ptr, allocator->memory_data);
77 }
78
79 #include "sljit/sljitLir.c"
80
81 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82 #error Unsupported architecture
83 #endif
84
85 /* Defines for debugging purposes. */
86
87 /* 1 - Use unoptimized capturing brackets.
88 2 - Enable capture_last_ptr (includes option 1). */
89 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91 /* 1 - Always have a control head. */
92 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94 /* Allocate memory for the regex stack on the real machine stack.
95 Fast, but limited size. */
96 #define MACHINE_STACK_SIZE 32768
97
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
100 #define STACK_GROWTH_RATE 8192
101
102 /* Enable to check that the allocation could destroy temporaries. */
103 #if defined SLJIT_DEBUG && SLJIT_DEBUG
104 #define DESTROY_REGISTERS 1
105 #endif
106
107 /*
Short summary about the backtracking mechanism employed by the jit code generator:
109
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116 'ab' - 'a' and 'b' regexps are concatenated
117 'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126 Matching path: match happens.
127 Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129 Matching path: no need to perform a match.
130 Backtrack path: match is required.
131
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
135
136 A(B|C)D
137
138 The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
Notice that the order of the backtrack code paths is the opposite of the order
of the fast code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
161 */
162
163 /*
164 Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171 The stack frames are stored in a chain list, and have the following format:
172 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174 Thus we can restore the private data to a particular point in the stack.
175 */
176
/* Argument block for the generated matcher: the jit_function type declared
further down takes a pointer to one of these. Members are grouped with all
pointers first and the remaining scalar members after them.
NOTE(review): generated code presumably addresses these members by offset, so
the member order should be treated as fixed - confirm before reordering. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* User callout function and the opaque pointer passed as its second argument. */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
194
/* Number of slots in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Bookkeeping for generated code. Each of the three arrays has one slot per
compile mode (JIT_NUMBER_OF_COMPILE_MODES slots); top_bracket and limit_match
are single values shared by all modes. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
204
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209
/* Node of a singly linked list of stubs; each node pairs a jump (start) with
a label (quit).
NOTE(review): presumably start enters the stub and quit is where it resumes -
confirm against the code that emits the stubs. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215
/* Negative sentinel values.
NOTE(review): presumably stored where a frame size is expected (several
backtrack structures in this file document framesize as "less than 0 if not
needed") - confirm against get_framesize. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220
/* Kinds of control entries: a mark or a then-trap.
NOTE(review): presumably tags entries of the control verb chain referenced by
compiler_common.control_head_ptr - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225
/* Kinds of early-fail handling: skip, fail, or fail within a range.
NOTE(review): presumably used by the "fast fail" optimization that owns
early_fail_start_ptr / early_fail_end_ptr in compiler_common - confirm. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231
/* Signature of a generated matcher: takes the jit_arguments block and returns
an int. SLJIT_FUNC supplies the calling convention expected by sljit. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants, so that a pointer to any descendant can be used as a
pointer to backtrack_common. */
typedef struct backtrack_common {
  /* Concatenation stack ("horizontal" relationship between operators). */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators, the "vertical" relationship). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
248
/* Backtrack data for assertions. Extends backtrack_common (first member). */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
259
/* Backtrack data for bracket groups. Extends backtrack_common (first member).
The union u holds exactly one of its members at a time; which one applies
depends on the opcode, as noted on each member. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
281
/* Backtrack data for the *POS bracket forms (NOTE(review): inferred from the
name; confirm against the code that allocates it). Extends backtrack_common
(first member). */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
291
/* Backtrack data carrying a single matching-path label (NOTE(review):
presumably for OP_BRAMINZERO, judging by the name - confirm). Extends
backtrack_common (first member). */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
296
/* Backtrack data for character iterators. Extends backtrack_common (first
member). The union u holds either a jump list or the charpos record, never
both. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    /* NOTE(review): presumably describes a character-position search (the
    character, its other-case bit and an on/off flag) - confirm. */
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
310
/* Backtrack data for iterated back references (NOTE(review): inferred from
the name - confirm). Extends backtrack_common (first member). */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
316
/* Node of a singly linked list describing one recursion target. Calls that
are emitted before the target's code exists are collected in the two jump
lists. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the label of the function's backtrack entry.
  NOTE(review): the original comment here repeated "function entry label";
  reworded from the member name - confirm. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
330
/* Backtrack data for a recursion call site. Extends backtrack_common (first
member). */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
340
/* Extra opcode value used only inside this file; it is defined as
OP_TABLE_LENGTH.
NOTE(review): presumably chosen because it cannot collide with a real
opcode - confirm. */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack data for a then-trap. Extends backtrack_common (first member). */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
355
/* Limits for the fast-forward character data. MAX_DIFF_CHARS is the capacity
of the chars array below.
NOTE(review): MAX_N_CHARS is presumably the number of consecutive positions
that are examined - confirm at its users. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of characters that may appear at one position. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
367
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3

/* State shared by the whole code generator for one compilation. The sljit_s32
members whose names end in _ptr are documented below as offsets or pointers
into the local data area, not as C pointers. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. Indexed by the opcode's distance
  from start (see the PRIVATE_DATA macro). */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. Indexed by capture
  group number. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
  (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0 (original wording).
  NOTE(review): the member name suggests the opposite sense (the pattern may
  match an empty string), and check_opcode_types sets it to TRUE when it sees
  OP_SET_SOM - confirm and reword. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
503
504 /* For byte_sequence_compare. */
505
/* Working state for comparing a sequence of code units. The members after
sourcereg exist only when SLJIT_UNALIGNED is defined and non-zero. The unions
c and oc give several views (int, 16-bit, code units) of the same storage;
the code-unit array is sized so that it holds 4 bytes for every code unit
width.
NOTE(review): c presumably holds the characters and oc their other-case
bits - confirm at the users. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
537
538 /* Undefine sljit macros. */
539 #undef CMP
540
541 /* Used for accessing the elements of the stack. */
542 #define STACK(i) ((i) * (int)sizeof(sljit_sw))
543
/* Only SLJIT_R2 and SLJIT_R3 are accepted as sljit's preferred shift
register; any other value stops the build. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers used by the generated code. TMP2 and
TMP3 swap their underlying registers when the preferred shift register is R3,
so that TMP2 is the preferred shift register in both configurations. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
569
/* 1 only for the 32-bit x86 sljit configuration, 0 everywhere else.
NOTE(review): presumably because some sljit registers are emulated in memory
on that target - confirm in the sljit documentation. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
575
/* Local space layout. All values are byte offsets, in units of
sizeof(sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached.
The macros below expand to expressions that reference a variable named
common (a compiler_common pointer), which must be in scope at the point of
use. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593
/* Code-unit-width dependent helpers:
  MOV_UCHAR    sljit move opcode for one code unit
  UCHAR_SHIFT  log2 of the code unit size in bytes; not defined at all for
               the 8-bit width, so users must guard on the width
  IN_UCHARS(x) converts a count of code units to a count of bytes
Any width other than 8, 16 or 32 stops the build. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
608
/* Shortcuts. Thin wrappers around the sljit emitter functions. Every macro
except DEFINE_COMPILER expands to a call that uses a local variable named
compiler; DEFINE_COMPILER declares that variable from common->compiler, so it
is expected at the top of each function that uses these shortcuts. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds a previously emitted jump to a label created at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
638
/* NOTE(review): presumably the "no upper bound" value for the maximum
character argument of the character readers - confirm at its users. */
#define READ_CHAR_MAX 0x7fffffff

/* NOTE(review): presumably the values substituted for an invalid code point
and for an unassigned one when invalid UTF is tolerated - confirm at their
users. INVALID_UTF_CHAR expands to an unparenthesized -1. */
#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
643
644 #if defined SUPPORT_UNICODE
645 #if PCRE2_CODE_UNIT_WIDTH == 8
646
/* 8-bit (UTF-8) forward decoder that tolerates invalid input.

Decodes the character starting at ptr into c and advances ptr past it.
Continuation bytes are only read after checking them against end; ptr[0]
itself is read unconditionally, so the caller must guarantee ptr < end.
Accepted forms: one byte <= 0x7f; two bytes with lead 0xc2-0xdf; three bytes
with lead 0xe0-0xef; four bytes with lead 0xf0-0xf4. Anything else executes
invalid_action with ptr unchanged.

Three-byte results below 0x800 or inside 0xd800-0xdfff, and four-byte results
outside 0x10000-0x10ffff, also execute invalid_action, but in those cases
ptr has already been advanced past the sequence.

The macro expands to a bare brace block (not do/while), so invalid_action may
be a statement such as break, continue or return that acts on the caller's
context. c, ptr and end are evaluated several times and must be free of side
effects. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
708
/* 8-bit (UTF-8) backward decoder that tolerates invalid input.

Decodes the character that ends just before ptr into c and moves ptr back to
its first byte. ptr[-1] is read unconditionally, so the caller must guarantee
ptr > start; every earlier byte is only read after a check against start.
Accepted forms are the same as for the forward decoder: one byte <= 0x7f, or
two / three / four bytes with lead bytes 0xc2-0xdf / 0xe0-0xef / 0xf0-0xf4.
Anything else executes invalid_action with ptr unchanged.

Three-byte results below 0x800 or inside 0xd800-0xdfff, and four-byte results
outside 0x10000-0x10ffff, also execute invalid_action, but in those cases
ptr has already been moved back over the sequence.

When walking backwards the byte read first is the LAST byte of the sequence,
so it supplies the lowest six bits; each earlier continuation byte is OR-ed in
six bits higher (shift 6, then 12) and the lead byte goes on top. Shifting the
accumulator left instead would assemble the bits in reverse order.

The macro expands to a bare brace block (not do/while), so invalid_action may
be a statement such as break, continue or return that acts on the caller's
context. c, ptr and start are evaluated several times and must be free of
side effects. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
771
772 #elif PCRE2_CODE_UNIT_WIDTH == 16
773
/* 16-bit (UTF-16) forward decoder that tolerates invalid input.

A code unit outside 0xd800-0xdfff is taken as is. A high surrogate
(0xd800-0xdbff) followed, before end, by a low surrogate (0xdc00-0xdfff) is
combined into a code point at or above 0x10000 and ptr advances by two.
Any other surrogate arrangement executes invalid_action with ptr unchanged.
ptr[0] is read unconditionally, so the caller must guarantee ptr < end.

The macro expands to a bare brace block and evaluates its arguments several
times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
788
/* 16-bit (UTF-16) backward decoder that tolerates invalid input.

c always receives ptr[-1] first. A code unit outside 0xd800-0xdfff is
accepted and ptr moves back by one. A low surrogate (0xdc00-0xdfff) preceded,
after start, by a high surrogate (0xd800-0xdbff) is combined into a code point
at or above 0x10000 and ptr moves back by two. Any other surrogate
arrangement executes invalid_action with ptr unchanged and c still holding
the raw code unit. ptr[-1] is read unconditionally, so the caller must
guarantee ptr > start.

The macro expands to a bare brace block and evaluates its arguments several
times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
804
805
806 #elif PCRE2_CODE_UNIT_WIDTH == 32
807
/* 32-bit (UTF-32) forward decoder that tolerates invalid input.

A code unit below 0xd800, or in 0xe000-0x10ffff, is stored in c and ptr
advances by one. Surrogates and values at or above 0x110000 execute
invalid_action with ptr and c unchanged. The end argument is not used in this
width; ptr[0] is read unconditionally.

The macro expands to a bare brace block and evaluates ptr several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
817
/* 32-bit (UTF-32) backward decoder that tolerates invalid input.

c always receives ptr[-1]. If that value is below 0xd800, or in
0xe000-0x10ffff, ptr moves back by one; otherwise invalid_action is executed
with ptr unchanged and c holding the rejected value. The start argument is
not used in this width; ptr[-1] is read unconditionally.

The macro expands to a bare brace block and evaluates ptr several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
828
829 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830 #endif /* SUPPORT_UNICODE */
831
/* Returns the address of the first opcode after the group that starts at cc.

cc must point at an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA) or at a
bracket opcode (OP_ONCE .. OP_SCOND). Each branch stores its length as the
link value right after its opcode, so the first branch and every following
OP_ALT branch are stepped over until the closing OP_KET* opcode is reached;
the result skips that opcode and its link as well. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Step over the first branch, then over every alternative. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return cc + 1 + LINK_SIZE;
}
840
/* Returns the number of branches in the group that starts at cc.

cc must point at an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA) or at a
bracket opcode (OP_ONCE .. OP_SCOND). The first branch counts as one and each
OP_ALT reached by following the branch links adds one more, so the result is
always at least 1. */
static int no_alternatives(PCRE2_SPTR cc)
{
int branches;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first branch is always present. */
branches = 1;
cc += GET(cc, 1);

while (*cc == OP_ALT)
  {
  branches++;
  cc += GET(cc, 1);
  }

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return branches;
}
854
/* Functions which might need modification for every newly supported opcode:
856 next_opcode
857 check_opcode_types
858 set_private_data_ptrs
859 get_framesize
860 init_frame
861 get_recurse_data_length
862 copy_recurse_data
863 compile_matchingpath
864 compile_backtrackingpath
865 */
866
/* Returns the address of the opcode that follows the one at cc.

common is consulted only in Unicode builds, for its utf flag. The result is
NULL for OP_ANYBYTE when common->utf is set, and for any opcode that is not
listed here (that case also trips SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Opcodes that need individual treatment. */

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the item itself. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, a length code unit, and a name whose length is that code unit
  plus two more code units. */
  case OP_MARK: case OP_COMMIT_ARG: case OP_PRUNE_ARG:
  case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  /* Type repeats: one code unit less than the table length is skipped, so
  the result points at the repeated type opcode. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Opcodes that end with a literal character: table length, plus the extra
  code units of that character when running in UTF mode. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS:
  case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI:
  case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1])) next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Everything else has the fixed length recorded in the length table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1067
check_opcode_types(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend)1068 static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1069 {
1070 int count;
1071 PCRE2_SPTR slot;
1072 PCRE2_SPTR assert_back_end = cc - 1;
1073 PCRE2_SPTR assert_na_end = cc - 1;
1074
1075 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1076 while (cc < ccend)
1077 {
1078 switch(*cc)
1079 {
1080 case OP_SET_SOM:
1081 common->has_set_som = TRUE;
1082 common->might_be_empty = TRUE;
1083 cc += 1;
1084 break;
1085
1086 case OP_REFI:
1087 #ifdef SUPPORT_UNICODE
1088 if (common->iref_ptr == 0)
1089 {
1090 common->iref_ptr = common->ovector_start;
1091 common->ovector_start += 3 * sizeof(sljit_sw);
1092 }
1093 #endif /* SUPPORT_UNICODE */
1094 /* Fall through. */
1095 case OP_REF:
1096 common->optimized_cbracket[GET2(cc, 1)] = 0;
1097 cc += 1 + IMM2_SIZE;
1098 break;
1099
1100 case OP_ASSERT_NA:
1101 case OP_ASSERTBACK_NA:
1102 slot = bracketend(cc);
1103 if (slot > assert_na_end)
1104 assert_na_end = slot;
1105 cc += 1 + LINK_SIZE;
1106 break;
1107
1108 case OP_CBRAPOS:
1109 case OP_SCBRAPOS:
1110 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1111 cc += 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 case OP_COND:
1115 case OP_SCOND:
1116 /* Only AUTO_CALLOUT can insert this opcode. We do
1117 not intend to support this case. */
1118 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1119 return FALSE;
1120 cc += 1 + LINK_SIZE;
1121 break;
1122
1123 case OP_CREF:
1124 common->optimized_cbracket[GET2(cc, 1)] = 0;
1125 cc += 1 + IMM2_SIZE;
1126 break;
1127
1128 case OP_DNREF:
1129 case OP_DNREFI:
1130 case OP_DNCREF:
1131 count = GET2(cc, 1 + IMM2_SIZE);
1132 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1133 while (count-- > 0)
1134 {
1135 common->optimized_cbracket[GET2(slot, 0)] = 0;
1136 slot += common->name_entry_size;
1137 }
1138 cc += 1 + 2 * IMM2_SIZE;
1139 break;
1140
1141 case OP_RECURSE:
1142 /* Set its value only once. */
1143 if (common->recursive_head_ptr == 0)
1144 {
1145 common->recursive_head_ptr = common->ovector_start;
1146 common->ovector_start += sizeof(sljit_sw);
1147 }
1148 cc += 1 + LINK_SIZE;
1149 break;
1150
1151 case OP_CALLOUT:
1152 case OP_CALLOUT_STR:
1153 if (common->capture_last_ptr == 0)
1154 {
1155 common->capture_last_ptr = common->ovector_start;
1156 common->ovector_start += sizeof(sljit_sw);
1157 }
1158 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1159 break;
1160
1161 case OP_ASSERTBACK:
1162 slot = bracketend(cc);
1163 if (slot > assert_back_end)
1164 assert_back_end = slot;
1165 cc += 1 + LINK_SIZE;
1166 break;
1167
1168 case OP_THEN_ARG:
1169 common->has_then = TRUE;
1170 common->control_head_ptr = 1;
1171 /* Fall through. */
1172
1173 case OP_COMMIT_ARG:
1174 case OP_PRUNE_ARG:
1175 if (cc < assert_na_end)
1176 return FALSE;
1177 /* Fall through */
1178 case OP_MARK:
1179 if (common->mark_ptr == 0)
1180 {
1181 common->mark_ptr = common->ovector_start;
1182 common->ovector_start += sizeof(sljit_sw);
1183 }
1184 cc += 1 + 2 + cc[1];
1185 break;
1186
1187 case OP_THEN:
1188 common->has_then = TRUE;
1189 common->control_head_ptr = 1;
1190 cc += 1;
1191 break;
1192
1193 case OP_SKIP:
1194 if (cc < assert_back_end)
1195 common->has_skip_in_assert_back = TRUE;
1196 if (cc < assert_na_end)
1197 return FALSE;
1198 cc += 1;
1199 break;
1200
1201 case OP_SKIP_ARG:
1202 common->control_head_ptr = 1;
1203 common->has_skip_arg = TRUE;
1204 if (cc < assert_back_end)
1205 common->has_skip_in_assert_back = TRUE;
1206 if (cc < assert_na_end)
1207 return FALSE;
1208 cc += 1 + 2 + cc[1];
1209 break;
1210
1211 case OP_PRUNE:
1212 case OP_COMMIT:
1213 case OP_ASSERT_ACCEPT:
1214 if (cc < assert_na_end)
1215 return FALSE;
1216 cc++;
1217 break;
1218
1219 default:
1220 cc = next_opcode(common, cc);
1221 if (cc == NULL)
1222 return FALSE;
1223 break;
1224 }
1225 }
1226 return TRUE;
1227 }
1228
/* Upper limit of the counter maintained by detect_early_fail(). The scan of
an alternative stops as soon as the counter reaches this value, and the same
value is returned when the scan had to be abandoned. */
#define EARLY_FAIL_ENHANCE_MAX (1 + 1)

/*
Scans the beginning of every alternative of the bracket at cc (which must be
OP_ONCE, OP_BRA or an optimized OP_CBRA) and looks for star / plus iterators
of single characters, character types and character classes. For each
iterator selected, an offset taken from *private_data_start is combined with
a type tag (type_skip, type_fail or type_fail_range) and stored in
common->private_data_ptrs at the index following the iterator opcode, and
*private_data_start is advanced by the words consumed.

depth is the nesting level of the bracket; nested brackets are followed only
while depth is below 4.

start:
  0 - skip / early fail allowed
  1 - only early fail with range allowed
  >1 - (start - 1) early fail is processed

return: current number of iterators enhanced with fast fail
        (EARLY_FAIL_ENHANCE_MAX when the scan of an alternative stopped at
        something other than OP_ALT / OP_KET, or when *private_data_start
        exceeded SLJIT_MAX_LOCAL_SIZE)
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
int result = 0;
int count;
/* type_skip is only considered while this flag is set; it is cleared by
any item that is not zero width and after the first alternative. */
BOOL fast_forward_allowed = TRUE;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* One iteration per alternative of the bracket. */
do
  {
  /* Every alternative starts counting from the caller supplied value. */
  count = start;
  next_alt = cc + GET(cc, 1);
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* Inside this loop "continue" moves on to the next opcode of the
  alternative, while "break" out of the switch reaches the code below the
  switch, which processes accelerated_start (if set) and otherwise ends the
  scan of this alternative. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      /* Zero width assertions. */
      cc++;
      continue;

      /* Single character types: stepped over, but they rule out type_skip. */
      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      fast_forward_allowed = FALSE;
      cc++;
      continue;

      /* These two additionally force count to be at least 1, so only
      type_fail_range can be selected afterwards. */
      case OP_ANYNL:
      case OP_EXTUNI:
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      fast_forward_allowed = FALSE;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      fast_forward_allowed = FALSE;
      cc += 2;
#ifdef SUPPORT_UNICODE
      /* Step over the remaining code units of a multi-unit character. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      /* Candidate iterator, unless it repeats OP_ANYNL / OP_EXTUNI. */
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count == 0)
        count = 1;
      fast_forward_allowed = FALSE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 1;
      continue;

      /* Star and plus iterators of a single character: candidates. */
      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      /* Counted and optional single character iterators are not candidates. */
      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* cc now points to the item after the class. The class remains a
      candidate only when that item is a star or plus repeat. */
      switch (*cc)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count == 0)
          count = 1;
        /* Fall through */
        default:
        /* No repeat, or a repeat which is not a candidate: the opcode at
        cc is examined by the next iteration of the outer loop. */
        accelerated_start = NULL;
        fast_forward_allowed = FALSE;
        continue;
        }
      break;

      case OP_ONCE:
      case OP_BRA:
      case OP_CBRA:
      end = cc + GET(cc, 1);

      /* A bracket with a single alternative and no private data on its
      OP_KET is entered in place; its OP_KET is stepped over later by the
      OP_KET case. */
      if (*end == OP_KET && PRIVATE_DATA(end) == 0)
        {
        if (*cc == OP_CBRA)
          {
          if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
            break;
          cc += IMM2_SIZE;
          }

        cc += 1 + LINK_SIZE;
        continue;
        }

      /* Otherwise the bracket is processed by a recursive call, provided
      the nesting limit is not reached and the bracket ends with a plain
      OP_KET without private data. */
      fast_forward_allowed = FALSE;
      if (depth >= 4)
        break;

      end = bracketend(cc) - (1 + LINK_SIZE);
      if (*end != OP_KET || PRIVATE_DATA(end) != 0)
        break;

      if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        break;

      count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end + (1 + LINK_SIZE);
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* The OP_KET closing the scanned alternative ends the scan; an
      OP_KET before that point is stepped over. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (accelerated_start != NULL)
      {
      if (count == 0)
        {
        count++;

        /* type_skip is chosen only when nothing except zero width items
        came before the iterator and the bracket has no further
        alternative (next_alt points to OP_KET). */
        if (fast_forward_allowed && *next_alt == OP_KET)
          {
          common->fast_forward_bc_ptr = accelerated_start;
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
          *private_data_start += sizeof(sljit_sw);
          }
        else
          {
          /* One word is taken for type_fail. */
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

          if (common->early_fail_start_ptr == 0)
            common->early_fail_start_ptr = *private_data_start;

          *private_data_start += sizeof(sljit_sw);
          common->early_fail_end_ptr = *private_data_start;

          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
            return EARLY_FAIL_ENHANCE_MAX;
          }
        }
      else
        {
        /* Two words are taken for type_fail_range. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += 2 * sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;
        }

      count++;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        continue;
      }

    break;
    }

  /* An alternative which was not scanned up to its end forces the maximum
  result; otherwise the largest count of the alternatives is kept. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  fast_forward_allowed = FALSE;
  cc = next_alt;
  }
while (*cc == OP_ALT);

return result;
}
1579
/* Maps the repeat opcode at cc (the item following a character class) to a
value between 0 and 2. The caller in this file uses the result as the number
of private words assigned to the class iterator.

  greedy star / plus               : 2
  lazy star / plus, both queries   : 1
  greedy / lazy range              : 2 or 1 when the upper limit is zero,
                                     otherwise (upper - lower) capped at 2
  anything else                    : 0 */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

switch(*cc)
  {
  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* A zero upper limit is treated like the star form of the repeat. */
  if (upper == 0)
    return (*cc == OP_CRMINRANGE) ? 1 : 2;

  /* Unsigned difference, limited to two words. */
  span = upper - lower;
  return (span > 2) ? 2 : (int)span;

  default:
  break;
  }

return 0;
}
1611
/* Checks whether the bracket at begin is followed by identical copies of
itself, optionally followed by a chain of nested optional copies introduced by
OP_BRAZERO / OP_BRAMINZERO. When such a sequence is found it is recorded in
common->private_data_ptrs, in three consecutive entries starting at the
index just after the closing OP_KET opcode of a copy:

  entry + 0: distance from the end of that copy to the end of the sequence
  entry + 1: OP_EXACT, OP_UPTO or OP_MINUPTO
  entry + 2: the repeat count

Returns TRUE when a repeat was recorded by this call, or when the first
entry of the bracket is already non-zero. Returns FALSE otherwise, including
when the bracket does not end with a plain OP_KET. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR next;
PCRE2_SPTR next_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR type;
/* Length of the whole bracket in code units; every copy must match it. */
sljit_sw length = end - begin;
sljit_s32 min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the consecutive copies which are identical to the bracket;
min includes the bracket itself. */
next = end;
min = 1;
while (1)
  {
  if (*next != *begin)
    break;
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly two copies are never converted. */
if (min == 2)
  return FALSE;

max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;
  /* Each level of the optional chain is: type, OP_BRA, copy of the
  bracket. The OP_BRA stays open, so the levels are nested. */
  while (1)
    {
    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
      break;
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost level is: type, copy of the bracket (no OP_BRA). */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* One OP_KET is expected for each OP_BRA opened above. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last fixed copy now belongs to the optional repeat, so the
        fixed part is one copy shorter and ends one copy earlier. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

/* Record the fixed part when at least three copies remain. */
if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1696
/* Groups of case labels for iterator opcodes. Each macro expands to a list
of "case" labels and must be followed by the statements handling the group.
The comments below describe how set_private_data_ptrs() treats each group. */

/* Single character iterators: one private word, opcode + character. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators: two private words, opcode + character. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators: two private words, opcode + IMM2_SIZE count
+ character. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: one private word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: two private words, unless the repeated type is
OP_ANYNL or OP_EXTUNI, in which case no word is assigned. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Counted character type iterators. set_private_data_ptrs() handles these
two opcodes with separate case labels rather than through this macro. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1748
/* Walks the compiled pattern from common->start to ccend and assigns an
offset to every opcode which needs private words: the offset is stored in
common->private_data_ptrs at the index of the opcode. Offsets are handed out
sequentially starting from *private_data_start, and the first unused offset
is written back to *private_data_start on return.

The walk stops early when the next offset exceeds SLJIT_MAX_LOCAL_SIZE; the
value written back is then larger than that limit. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Iterators located before this address get no private slot (see the
"cc >= end" test below). NULL means there is no such region. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words assigned to an iterator.
size: code units to step over; a negative value marks an opcode followed
by a character, whose extra UTF code units must be stepped over as well.
bracketlen: length of a bracket header to step over. */
int space, size, bracketlen;
/* Cleared for the bracket which follows OP_BRAZERO / OP_BRAMINZERO /
OP_BRAPOSZERO, so detect_repeat() is not called for it. */
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET was written by detect_repeat():
    the OP_KET gets one word and the recorded distance (the repeated
    copies) is stepped over. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Brackets without a private word. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The number of words depends on the repeat following the class. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    /* No private data; space, size and bracketlen all remain zero, so
    nothing below changes cc again. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Entering a bracket outside of the current region: the bracket
    becomes the new region unless it ends with a plain OP_KET. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1931
/* Computes the number of stack words of the frame which saves the state
changed between cc and ccend. When ccend is NULL, cc must point to a
bracket and the range is the inside of that bracket.

The count is built as follows: two words the first time a start of match
update, a mark, or (when common->capture_last_ptr is set) a capture is
found, and three words for every capturing bracket. OP_RECURSE counts as
all three "first time" items which are enabled in common. When recursive is
TRUE the start of match and mark items are never counted.

*needs_control_head is set to TRUE when common->control_head_ptr is
non-zero and a mark, an (*...:NAME) verb or OP_THEN is in the range.

Returns the word count plus one when a frame is needed (init_frame() writes
one terminating word after the saved items). Otherwise returns with a
frame_types (always < 0): no_frame when the range contains any opcode
outside of the list in the switch below, no_stack when it does not. */
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
{
int length = 0;
/* Words accounted for a possessive capturing bracket itself. */
int possessive = 0;
BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_THEN:
    stack_restore = TRUE;
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    cc ++;
    break;

    /* Every opcode which is not named in this switch sets stack_restore.
    The opcodes listed after the default label are stepped over in the
    same way, but leave stack_restore unchanged. */
    default:
    stack_restore = TRUE;
    /* Fall through. */

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    case OP_CALLOUT:
    case OP_CALLOUT_STR:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
2118
init_frame(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,int stackpos,int stacktop)2119 static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2120 {
2121 DEFINE_COMPILER;
2122 BOOL setsom_found = FALSE;
2123 BOOL setmark_found = FALSE;
2124 /* The last capture is a local variable even for recursions. */
2125 BOOL capture_last_found = FALSE;
2126 int offset;
2127
2128 /* >= 1 + shortest item size (2) */
2129 SLJIT_UNUSED_ARG(stacktop);
2130 SLJIT_ASSERT(stackpos >= stacktop + 2);
2131
2132 stackpos = STACK(stackpos);
2133 if (ccend == NULL)
2134 {
2135 ccend = bracketend(cc) - (1 + LINK_SIZE);
2136 if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2137 cc = next_opcode(common, cc);
2138 }
2139
2140 SLJIT_ASSERT(cc != NULL);
2141 while (cc < ccend)
2142 switch(*cc)
2143 {
2144 case OP_SET_SOM:
2145 SLJIT_ASSERT(common->has_set_som);
2146 if (!setsom_found)
2147 {
2148 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2150 stackpos -= (int)sizeof(sljit_sw);
2151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2152 stackpos -= (int)sizeof(sljit_sw);
2153 setsom_found = TRUE;
2154 }
2155 cc += 1;
2156 break;
2157
2158 case OP_MARK:
2159 case OP_COMMIT_ARG:
2160 case OP_PRUNE_ARG:
2161 case OP_THEN_ARG:
2162 SLJIT_ASSERT(common->mark_ptr != 0);
2163 if (!setmark_found)
2164 {
2165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2167 stackpos -= (int)sizeof(sljit_sw);
2168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2169 stackpos -= (int)sizeof(sljit_sw);
2170 setmark_found = TRUE;
2171 }
2172 cc += 1 + 2 + cc[1];
2173 break;
2174
2175 case OP_RECURSE:
2176 if (common->has_set_som && !setsom_found)
2177 {
2178 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2179 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2180 stackpos -= (int)sizeof(sljit_sw);
2181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2182 stackpos -= (int)sizeof(sljit_sw);
2183 setsom_found = TRUE;
2184 }
2185 if (common->mark_ptr != 0 && !setmark_found)
2186 {
2187 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2189 stackpos -= (int)sizeof(sljit_sw);
2190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2191 stackpos -= (int)sizeof(sljit_sw);
2192 setmark_found = TRUE;
2193 }
2194 if (common->capture_last_ptr != 0 && !capture_last_found)
2195 {
2196 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2198 stackpos -= (int)sizeof(sljit_sw);
2199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2200 stackpos -= (int)sizeof(sljit_sw);
2201 capture_last_found = TRUE;
2202 }
2203 cc += 1 + LINK_SIZE;
2204 break;
2205
2206 case OP_CBRA:
2207 case OP_CBRAPOS:
2208 case OP_SCBRA:
2209 case OP_SCBRAPOS:
2210 if (common->capture_last_ptr != 0 && !capture_last_found)
2211 {
2212 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2214 stackpos -= (int)sizeof(sljit_sw);
2215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2216 stackpos -= (int)sizeof(sljit_sw);
2217 capture_last_found = TRUE;
2218 }
2219 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2221 stackpos -= (int)sizeof(sljit_sw);
2222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2223 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2225 stackpos -= (int)sizeof(sljit_sw);
2226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2227 stackpos -= (int)sizeof(sljit_sw);
2228
2229 cc += 1 + LINK_SIZE + IMM2_SIZE;
2230 break;
2231
2232 default:
2233 cc = next_opcode(common, cc);
2234 SLJIT_ASSERT(cc != NULL);
2235 break;
2236 }
2237
2238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2239 SLJIT_ASSERT(stackpos == STACK(stacktop));
2240 }
2241
/* Number of temporary registers rotated by the delayed_mem_copy_* helpers. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a sequence of memory-to-memory copies. A copy is performed as a
load into a temporary register followed by a store which is emitted later,
when the same register is needed again or when the sequence is finished. */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting the instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each temporary register,
  or -1 when the register holds no value waiting to be stored. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each temporary register. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Registers used for the copies; set by the caller before
  delayed_mem_copy_init(). */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the original value of tmp_regs[i] is kept while it is used for
  copying. The save / restore is only emitted when
  sljit_get_register_index() is negative for this entry. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the temporary register used by the next copy. */
  int next_tmp_reg;
} delayed_mem_copy_status;
2252
/* Prepares status for a new sequence of delayed copies: no temporary
register has a pending store, and the rotation starts with the first one.
The tmp_regs and saved_tmp_regs arrays must be filled in by the caller
before this function is called; they are only checked here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* When no save / restore is emitted for a register, both arrays must
  name the same register. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 marks a register without a pending store. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2267
/* Emits the load half of a copy from [load_base + load_offset] to
[store_base + store_offset]. The value is loaded into the next temporary
register of the rotation and the store is only recorded in status; it is
emitted when the register is reused by a later call or by
delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[slot] != -1)
  {
  /* The register still holds an earlier value: store it first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the register. Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

/* Remember where the loaded value has to be stored. */
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2292
/* Emits the stores which are still pending in status. The temporary
registers are visited in rotation order, starting with the one which would
be used by the next copy. After its store is emitted, a register whose
original value was saved by delayed_mem_copy_move() is restored. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining, reg, saved_reg;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0;
     remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* Nothing was loaded into this register. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  saved_reg = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(saved_reg) < 0)
    OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
  }
}

#undef RECURSE_TMP_REG_COUNT
2318
/* Scans the compiled pattern between cc and ccend and counts how many
machine words are needed to save its state around a recursion.

The count starts at one (copy_recurse_data uses the first word for
recursive_head_ptr). Each opcode that owns private data, each capturing
bracket, and the one-off values (control head, capture_last, and - only when
a quit verb is present - the start-of-match and mark values) add to it.

Arguments:
  common              the compiler state
  cc                  first opcode to scan
  ccend               end of the scanned range (cc must land exactly on it)
  needs_control_head  set to TRUE if a control head word is counted
  has_quit            set to TRUE if PRUNE/SKIP/COMMIT/THEN (with or without
                        argument) was seen
  has_accept          set to TRUE if ACCEPT or ASSERT_ACCEPT was seen

Returns:              the number of machine words
*/

static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int words = 1;
int class_len;
PCRE2_SPTR alternative;
BOOL saw_quit = FALSE;
BOOL saw_accept = FALSE;
BOOL saw_set_som = FALSE;
BOOL saw_mark = FALSE;
BOOL saw_capture_last = FALSE;
BOOL saw_control_head = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
saw_control_head = TRUE;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    saw_set_som = TRUE;
    cc += 1;
    break;

    case OP_RECURSE:
    /* A nested recursion may touch any of the one-off values. */
    if (common->has_set_som)
      saw_set_som = TRUE;
    if (common->mark_ptr != 0)
      saw_mark = TRUE;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      words += 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* The slot after the KET holds an extra distance to skip. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    words += 1;
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Two ovector words, plus one more unless the bracket is optimized. */
    words += 2;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      words += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    /* Two ovector words and two private words. */
    words += 2 + 2;
    if (common->capture_last_ptr != 0)
      saw_capture_last = TRUE;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      words += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc) != 0)
      words += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc) != 0)
      words += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* The word count depends on the repeat opcode following the class. */
    if (PRIVATE_DATA(cc) != 0)
      words += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    saw_mark = TRUE;
    if (common->control_head_ptr != 0)
      saw_control_head = TRUE;
    /* Every opcode of this group except a plain MARK is a quit verb. */
    if (*cc != OP_MARK)
      saw_quit = TRUE;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    saw_quit = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    saw_quit = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    saw_quit = TRUE;
    saw_control_head = TRUE;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    saw_accept = TRUE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One-off words. Start-of-match and mark are only counted when a quit verb
is present. */
if (saw_control_head)
  words += 1;
if (saw_capture_last)
  words += 1;
if (saw_quit && saw_set_som)
  words += 1;
if (saw_quit && saw_mark)
  words += 1;

*needs_control_head = saw_control_head;
*has_quit = saw_quit;
*has_accept = saw_accept;
return words;
}
2536
/* Copy modes accepted by copy_recurse_data(). In the descriptions below
"locals" are the SLJIT_SP relative slots and "stack" is the area addressed
through the base register chosen by copy_recurse_data(). */
enum copy_recurse_data_types {
  /* locals -> stack: private, shared and kept-shared words. */
  recurse_copy_from_global = 0,
  /* stack -> locals: private words only. */
  recurse_copy_private_to_global,
  /* stack -> locals: shared and kept-shared words. */
  recurse_copy_shared_to_global,
  /* stack -> locals: kept-shared words only. */
  recurse_copy_kept_shared_to_global,
  /* Exchange locals and stack (TMP2 based): private and shared words. */
  recurse_swap_global
};
2544
/* Emits the code that moves recursion state between the SLJIT_SP relative
local slots and a save area addressed through STACK_TOP (or TMP2 for
recurse_swap_global).

The save area layout is: the recursive head word, the control head word
(only when DEBUG_FORCE_CONTROL_HEAD is enabled), and then, for every opcode
between cc and ccend in order, its private words, its shared words and its
kept-shared words. Which of the three classes is really copied depends on
type; words of a class that is not copied are skipped by advancing the stack
offset only. The opcode walk must stay in step with
get_recurse_data_length().

Arguments:
  common    the compiler state
  cc        first opcode of the recursed group
  ccend     end of the recursed group
  type      one of enum copy_recurse_data_types
  stackptr  index (in words) of the first word of the save area
  stacktop  index (in words) just past the last word of the save area
  has_quit  TRUE if the group contains a quit verb; only then are the
              start-of-match and mark values handled as kept-shared words
*/

static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, BOOL has_quit)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
BOOL control_head_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
#endif

/* from_sp selects the direction (locals -> save area when TRUE). */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

/* Convert word indexes to byte offsets. */
stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Choose the working registers of the delayed copy. TMP2 cannot be used as a
working register when it is the base register. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The recursive head is handled like a private word. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
/* The forced control head word is private data (the main loop below stores
control_head_ptr in private_srcw), so it must be gated exactly like every
other private word: it is skipped for both shared-only copy modes. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  /* Collect the local slot offsets that belong to the current opcode. */
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (has_quit && !setsom_found)
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (has_quit)
      {
      if (common->has_set_som && !setsom_found)
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        setsom_found = TRUE;
        }
      if (common->mark_ptr != 0 && !setmark_found)
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        setmark_found = TRUE;
        }
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_count = 1;
    private_srcw[0] = PRIVATE_DATA(cc);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    shared_srcw[0] = OVECTOR(offset);
    shared_srcw[1] = OVECTOR(offset + 1);
    shared_count = 2;

    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      shared_srcw[2] = common->capture_last_ptr;
      shared_count = 3;
      capture_last_found = TRUE;
      }

    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      {
      private_count = 1;
      private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    shared_srcw[0] = OVECTOR(offset);
    shared_srcw[1] = OVECTOR(offset + 1);
    shared_count = 2;

    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      shared_srcw[2] = common->capture_last_ptr;
      shared_count = 3;
      capture_last_found = TRUE;
      }

    private_count = 2;
    private_srcw[0] = PRIVATE_DATA(cc);
    private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      private_count = 2;
      private_srcw[0] = PRIVATE_DATA(cc);
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    if (PRIVATE_DATA(cc) != 0)
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        private_count = 1;
        private_srcw[0] = PRIVATE_DATA(cc);
        break;

        case 2:
        private_count = 2;
        private_srcw[0] = PRIVATE_DATA(cc);
        private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (has_quit && !setmark_found)
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0 && !control_head_found)
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      control_head_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (!control_head_found)
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      control_head_found = TRUE;
      }
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Private words: copied by from_global, private_to_global and swap. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  /* Shared words: copied by from_global, shared_to_global and swap. */
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  /* Kept-shared words: copied by from_global, shared_to_global and
  kept_shared_to_global, but never swapped. */
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* The walk must have consumed exactly the reserved save area. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

delayed_mem_copy_finish(&status);
}
2963
set_then_offsets(compiler_common * common,PCRE2_SPTR cc,sljit_u8 * current_offset)2964 static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2965 {
2966 PCRE2_SPTR end = bracketend(cc);
2967 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2968
2969 /* Assert captures then. */
2970 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
2971 current_offset = NULL;
2972 /* Conditional block does not. */
2973 if (*cc == OP_COND || *cc == OP_SCOND)
2974 has_alternatives = FALSE;
2975
2976 cc = next_opcode(common, cc);
2977 if (has_alternatives)
2978 current_offset = common->then_offsets + (cc - common->start);
2979
2980 while (cc < end)
2981 {
2982 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2983 cc = set_then_offsets(common, cc, current_offset);
2984 else
2985 {
2986 if (*cc == OP_ALT && has_alternatives)
2987 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2988 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2989 *current_offset = 1;
2990 cc = next_opcode(common, cc);
2991 }
2992 }
2993
2994 return end;
2995 }
2996
2997 #undef CASE_ITERATOR_PRIVATE_DATA_1
2998 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2999 #undef CASE_ITERATOR_PRIVATE_DATA_2B
3000 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3001 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3002 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3003
is_powerof2(unsigned int value)3004 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3005 {
3006 return (value & (value - 1)) == 0;
3007 }
3008
set_jumps(jump_list * list,struct sljit_label * label)3009 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3010 {
3011 while (list)
3012 {
3013 /* sljit_set_label is clever enough to do nothing
3014 if either the jump or the label is NULL. */
3015 SET_LABEL(list->jump, label);
3016 list = list->next;
3017 }
3018 }
3019
add_jump(struct sljit_compiler * compiler,jump_list ** list,struct sljit_jump * jump)3020 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3021 {
3022 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3023 if (list_item)
3024 {
3025 list_item->next = *list;
3026 list_item->jump = jump;
3027 *list = list_item;
3028 }
3029 }
3030
/* Registers a stub on common->stubs. The stub remembers the jump that enters
it (start) and a label emitted at the current position (quit), which
flush_stubs() later jumps back to. If sljit_alloc_memory() returns NULL
nothing is recorded and no label is emitted. */

static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();

/* Link the new stub in at the head of the list. */
stub->next = common->stubs;
common->stubs = stub;
}
3044
/* Emits the out-of-line code of every stub registered by add_stub(): the
stub's entry jump is bound here, a fast call is emitted and queued on
common->stackalloc, and control jumps back to the stub's quit label. The
stub list is emptied afterwards. */

static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;
struct sljit_jump *call;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  call = JUMP(SLJIT_FAST_CALL);
  add_jump(compiler, &common->stackalloc, call);
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3059
count_match(compiler_common * common)3060 static SLJIT_INLINE void count_match(compiler_common *common)
3061 {
3062 DEFINE_COMPILER;
3063
3064 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3065 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3066 }
3067
allocate_stack(compiler_common * common,int size)3068 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3069 {
3070 /* May destroy all locals and registers except TMP2. */
3071 DEFINE_COMPILER;
3072
3073 SLJIT_ASSERT(size > 0);
3074 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3075 #ifdef DESTROY_REGISTERS
3076 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3077 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3078 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3081 #endif
3082 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3083 }
3084
free_stack(compiler_common * common,int size)3085 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3086 {
3087 DEFINE_COMPILER;
3088
3089 SLJIT_ASSERT(size > 0);
3090 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3091 }
3092
/* Allocates a block of size bytes with SLJIT_MALLOC and chains it onto
common->read_only_data_head. One extra sljit_uw is allocated in front of the
payload and holds the previous list head.

Returns a pointer to the payload (just past the link word), or NULL if the
compiler is already in an error state or the allocation fails; in the latter
case the compiler memory error is set. */

static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word links to the previously allocated block. */
*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;

return block + 1;
}
3112
reset_ovector(compiler_common * common,int length)3113 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3114 {
3115 DEFINE_COMPILER;
3116 struct sljit_label *loop;
3117 sljit_s32 i;
3118
3119 /* At this point we can freely use all temporary registers. */
3120 SLJIT_ASSERT(length > 1);
3121 /* TMP1 returns with begin - 1. */
3122 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3123 if (length < 8)
3124 {
3125 for (i = 1; i < length; i++)
3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3127 }
3128 else
3129 {
3130 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3131 {
3132 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3133 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3134 loop = LABEL();
3135 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3136 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3137 JUMPTO(SLJIT_NOT_ZERO, loop);
3138 }
3139 else
3140 {
3141 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3142 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3143 loop = LABEL();
3144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3145 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3146 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3147 JUMPTO(SLJIT_NOT_ZERO, loop);
3148 }
3149 }
3150 }
3151
reset_early_fail(compiler_common * common)3152 static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3153 {
3154 DEFINE_COMPILER;
3155 sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3156 sljit_u32 uncleared_size;
3157 sljit_s32 src = SLJIT_IMM;
3158 sljit_s32 i;
3159 struct sljit_label *loop;
3160
3161 SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3162
3163 if (size == sizeof(sljit_sw))
3164 {
3165 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3166 return;
3167 }
3168
3169 if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3170 {
3171 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3172 src = TMP3;
3173 }
3174
3175 if (size <= 6 * sizeof(sljit_sw))
3176 {
3177 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3179 return;
3180 }
3181
3182 GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3183
3184 uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3185
3186 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3187
3188 loop = LABEL();
3189 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3190 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3191 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3192 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3193 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3194
3195 if (uncleared_size >= sizeof(sljit_sw))
3196 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3197
3198 if (uncleared_size >= 2 * sizeof(sljit_sw))
3199 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3200 }
3201
do_reset_match(compiler_common * common,int length)3202 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3203 {
3204 DEFINE_COMPILER;
3205 struct sljit_label *loop;
3206 int i;
3207
3208 SLJIT_ASSERT(length > 1);
3209 /* OVECTOR(1) contains the "string begin - 1" constant. */
3210 if (length > 2)
3211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3212 if (length < 8)
3213 {
3214 for (i = 2; i < length; i++)
3215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3216 }
3217 else
3218 {
3219 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3220 {
3221 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3222 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3223 loop = LABEL();
3224 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3225 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3226 JUMPTO(SLJIT_NOT_ZERO, loop);
3227 }
3228 else
3229 {
3230 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3231 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3232 loop = LABEL();
3233 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3234 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3235 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3236 JUMPTO(SLJIT_NOT_ZERO, loop);
3237 }
3238 }
3239
3240 if (!HAS_VIRTUAL_REGISTERS)
3241 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3242 else
3243 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3244
3245 if (common->mark_ptr != 0)
3246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3247 if (common->control_head_ptr != 0)
3248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3249 if (HAS_VIRTUAL_REGISTERS)
3250 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3251
3252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3253 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3254 }
3255
do_search_mark(sljit_sw * current,PCRE2_SPTR skip_arg)3256 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3257 {
3258 while (current != NULL)
3259 {
3260 switch (current[1])
3261 {
3262 case type_then_trap:
3263 break;
3264
3265 case type_mark:
3266 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3267 return current[3];
3268 break;
3269
3270 default:
3271 SLJIT_UNREACHABLE();
3272 break;
3273 }
3274 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3275 current = (sljit_sw*)current[0];
3276 }
3277 return 0;
3278 }
3279
copy_ovector(compiler_common * common,int topbracket)3280 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3281 {
3282 DEFINE_COMPILER;
3283 struct sljit_label *loop;
3284 BOOL has_pre;
3285
3286 /* At this point we can freely use all registers. */
3287 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3288 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3289
3290 if (HAS_VIRTUAL_REGISTERS)
3291 {
3292 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3293 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3294 if (common->mark_ptr != 0)
3295 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3296 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3298 if (common->mark_ptr != 0)
3299 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3300 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3301 SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3302 }
3303 else
3304 {
3305 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3306 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3307 if (common->mark_ptr != 0)
3308 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3309 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3310 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3311 if (common->mark_ptr != 0)
3312 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3313 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3314 }
3315
3316 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3317
3318 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3319 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3320
3321 loop = LABEL();
3322
3323 if (has_pre)
3324 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3325 else
3326 {
3327 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3328 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3329 }
3330
3331 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3332 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3333 /* Copy the integer value to the output buffer */
3334 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3335 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3336 #endif
3337
3338 SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3339 OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3340
3341 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3342 JUMPTO(SLJIT_NOT_ZERO, loop);
3343
3344 /* Calculate the return value, which is the maximum ovector value. */
3345 if (topbracket > 1)
3346 {
3347 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3348 {
3349 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3350 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3351
3352 /* OVECTOR(0) is never equal to SLJIT_S2. */
3353 loop = LABEL();
3354 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3355 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3356 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3357 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3358 }
3359 else
3360 {
3361 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3362 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3363
3364 /* OVECTOR(0) is never equal to SLJIT_S2. */
3365 loop = LABEL();
3366 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3367 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3368 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3369 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3370 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3371 }
3372 }
3373 else
3374 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3375 }
3376
return_with_partial_match(compiler_common * common,struct sljit_label * quit)3377 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3378 {
3379 DEFINE_COMPILER;
3380 sljit_s32 mov_opcode;
3381 sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3382
3383 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3384 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3385 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3386
3387 if (arguments_reg != ARGUMENTS)
3388 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3389 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3390 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3391 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3392
3393 /* Store match begin and end. */
3394 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3395 OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3396 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3397
3398 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3399
3400 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3401 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3402 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3403 #endif
3404 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3405
3406 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3407 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3408 OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3409 #endif
3410 OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3411
3412 JUMPTO(SLJIT_JUMP, quit);
3413 }
3414
check_start_used_ptr(compiler_common * common)3415 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3416 {
3417 /* May destroy TMP1. */
3418 DEFINE_COMPILER;
3419 struct sljit_jump *jump;
3420
3421 if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3422 {
3423 /* The value of -1 must be kept for start_used_ptr! */
3424 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3425 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3426 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3427 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3429 JUMPHERE(jump);
3430 }
3431 else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3432 {
3433 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3435 JUMPHERE(jump);
3436 }
3437 }
3438
char_has_othercase(compiler_common * common,PCRE2_SPTR cc)3439 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3440 {
3441 /* Detects if the character has an othercase. */
3442 unsigned int c;
3443
3444 #ifdef SUPPORT_UNICODE
3445 if (common->utf || common->ucp)
3446 {
3447 if (common->utf)
3448 {
3449 GETCHAR(c, cc);
3450 }
3451 else
3452 c = *cc;
3453
3454 if (c > 127)
3455 return c != UCD_OTHERCASE(c);
3456
3457 return common->fcc[c] != c;
3458 }
3459 else
3460 #endif
3461 c = *cc;
3462 return MAX_255(c) ? common->fcc[c] != c : FALSE;
3463 }
3464
char_othercase(compiler_common * common,unsigned int c)3465 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3466 {
3467 /* Returns with the othercase. */
3468 #ifdef SUPPORT_UNICODE
3469 if ((common->utf || common->ucp) && c > 127)
3470 return UCD_OTHERCASE(c);
3471 #endif
3472 return TABLE_GET(c, common->fcc, c);
3473 }
3474
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 when they do not. Otherwise the low 8 bits of the result
hold the differing bit (moved into the low byte) and the bits above them hold
a position value selected below. The caller must only pass characters that
have an other case. */
unsigned int ch, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int unit;
#endif

/* Fetch the character and its other case. */
ch = *cc;
#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  GETCHAR(ch, cc);
  }

if (common->utf || common->ucp)
  other = (ch <= 127) ? common->fcc[ch] : UCD_OTHERCASE(ch);
else
#endif
  other = TABLE_GET(ch, common->fcc, ch);

SLJIT_ASSERT(ch != other);

diff = ch ^ other;
/* Fast path for the English alphabet. */
if (diff == 0x20 && ch <= 127)
  return 0x20;

/* ch != other, so at least one bit is set; reject more than one. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && ch > 127)
  {
  /* Start from the last byte of the sequence and step back one byte for
  every 6 payload bits skipped. */
  for (unit = GET_EXTRALEN(*cc); (diff & 0x3f) == 0; diff >>= 6)
    unit--;
  return (unit << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && ch > 65535)
  {
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3550
static void check_partial(compiler_common *common, BOOL force)
{
/* Emits the partial match check. Nothing is emitted in complete matching
mode. In soft partial mode hit_start is cleared; in hard partial mode control
is transferred to the partial match handler. Unless force is set or empty
partial matches are allowed, the action is skipped when start_used_ptr is not
below STR_PTR. Only memory and immediate operands are used, so no register is
modified. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (force || common->allow_empty_partial)
  {
  /* Soft mode still skips while start_used_ptr holds -1. */
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  /* The handler label does not exist yet: queue the jump. */
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip != NULL)
  JUMPHERE(skip);
}
3580
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits an end-of-subject test. When STR_PTR has reached STR_END, a jump is
added to end_reached, except in hard partial mode with start_used_ptr below
STR_PTR, where control goes to the partial match handler instead. Soft partial
mode clears hit_start before jumping in that situation. Only memory and
immediate operands are written, so registers are left alone. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching modes. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Plain end-of-subject exit when start_used_ptr >= STR_PTR. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
3610
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits an end-of-subject test whose failure path is a backtrack. In complete
mode a jump is added to backtracks when STR_PTR >= STR_END. In the partial
modes the same situation is examined further: soft mode clears hit_start and
backtracks, hard mode transfers control to the partial match handler. */
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching modes. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  /* Soft mode: plain backtrack while start_used_ptr holds -1. */
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  /* Plain backtrack when start_used_ptr >= STR_PTR. */
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(has_input);
}
3643
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match action without testing STR_END first. Hard partial
mode jumps to the partial match handler when start_used_ptr < STR_PTR; soft
partial mode clears hit_start under the same condition. Nothing is emitted in
complete matching mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  {
  if (common->partialmatchlabel == NULL)
    /* The handler label does not exist yet: queue the conditional jump. */
    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  else
    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  return;

skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
JUMPHERE(skip);
}
3664
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to label while STR_PTR is still below STR_END. The code that
follows the jump (reached when STR_PTR >= STR_END) is the partial match
action emitted by process_partial_match(). */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3672
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed.

Arguments:
  max         when max is below the first value that needs more than one code
              unit (128 for 8 bit, 0xd800 for 16 and 32 bit), only the single
              code unit load is emitted
  dst, dstw   operand used to save STR_PTR around the UTF helper call
  backtracks  when not NULL and invalid_utf is set, a jump is added to this
              list for characters reported as INVALID_UTF_CHAR */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no further work. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  /* Save STR_PTR in dst, step over the first byte, call the read helper,
  then restore STR_PTR so the character is only peeked. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Code units outside 0xd800..0xdfff are complete characters. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    /* Same save / call / restore sequence as in the 8 bit case. */
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    /* invalid_utf is always true in this branch; only backtracks matters. */
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP1 = 0x10000 + (high << 10) + (low - 0xdc00), reading the low
    surrogate from the next code unit without moving STR_PTR. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  if (backtracks != NULL)
    {
    /* Backtrack for values >= 0x110000 and for 0xd800..0xdfff. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace the same two ranges with INVALID_UTF_CHAR
    using conditional moves. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3756
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR.

Arguments:
  max         in 8 and 16 bit UTF mode, when max is below the first value that
              needs more than one code unit (128 / 0xd800), only the single
              code unit load is emitted
  backtracks  list that receives a jump for characters found to be invalid
              when invalid_utf is set */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Single byte characters need no further work. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Code units below 0xd800 are complete characters. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    /* Anything outside 0xdc00..0xdfff is a complete character. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* TMP1 = ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000, with the
    high surrogate read from two code units back. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Backtrack for values >= 0x110000 and for 0xd800..0xdfff.
  NOTE(review): unlike the 8 and 16 bit branches, backtracks is passed to
  add_jump() without a NULL check and max is not consulted here - confirm
  that every caller supplies a list in this configuration. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3824
/* Option bits for the options argument of read_char(). */

/* Advance STR_PTR over all code units of the character, even on the code
paths where the value of the character is not fully decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, use the newline variant of the read helper
(utfreadnewline_invalid) instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Skip the invalid UTF handling of read_char() even when invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
3829
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
  jump_list **backtracks, sljit_u32 options)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

Arguments:
  min, max    range of character values the caller is interested in; used to
              pick the cheapest decoding sequence
  backtracks  when not NULL, receives a jump for invalid characters in
              invalid_utf mode
  options     combination of the READ_CHAR_* bits

STR_PTR is always advanced by at least one code unit. TMP2 is used as scratch
on the multi code unit paths, and RETURN_ADDR on some of them. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(options);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* Only single byte values are interesting and STR_PTR does not have to
  skip the rest of the character. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Invalid UTF mode: everything above 0x7f goes through a helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  /* First bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range: decode them inline when
    the first byte is 0xf0..0xf7, otherwise leave TMP1 undecoded. With
    READ_CHAR_UPDATE_STR_PTR the number of extra bytes is fetched from
    utf8_table4 into RETURN_ADDR and added to STR_PTR at the end. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range: decode them inline when
    the first byte is 0xe0..0xef. STR_PTR handling mirrors the four byte
    case above. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    {
    /* Mixed lengths: use the generic read helper. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
    }
  else if (max < 128)
    {
    /* Reached only with READ_CHAR_UPDATE_STR_PTR (see the early return
    above): the value is irrelevant, just skip the extra bytes. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* max is in 128..0x7ff: decode as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  /* Only values below the surrogate range are interesting and STR_PTR
  does not have to skip a low surrogate. */
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;

  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
    {
    /* Invalid UTF mode: code units in 0xd800..0xdfff go through a helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);

    if (options & READ_CHAR_UTF8_NEWLINE)
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
    else
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));

    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    JUMPHERE(jump);
    return;
    }

  if (max >= 0x10000)
    {
    /* Full decode: TMP1 = 0x10000 + (high << 10) + (low - 0xdc00). */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  /* For a high surrogate (TMP2 < 0x400): optionally step over the low
  surrogate, and report 0x10000 as an out of range value when max reaches
  the surrogate range. Done with conditional moves when available. */
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
    {
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
    if (max >= 0xd800)
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
    }
  else
    {
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
    if (options & READ_CHAR_UPDATE_STR_PTR)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (max >= 0xd800)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
    JUMPHERE(jump);
    }
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (backtracks != NULL)
    {
    /* Backtrack for values >= 0x110000 and for 0xd800..0xdfff. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace the same two ranges with INVALID_UTF_CHAR
    using conditional moves. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4023
4024 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4025
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Reports whether the upper half of a 32 byte class bitmap (bytes 16..31,
i.e. character codes 128..255) is uniform, so that the character codes below
128 are enough to determine a match. The upper half must be all ones for a
negated class (nclass) and all zeros otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4042
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Loads the ctypes entry of the code unit at STR_PTR into TMP1 and advances
STR_PTR by one code unit. Does not check STR_END. Only used in UTF mode.

When negated is set, the remaining bytes of a multi-byte character are
consumed as well: in invalid UTF mode via the utfreadchar_invalid helper (with
a jump to backtracks for INVALID_UTF_CHAR, and TMP1 cleared afterwards),
otherwise by adding the utf8_table4 length of the first byte to STR_PTR.
TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (!common->invalid_utf)
  {
  /* Valid UTF: skip the extra bytes announced by the first byte. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  /* The helper left a character value in TMP1; the type must be zero. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }

JUMPHERE(single_byte);
}
4077
4078 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4079
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
for the first code unit. TMP2 is destroyed.

Arguments:
  backtracks  receives jumps for invalid sequences when invalid_utf is set
  negated     when set, STR_PTR is moved past the whole character even if its
              type is reported as zero; when clear, only as many code units are
              consumed as are needed to obtain a type from the 256 entry
              ctypes table */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Decode as a two byte sequence; larger values end up above 255 and
    get a zero type below. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in 0xc2..0xdf. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 = ((first - 0xc2) << 6) + second, which is the character value. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be a continuation byte (0x80..0xbf), so the
    value to test is TMP1 (second - 0x80). Testing TMP2 here would reject
    every valid two byte sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper returns the character in TMP1; move it to TMP2 and look
    up its type unless it is invalid or above 255. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a code unit in 0xd800..0xdfff must be a high
  surrogate followed, before STR_END, by a low surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4198
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code which moves STR_PTR one character back. TMP1 is overwritten.

Arguments:
  common         compiler state (sljit compiler, utf / invalid_utf flags)
  backtracks     when not NULL, receives a jump which is taken if the
                 preceding character is found to be invalid
  must_be_valid  if TRUE, the validating code paths are skipped and TMP2 is
                 not used; otherwise TMP2 must contain the start of the
                 subject buffer, and it is destroyed

STR_PTR is not modified for invalid character sequences. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;
#endif

/* Neither argument is referenced in every build configuration. These
statements are placed after all declarations so that the function does not
mix declarations and code. */
SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Step back one byte. A byte below 0x80 is a complete character, any
    other byte is passed to the utfmoveback_invalid helper. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* After the helper call a zero TMP1 is treated as a failure. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-8: keep stepping back while the byte just passed is a
  continuation byte (10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Code units outside 0xd800-0xdfff are complete characters, surrogates
    are passed to the utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* After the helper call a zero TMP1 is treated as a failure. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-16: if the unit just passed is a low surrogate (0xdc00-0xdfff),
  step back one more unit without branching. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Values of 0x110000 and above backtrack before STR_PTR is changed. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: subtract one code unit only when the value is
  below 0x110000, otherwise subtract zero. */
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

/* Fixed width characters: one code unit back. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4284
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits a newline test for the character in TMP1. A jump is added to
backtracks which is taken when the character is a newline (jumpifmatch is
TRUE) or when it is not a newline (jumpifmatch is FALSE). TMP2 may be
destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper is called and its result is tested through the
  zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character is compared to CR and NL. */
if (!jumpifmatch)
  {
  /* CR skips the NL test, anything else which is not NL backtracks. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4317
4318 #ifdef SUPPORT_UNICODE
4319
4320 #if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Emits the fast (non-validating) UTF-8 decoder helper. On entry TMP1 holds
the first byte of the character (>= 0xc0). The decoded character value is
returned in TMP1, TMP2 is used as scratch and STR_PTR is advanced past the
bytes which were consumed. */
DEFINE_COMPILER;
struct sljit_jump *more_than_two;
struct sljit_jump *more_than_three;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Merge the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 5 of the first byte (now at 0x800) is clear for a two byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
more_than_two = JUMP(SLJIT_NOT_ZERO);

/* Two bytes: remove the shifted 0xc0 marker and return. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Merge the low six bits of the third byte. */
JUMPHERE(more_than_two);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 4 of the first byte (now at 0x10000) is clear for a three byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
more_than_three = JUMP(SLJIT_NOT_ZERO);

/* Three bytes: remove the shifted 0xe0 marker and return. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four bytes: remove the shifted 0xf0 marker, then merge the low six bits
of the fourth byte. */
JUMPHERE(more_than_three);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4365
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the fast UTF-8 character type reader helper. On entry TMP2 holds
the first byte of the character (>= 0xc0). The ctypes entry of the character
is returned in TMP1, or zero when the character is 256 or above. STR_PTR is
advanced past the remaining bytes of the character. */
DEFINE_COMPILER;
struct sljit_jump *long_sequence;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
long_sequence = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: fetch the second byte and keep the five payload bits
of the first one. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* A payload above 3 means that the character does not fit in eight bits. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence whose value is 256 or more: the type is zero. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Longer sequences: types exist only for characters less than 256, so the
type is zero. STR_PTR is advanced by the utf8_table4 entry of the first byte
(presumably the number of additional bytes - confirm against the table). */
JUMPHERE(long_sequence);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4401
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow (validating) UTF-8 decoder helper. On entry TMP1 holds the
first byte of the character (>= 0xc0). The decoded character is returned in
TMP1, or INVALID_UTF_CHAR when the sequence is rejected. TMP2 is used as
scratch. STR_PTR is undefined for invalid characters.

When conditional moves are available, several checks replace TMP1 by a value
which the following range test rejects, instead of branching; the matching
bad[] slot is then NULL. */
DEFINE_COMPILER;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
sljit_s32 k;
struct sljit_jump *longer;
struct sljit_jump *near_end;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_exit;
struct sljit_jump *bad[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* The common case is that at least three more bytes are available, so
STR_PTR is advanced first and the bytes are addressed with negative
offsets. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* First byte outside 0xc2-0xf4: not a valid start of a multi-byte sequence,
no more bytes are read. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* When TMP2 is in the 0x80-0xbf range, this addition also increases TMP1
by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: only two of the three bytes were consumed. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence: third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!has_cmov)
  bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  bad[2] = NULL;
  }

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* Shared with the three-byte case of the "close to the buffer end" path. */
three_byte_tail = LABEL();

/* Reject the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (!has_cmov)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  bad[3] = NULL;
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Reject values below 0x800, which do not need three bytes. */
if (!has_cmov)
  bad[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[4] = NULL;
  }
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence: fourth byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!has_cmov)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  bad[5] = NULL;
  }

/* Only 0x10000-0x10ffff is accepted. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (!has_cmov)
  bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[6] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes remained in the subject buffer. */
JUMPHERE(near_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
bad[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence: second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* When TMP2 is in the 0x80-0xbf range, this addition also increases TMP1
by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence: one more byte must be available. */
JUMPHERE(longer);
bad[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!has_cmov)
  bad[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[10] = NULL;
  }

/* The shared three-byte tail subtracts one from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible here; such values fall through to
the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Every recorded failure jump lands here. */
invalid_exit = LABEL();
for (k = 0; k < 11; k++)
  sljit_set_label(bad[k], invalid_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4560
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow UTF-8 decoder helper which is specialized for newline
detection. On entry TMP1 holds the first byte of the character (>= 0xc0).
TMP1 returns 0x85, 0x2028 or 0x2029 when one of these newlines is decoded
(NLTYPE_ANY only), otherwise INVALID_UTF_CHAR is returned after the
intermediate octets have been skipped. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_label *skip_loop;
struct sljit_label *skip_entry;
struct sljit_label *invalid_exit;
struct sljit_jump *at_end;
struct sljit_jump *lead_c2;
struct sljit_jump *lead_e2;
struct sljit_jump *not_continuation;
struct sljit_jump *ran_out;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* Every possible newline is ASCII, so a multi-byte character can never
  match: skip the intermediate octets and report an invalid character.
  NOTE(review): the loop below does not compare STR_PTR with STR_END between
  iterations - confirm that this is safe for subjects which end with
  continuation bytes. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  skip_loop = LABEL();
  /* Use a post-update load when the target supports it. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) != SLJIT_SUCCESS)
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);
  /* The last byte which was read does not belong to this character. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(at_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* NLTYPE_ANY: read the second byte, then dispatch on the first one. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

lead_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Not a newline: TMP2 holds the byte which was read last. */
skip_entry = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_continuation = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip the intermediate octets, checking the buffer end every time. */
skip_loop = LABEL();
ran_out = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);

/* The last byte which was read does not belong to this character. */
JUMPHERE(not_continuation);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

invalid_exit = LABEL();
JUMPHERE(at_end);
JUMPHERE(ran_out);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0xc2 0x85 is 0x85. */
JUMPHERE(lead_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_entry);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0xe2 0x80 0xa8 / 0xa9 are 0x2028 / 0x2029. */
JUMPHERE(lead_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_entry);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, invalid_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* A third byte which is not a continuation byte restarts the skipping. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_entry);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4651
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the validating UTF-8 "go one character back" helper. On entry TMP1
holds the byte at the new STR_PTR position (move_back has already stepped
one byte back and only calls the helper for bytes >= 0x80), and TMP2 holds
the start of the subject buffer. On success TMP1 is 1 and STR_PTR points to
the first byte of the character. On failure TMP1 is 0 and STR_PTR is one
byte after its entry value. */
DEFINE_COMPILER;
struct sljit_jump *longer;
struct sljit_jump *near_start;
struct sljit_label *success_exit;
struct sljit_label *short_invalid_exit;
struct sljit_label *invalid_exit;
struct sljit_jump *bad[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Assume that three more bytes precede the current one. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
/* The last byte of a multi-byte character must be a continuation byte. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence: the byte just tested must be a continuation byte
(an unsigned compare against -0x40 rejects everything else). */
JUMPHERE(longer);
bad[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence: the byte just tested must be a continuation byte and
the first byte must be in the 0xf0-0xf4 range. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
bad[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

success_exit = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes precede the current one. Two-byte sequence. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

bad[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, success_exit);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
bad[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
bad[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, success_exit);

/* Four-byte sequences are not possible here. Failure exit for the state
where STR_PTR is two bytes before its entry value. */
short_invalid_exit = LABEL();
sljit_set_label(bad[5], short_invalid_exit);
sljit_set_label(bad[6], short_invalid_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(bad[4]);
/* -2 + 4 = 2: adjust STR_PTR so that the common exit below restores it. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the state where STR_PTR is three bytes before its entry
value. */
invalid_exit = LABEL();
sljit_set_label(bad[0], invalid_exit);
sljit_set_label(bad[1], invalid_exit);
sljit_set_label(bad[2], invalid_exit);
sljit_set_label(bad[3], invalid_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4747
static void do_utfpeakcharback(compiler_common *common)
{
/* Emits the non-validating UTF-8 "peek one character back" helper. The
bytes before STR_PTR are combined into a character value which is returned
in TMP1. TMP2 is used as scratch. STR_PTR is not modified. */
DEFINE_COMPILER;
struct sljit_jump *two_bytes;
struct sljit_jump *three_bytes;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A 0xc0-0xdf byte at offset -2 starts a two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
two_bytes = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A 0xe0-0xef byte at offset -3 starts a three byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
three_bytes = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four byte sequence: the byte at -3 is a continuation byte, and the first
byte is at offset -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at offset -2. */
JUMPHERE(three_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the six payload bits of the byte at offset -1. */
JUMPHERE(two_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4784
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the validating UTF-8 "peek one character back" helper. On entry
TMP1 holds the byte before STR_PTR and TMP2 is compared against STR_PTR as
the lower limit of the readable bytes (presumably the start of the subject
buffer - confirm against the callers). The character value is returned in
TMP1, or INVALID_UTF_CHAR when the sequence is rejected. TMP2 is destroyed.
STR_PTR is not modified.

When conditional moves are available, some checks replace TMP1 by a value
which the following range test rejects, instead of branching; the matching
bad[] slot is then NULL. */
DEFINE_COMPILER;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
sljit_s32 k;
struct sljit_jump *under_four;
struct sljit_jump *under_three;
struct sljit_jump *not_two_bytes;
struct sljit_jump *not_three_bytes;
struct sljit_label *two_byte_tail;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_exit;
struct sljit_jump *bad[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
/* The last byte of a multi-byte character must be a continuation byte. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
/* Fewer than four bytes are available before STR_PTR. */
under_four = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence: first byte in the 0xc2-0xdf range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
not_two_bytes = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

two_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* TMP1 is in the 0x80-0xbf range, so this addition also increases the
result by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -2 must be a continuation byte. */
JUMPHERE(not_two_bytes);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence: first byte in the 0xe0-0xef range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
not_three_bytes = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

three_byte_tail = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!has_cmov)
  bad[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  bad[2] = NULL;
  }

/* Reject values below 0x800, which do not need three bytes. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (!has_cmov)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[3] = NULL;
  }

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -3 must be a continuation byte. */
JUMPHERE(not_three_bytes);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Only 0x10000-0x10ffff is accepted. */
if (!has_cmov)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[5] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than four bytes are available: check whether there are three. */
JUMPHERE(under_four);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
under_three = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_tail);

/* A four-byte sequence does not fit in three bytes. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three bytes are available: there must be exactly two. */
JUMPHERE(under_three);
bad[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_tail);

/* Every recorded failure jump lands here. */
invalid_exit = LABEL();
for (k = 0; k < 8; k++)
  sljit_set_label(bad[k], invalid_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4916
4917 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4918
4919 #if PCRE2_CODE_UNIT_WIDTH == 16
4920
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow (validating) UTF-16 decoder helper. On entry TMP1 holds
the first code unit of the character (>= 0xd800), and TMP2 holds the high
surrogate (presumably already reduced by 0xd800 by the caller - confirm
against the call sites). The decoded character is returned in TMP1, or
INVALID_UTF_CHAR when the pair is rejected. STR_PTR is undefined for invalid
characters. */
DEFINE_COMPILER;
struct sljit_jump *not_high_surrogate;
struct sljit_jump *at_end;
struct sljit_jump *not_low_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A first unit of 0xdc00 or above cannot start a pair, and a second unit
must be available. */
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the second unit and move the high part into position. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
not_low_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(not_high_surrogate);
JUMPHERE(at_end);
JUMPHERE(not_low_surrogate);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4952
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow UTF-16 decoder helper which is specialized for newline
detection. On entry TMP1 holds the first code unit of the character
(>= 0xd800). A well formed surrogate pair is skipped and 0x10000 is returned
in TMP1 as a placeholder value; if the second unit is not a low surrogate,
0x10000 is still returned but STR_PTR is not advanced. INVALID_UTF_CHAR is
returned when no second unit is available or the first unit is 0xdc00 or
above. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_jump *at_end;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the second unit is a low surrogate (0xdc00-0xdfff)
and 0 otherwise; STR_PTR is advanced by that many code units. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common failure exit. */
JUMPHERE(at_end);
JUMPHERE(not_high_surrogate);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4984
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the out-of-line helper that steps STR_PTR back over one character
when invalid UTF-16 is tolerated.

Entry: TMP1 and TMP2 are prepared by the caller. This code only tests
TMP1 against 0x400 and TMP2 against STR_PTR.
NOTE(review): TMP1 is presumably a code unit rebased by the caller and TMP2
the lowest address STR_PTR may reach - confirm at the call site.
Exit: on success STR_PTR has been moved back by one code unit and TMP1 is 1;
on failure STR_PTR has been moved forward by one code unit and TMP1 is 0. */
DEFINE_COMPILER;
struct sljit_jump *tmp1_too_small;
struct sljit_jump *at_lower_bound;
struct sljit_jump *prev_not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

tmp1_too_small = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
at_lower_bound = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The unit before STR_PTR must lie in 0xd800..0xdbff. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
prev_not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

/* Success: step over that unit as well and report 1. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Failure: move STR_PTR forward by one unit and report 0. */
JUMPHERE(tmp1_too_small);
JUMPHERE(at_lower_bound);
JUMPHERE(prev_not_high);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5012
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the out-of-line helper that peeks at the character before STR_PTR
when invalid UTF-16 is tolerated. STR_PTR is never written.

Entry: TMP1 holds a code unit and TMP2 an address, both set up by the caller.
NOTE(review): TMP2 is presumably the start of the readable subject area -
confirm at the call site.
Exit: TMP1 is left unchanged when it is 0xe000 or above. When it lies in
0xdc00..0xdfff and the unit at STR_PTR[-2] lies in 0xd800..0xdbff, TMP1
receives the combined value 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00).
Otherwise TMP1 is INVALID_UTF_CHAR. TMP2 is clobbered. */
DEFINE_COMPILER;
struct sljit_jump *plain_unit;
struct sljit_jump *not_low_surrogate;
struct sljit_jump *no_room_before;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Values of 0xe000 and above are returned as they are. */
plain_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
not_low_surrogate = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
no_room_before = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Read the unit two positions back and merge it with the low half in TMP1. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(plain_unit);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Shared failure exit. */
JUMPHERE(not_low_surrogate);
JUMPHERE(no_room_before);
JUMPHERE(not_high_surrogate);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5044
5045 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5046
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Selects the low 7 bits of a code point: its index inside a 128-entry block. */
#define UCD_BLOCK_MASK 127
/* Number of low bits shifted out to obtain the block number (128 == 1 << 7). */
#define UCD_BLOCK_SHIFT 7
5050
static void do_getucd(compiler_common *common)
{
/* Emits the out-of-line helper that finds the UCD record index for the
character in TMP1.

Exit: TMP2 holds the 16-bit value read from the ucd_stage2 table. TMP1 is
clobbered: it is left holding the index that was used into ucd_stage2. No
field of the record itself (such as chartype) is loaded by this helper. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *is_code_point;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

/* The shifts and scales below are hard-wired to these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
  is_code_point = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(is_code_point);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2 index: (stage1 value << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Stage 2: TMP2 = ucd_stage2[TMP1] (16-bit entries, index scaled by 2). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5090
static void do_getucdtype(compiler_common *common)
{
/* Emits the out-of-line helper that fetches the chartype field of the UCD
record belonging to the character in TMP1.

Exit: TMP1 holds the chartype byte. TMP2 holds the record index read from
ucd_stage2 multiplied by 4 (an intermediate of the multiply-by-12 below). */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *is_code_point;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown && dummy->chartype == ucp_Cn && dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0 && dummy->other_case == 0);
#endif

/* The shifts and scales below are hard-wired to these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
  is_code_point = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(is_code_point);
  }
#endif

/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (16-bit entries). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2 index: (stage1 value << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

/* Stage 2: TMP2 = ucd_stage2[TMP1] (16-bit entries, index scaled by 2). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* Record offset is index * 12, formed as (index << 2) + ((index << 2) << 1):
TMP1 = &chartype of record 0 + (index << 2), and the load then adds
(index << 2) << 1 through the scaled-index addressing mode. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5137
5138 #endif /* SUPPORT_UNICODE */
5139
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the code that runs before the first match attempt and the code at
the head of the main loop that advances STR_PTR by one character.

Before the loop, depending on the compile options, a limit is stored in the
SP-relative slot common->match_end_ptr:
  - PCRE2_FIRSTLINE: the position where the first line ends;
  - PCRE2_USE_OFFSET_LIMIT: begin + offset_limit clamped to STR_END, or
    STR_END itself when offset_limit is PCRE2_UNSET.

The very first pass jumps over the advancing code (see 'start'), so STR_PTR
is only moved when control comes back to the returned label.

Returns the label placed at the head of the advancing code ('mainloop'). */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The extra newline handling in the loop is emitted only when the pattern
has no explicit CR/LF, FIRSTLINE is off, and the newline type is ANY, ANYCRLF
or a fixed newline whose value does not fit in one byte (newline > 255). */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is used for the scan, so keep the real start in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units: compare the units at
    STR_PTR[-1] and STR_PTR[0] with the high and low byte of newline. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* Store STR_PTR minus one code unit as the limit. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* Reached only without a newline hit: the limit is the current STR_PTR.
    The jumps collected in 'newline' land after this store, so they keep the
    value stored at the top of the loop. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  /* Restore the start position saved above. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  /* TMP1 = arguments->offset_limit. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* Default limit is STR_END; it is kept when offset_limit is PCRE2_UNSET. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  /* TMP2 = arguments->begin (finished below in the virtual register case). */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Scale the offset by UCHAR_SHIFT before adding it to the begin address. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + offset_limit, clamped to STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* A limit below the start position aborts with PCRE2_ERROR_NOMATCH. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* First entry: skip the advancing code below (patched at JUMPHERE(start)). */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the loop when the current unit equals the high byte of
  newline: step over it and, unless the subject ended, step over the next
  unit too when that one equals the low byte of newline. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  /* TMP1 = 1 on equality, 0 otherwise; then scaled and added to STR_PTR. */
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Valid UTF needs the leading unit to decide how far to advance. */
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Units in 0x80..0xbf are consumed until another value or STR_END is
  met; the final decrement undoes the read of the unit that ended the run. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For a leading byte of 0xc0 or above, add the value found in
  utf8_table4[byte - 0xc0] (presumably the number of trailing bytes). */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Units in 0xdc00..0xdfff are consumed until another value or STR_END is
  met; the final decrement undoes the read of the unit that ended the run. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Advance one extra unit when the unit just passed is in 0xd800..0xdbff. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* Branch-free form: precompute STR_PTR + 1 and select it conditionally. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
    CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
    }
  else
    {
    /* Turn the comparison result into 0/1, scale it, and add it. */
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* The first-entry jump and both exits of the newline block land here. */
JUMPHERE(start);

if (newlinecheck)
  {
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
5330
5331
add_prefix_char(PCRE2_UCHAR chr,fast_forward_char_data * chars,BOOL last)5332 static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5333 {
5334 sljit_u32 i, count = chars->count;
5335
5336 if (count == 255)
5337 return;
5338
5339 if (count == 0)
5340 {
5341 chars->count = 1;
5342 chars->chars[0] = chr;
5343
5344 if (last)
5345 chars->last_count = 1;
5346 return;
5347 }
5348
5349 for (i = 0; i < count; i++)
5350 if (chars->chars[i] == chr)
5351 return;
5352
5353 if (count >= MAX_DIFF_CHARS)
5354 {
5355 chars->count = 255;
5356 return;
5357 }
5358
5359 chars->chars[count] = chr;
5360 chars->count = count + 1;
5361
5362 if (last)
5363 chars->last_count++;
5364 }
5365
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
/* Walks the compiled pattern starting at cc and records, for each successive
subject position, which code units may appear there.

  common     compiler state (utf / ucp flags, character tables)
  cc         current position in the compiled pattern
  chars      one entry per subject position; chars[0] is the next position
             to fill. An entry whose count is 255 accepts anything.
  max_chars  number of positions that may still be filled
  rec_count  shared work budget, decremented once per loop iteration here
             and in every recursive call

Returns the number of positions filled along this path, or 0 as soon as the
budget in *rec_count is used up. For optional items and alternatives the
function calls itself on the other path and uses the value returned as the
new max_chars, so later positions are limited to what both paths covered. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per-item state: 'last' stops the scan after this item, 'any' marks an
  item accepting arbitrary code units, 'class' a 32-byte bitmap class. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Assertion groups are skipped as a whole. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; 'last' stays TRUE so scanning stops there. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    /* Optional character: first scan the path on which it is absent. */
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    /* Follow the link stored after the opcode. */
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative recursively, then continue with the
    first alternative in this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra increment for the character operand that follows the opcode. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only sets the repeat count; the type opcode is handled next round. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* Mark 'repeat' positions as accepting anything. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32-byte bitmap follows the opcode; cc is moved past it so that
    *cc is the repeat opcode, if there is one. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan the path that skips it first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* The minimum count gives the number of positions to fill. */
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A class with the top bit of its last byte set (value 255) is
      treated as accepting anything. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every value whose bit is set; chr tracks the value that
        corresponds to the bit being looked at. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first value of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning can only go on when minimum and maximum are equal. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is only usable when it encodes to the same length. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record each code unit of the character (and of its other case) at
  consecutive positions, 'repeat' times over. Only the final code unit of a
  character is added with last == TRUE. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Start over on the same character for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
5774
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
/* Emits a jump to 'label' when the code unit held in 'reg' cannot start a
character: a byte of the form 10xxxxxx in the 8-bit library, or a unit in
0xdc00..0xdfff in the 16-bit library. The register is overwritten with the
masked value. */
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw kind_mask = 0xc0;
const sljit_sw trailing_kind = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw kind_mask = 0xfc00;
const sljit_sw trailing_kind = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, kind_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trailing_kind, label);
}
#endif
5789
5790 #include "pcre2_jit_simd_inc.h"
5791
5792 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5793
check_fast_forward_char_pair_simd(compiler_common * common,fast_forward_char_data * chars,int max)5794 static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5795 {
5796 sljit_s32 i, j, max_i = 0, max_j = 0;
5797 sljit_u32 max_pri = 0;
5798 PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5799
5800 for (i = max - 1; i >= 1; i--)
5801 {
5802 if (chars[i].last_count > 2)
5803 {
5804 a1 = chars[i].chars[0];
5805 a2 = chars[i].chars[1];
5806 a_pri = chars[i].last_count;
5807
5808 j = i - max_fast_forward_char_pair_offset();
5809 if (j < 0)
5810 j = 0;
5811
5812 while (j < i)
5813 {
5814 b_pri = chars[j].last_count;
5815 if (b_pri > 2 && a_pri + b_pri >= max_pri)
5816 {
5817 b1 = chars[j].chars[0];
5818 b2 = chars[j].chars[1];
5819
5820 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5821 {
5822 max_pri = a_pri + b_pri;
5823 max_i = i;
5824 max_j = j;
5825 }
5826 }
5827 j++;
5828 }
5829 }
5830 }
5831
5832 if (max_pri == 0)
5833 return FALSE;
5834
5835 fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5836 return TRUE;
5837 }
5838
5839 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5840
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
/* Emits code that moves STR_PTR forward to the next position at which the
code unit 'offset' units ahead equals char1 or char2 (the two may be equal).

When common->match_end_ptr is non-zero, STR_END is temporarily lowered to
the stored limit plus (offset + 1) code units if that is smaller; the
original STR_END is kept in TMP3 and restored before leaving.

In PCRE2_JIT_COMPLETE mode running out of input jumps to
common->failed_match. In the other modes (where offset must be 0) the scan
simply stops with STR_PTR left where the end-of-input test fired. */
DEFINE_COMPILER;
struct sljit_label *scan_loop;
struct sljit_jump *found;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff_bits;
BOOL limited = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

/* Fetch the limit first; it is adjusted after STR_PTR has been moved. */
if (limited)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* Scan at the position of the wanted code unit. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (limited)
  {
  /* STR_END = min(STR_END, limit + offset + 1), original saved in TMP3. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  /* The SIMD search does the scanning; only the bookkeeping is undone here. */
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (limited)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

scan_loop = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

/* Read one code unit and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

diff_bits = char1 ^ char2;
if (diff_bits == 0)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, scan_loop);
else if (is_powerof2(diff_bits))
  {
  /* The characters differ in one bit: force it on and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff_bits);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff_bits, scan_loop);
  }
else
  {
  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, scan_loop);
  JUMPHERE(found);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate start must not fall inside a multi-unit character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, scan_loop);
  }
#endif

/* Go back to the start position that belongs to the unit just matched. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (limited)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5926
fast_forward_first_n_chars(compiler_common * common)5927 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5928 {
5929 DEFINE_COMPILER;
5930 struct sljit_label *start;
5931 struct sljit_jump *match;
5932 fast_forward_char_data chars[MAX_N_CHARS];
5933 sljit_s32 offset;
5934 PCRE2_UCHAR mask;
5935 PCRE2_UCHAR *char_set, *char_set_end;
5936 int i, max, from;
5937 int range_right = -1, range_len;
5938 sljit_u8 *update_table = NULL;
5939 BOOL in_range;
5940 sljit_u32 rec_count;
5941
5942 for (i = 0; i < MAX_N_CHARS; i++)
5943 {
5944 chars[i].count = 0;
5945 chars[i].last_count = 0;
5946 }
5947
5948 rec_count = 10000;
5949 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
5950
5951 if (max < 1)
5952 return FALSE;
5953
5954 /* Convert last_count to priority. */
5955 for (i = 0; i < max; i++)
5956 {
5957 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
5958
5959 if (chars[i].count == 1)
5960 {
5961 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
5962 /* Simplifies algorithms later. */
5963 chars[i].chars[1] = chars[i].chars[0];
5964 }
5965 else if (chars[i].count == 2)
5966 {
5967 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
5968
5969 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
5970 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
5971 else
5972 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
5973 }
5974 else
5975 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
5976 }
5977
5978 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5979 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
5980 return TRUE;
5981 #endif
5982
5983 in_range = FALSE;
5984 /* Prevent compiler "uninitialized" warning */
5985 from = 0;
5986 range_len = 4 /* minimum length */ - 1;
5987 for (i = 0; i <= max; i++)
5988 {
5989 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
5990 {
5991 range_len = i - from;
5992 range_right = i - 1;
5993 }
5994
5995 if (i < max && chars[i].count < 255)
5996 {
5997 SLJIT_ASSERT(chars[i].count > 0);
5998 if (!in_range)
5999 {
6000 in_range = TRUE;
6001 from = i;
6002 }
6003 }
6004 else
6005 in_range = FALSE;
6006 }
6007
6008 if (range_right >= 0)
6009 {
6010 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6011 if (update_table == NULL)
6012 return TRUE;
6013 memset(update_table, IN_UCHARS(range_len), 256);
6014
6015 for (i = 0; i < range_len; i++)
6016 {
6017 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6018
6019 char_set = chars[range_right - i].chars;
6020 char_set_end = char_set + chars[range_right - i].count;
6021 do
6022 {
6023 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6024 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6025 char_set++;
6026 }
6027 while (char_set < char_set_end);
6028 }
6029 }
6030
6031 offset = -1;
6032 /* Scan forward. */
6033 for (i = 0; i < max; i++)
6034 {
6035 if (range_right == i)
6036 continue;
6037
6038 if (offset == -1)
6039 {
6040 if (chars[i].last_count >= 2)
6041 offset = i;
6042 }
6043 else if (chars[offset].last_count < chars[i].last_count)
6044 offset = i;
6045 }
6046
6047 SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6048
6049 if (range_right < 0)
6050 {
6051 if (offset < 0)
6052 return FALSE;
6053 /* Works regardless the value is 1 or 2. */
6054 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6055 return TRUE;
6056 }
6057
6058 SLJIT_ASSERT(range_right != offset);
6059
6060 if (common->match_end_ptr != 0)
6061 {
6062 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6063 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6064 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6065 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6066 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6067 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6068 }
6069 else
6070 {
6071 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6072 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6073 }
6074
6075 SLJIT_ASSERT(range_right >= 0);
6076
6077 if (!HAS_VIRTUAL_REGISTERS)
6078 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6079
6080 start = LABEL();
6081 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6082
6083 #if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6084 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6085 #else
6086 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6087 #endif
6088
6089 if (!HAS_VIRTUAL_REGISTERS)
6090 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6091 else
6092 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6093
6094 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6095 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6096
6097 if (offset >= 0)
6098 {
6099 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6100 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6101
6102 if (chars[offset].count == 1)
6103 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6104 else
6105 {
6106 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6107 if (is_powerof2(mask))
6108 {
6109 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6110 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6111 }
6112 else
6113 {
6114 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6115 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6116 JUMPHERE(match);
6117 }
6118 }
6119 }
6120
6121 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6122 if (common->utf && offset != 0)
6123 {
6124 if (offset < 0)
6125 {
6126 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6127 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6128 }
6129 else
6130 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6131
6132 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6133
6134 if (offset < 0)
6135 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6136 }
6137 #endif
6138
6139 if (offset >= 0)
6140 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6141
6142 if (common->match_end_ptr != 0)
6143 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6144 else
6145 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6146 return TRUE;
6147 }
6148
fast_forward_first_char(compiler_common * common)6149 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6150 {
6151 PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6152 PCRE2_UCHAR oc;
6153
6154 oc = first_char;
6155 if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6156 {
6157 oc = TABLE_GET(first_char, common->fcc, first_char);
6158 #if defined SUPPORT_UNICODE
6159 if (first_char > 127 && (common->utf || common->ucp))
6160 oc = UCD_OTHERCASE(first_char);
6161 #endif
6162 }
6163
6164 fast_forward_first_char2(common, first_char, oc, 0);
6165 }
6166
fast_forward_newline(compiler_common * common)6167 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6168 {
6169 DEFINE_COMPILER;
6170 struct sljit_label *loop;
6171 struct sljit_jump *lastchar = NULL;
6172 struct sljit_jump *firstchar;
6173 struct sljit_jump *quit = NULL;
6174 struct sljit_jump *foundcr = NULL;
6175 struct sljit_jump *notfoundnl;
6176 jump_list *newline = NULL;
6177
6178 if (common->match_end_ptr != 0)
6179 {
6180 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6181 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6182 }
6183
6184 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6185 {
6186 #ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6187 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6188 {
6189 if (HAS_VIRTUAL_REGISTERS)
6190 {
6191 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6192 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6194 }
6195 else
6196 {
6197 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6198 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6199 }
6200 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6201
6202 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6203 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6204 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6205 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6206 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6207 #endif
6208 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6209
6210 fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6211 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6212 }
6213 else
6214 #endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6215 {
6216 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6217 if (HAS_VIRTUAL_REGISTERS)
6218 {
6219 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6220 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6221 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6222 }
6223 else
6224 {
6225 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6227 }
6228 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6229
6230 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6231 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6232 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6233 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6234 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6235 #endif
6236 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6237
6238 loop = LABEL();
6239 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6240 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6241 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6242 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6243 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6244 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6245
6246 JUMPHERE(quit);
6247 JUMPHERE(lastchar);
6248 }
6249
6250 JUMPHERE(firstchar);
6251
6252 if (common->match_end_ptr != 0)
6253 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6254 return;
6255 }
6256
6257 if (HAS_VIRTUAL_REGISTERS)
6258 {
6259 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6261 }
6262 else
6263 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6264
6265 /* Example: match /^/ to \r\n from offset 1. */
6266 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6267
6268 if (common->nltype == NLTYPE_ANY)
6269 move_back(common, NULL, FALSE);
6270 else
6271 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6272
6273 loop = LABEL();
6274 common->ff_newline_shortcut = loop;
6275
6276 #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6277 if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6278 {
6279 if (common->nltype == NLTYPE_ANYCRLF)
6280 {
6281 fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6282 if (common->mode != PCRE2_JIT_COMPLETE)
6283 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6284
6285 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6286 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6287 quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6288 }
6289 else
6290 {
6291 fast_forward_char_simd(common, common->newline, common->newline, 0);
6292
6293 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6294 if (common->mode != PCRE2_JIT_COMPLETE)
6295 {
6296 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
6297 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6298 }
6299 }
6300 }
6301 else
6302 #endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6303 {
6304 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6305 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6306 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6307 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6308 check_newlinechar(common, common->nltype, &newline, FALSE);
6309 set_jumps(newline, loop);
6310 }
6311
6312 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6313 {
6314 if (quit == NULL)
6315 {
6316 quit = JUMP(SLJIT_JUMP);
6317 JUMPHERE(foundcr);
6318 }
6319
6320 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6321 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6322 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6323 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6324 #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6325 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6326 #endif
6327 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6328 JUMPHERE(notfoundnl);
6329 JUMPHERE(quit);
6330 }
6331
6332 if (lastchar)
6333 JUMPHERE(lastchar);
6334 JUMPHERE(firstchar);
6335
6336 if (common->match_end_ptr != 0)
6337 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6338 }
6339
6340 static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6341
fast_forward_start_bits(compiler_common * common)6342 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6343 {
6344 DEFINE_COMPILER;
6345 const sljit_u8 *start_bits = common->re->start_bitmap;
6346 struct sljit_label *start;
6347 struct sljit_jump *partial_quit;
6348 #if PCRE2_CODE_UNIT_WIDTH != 8
6349 struct sljit_jump *found = NULL;
6350 #endif
6351 jump_list *matches = NULL;
6352
6353 if (common->match_end_ptr != 0)
6354 {
6355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6356 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6357 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6358 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6359 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6360 }
6361
6362 start = LABEL();
6363
6364 partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6365 if (common->mode == PCRE2_JIT_COMPLETE)
6366 add_jump(compiler, &common->failed_match, partial_quit);
6367
6368 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6369 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6370
6371 if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6372 {
6373 #if PCRE2_CODE_UNIT_WIDTH != 8
6374 if ((start_bits[31] & 0x80) != 0)
6375 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6376 else
6377 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6378 #elif defined SUPPORT_UNICODE
6379 if (common->utf && is_char7_bitset(start_bits, FALSE))
6380 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6381 #endif
6382 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6383 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6384 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6385 if (!HAS_VIRTUAL_REGISTERS)
6386 {
6387 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6388 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6389 }
6390 else
6391 {
6392 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6393 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6394 }
6395 JUMPTO(SLJIT_ZERO, start);
6396 }
6397 else
6398 set_jumps(matches, start);
6399
6400 #if PCRE2_CODE_UNIT_WIDTH != 8
6401 if (found != NULL)
6402 JUMPHERE(found);
6403 #endif
6404
6405 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6406
6407 if (common->mode != PCRE2_JIT_COMPLETE)
6408 JUMPHERE(partial_quit);
6409
6410 if (common->match_end_ptr != 0)
6411 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6412 }
6413
search_requested_char(compiler_common * common,PCRE2_UCHAR req_char,BOOL caseless,BOOL has_firstchar)6414 static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6415 {
6416 DEFINE_COMPILER;
6417 struct sljit_label *loop;
6418 struct sljit_jump *toolong;
6419 struct sljit_jump *already_found;
6420 struct sljit_jump *found;
6421 struct sljit_jump *found_oc = NULL;
6422 jump_list *not_found = NULL;
6423 sljit_u32 oc, bit;
6424
6425 SLJIT_ASSERT(common->req_char_ptr != 0);
6426 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6428 toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6429 already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6430
6431 if (has_firstchar)
6432 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6433 else
6434 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6435
6436 oc = req_char;
6437 if (caseless)
6438 {
6439 oc = TABLE_GET(req_char, common->fcc, req_char);
6440 #if defined SUPPORT_UNICODE
6441 if (req_char > 127 && (common->utf || common->ucp))
6442 oc = UCD_OTHERCASE(req_char);
6443 #endif
6444 }
6445
6446 #ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6447 if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6448 {
6449 not_found = fast_requested_char_simd(common, req_char, oc);
6450 }
6451 else
6452 #endif
6453 {
6454 loop = LABEL();
6455 add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6456
6457 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6458
6459 if (req_char == oc)
6460 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6461 else
6462 {
6463 bit = req_char ^ oc;
6464 if (is_powerof2(bit))
6465 {
6466 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6467 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6468 }
6469 else
6470 {
6471 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6472 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6473 }
6474 }
6475 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6476 JUMPTO(SLJIT_JUMP, loop);
6477
6478 JUMPHERE(found);
6479 if (found_oc)
6480 JUMPHERE(found_oc);
6481 }
6482
6483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6484
6485 JUMPHERE(already_found);
6486 JUMPHERE(toolong);
6487 return not_found;
6488 }
6489
/* Emits the fast-called helper that reverts saved frame values.

The helper is entered with sljit_emit_fast_enter (return address kept in
RETURN_ADDR) and repeatedly inspects the word just below STACK_TOP:

  > 0   the word is an offset from the local base: the next two stack words
        are copied to [base + offset] and [base + offset + sizeof(sljit_sw)],
        and three words are dropped from the stack;
  == 0  end marker: the helper returns;
  < 0   the negated word is an offset from the local base: the next stack
        word is copied to [base + offset], and two words are dropped.

Uses TMP1 (local base), TMP2 and, without virtual registers, TMP3.

The negative displacements are computed in sljit_sw, not by negating the
unsigned result of sizeof, so they do not depend on how an out-of-range
size_t value converts to sljit_sw. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)sizeof(sljit_sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: restore a pair of words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(sljit_sw)(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
else
  {
  /* TMP1 is borrowed as a scratch register here, so the local base is
  reloaded before looping. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative offset: restore a single word. */
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(sljit_sw)(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6543
/* Emits the fast-called helper that tests for a word boundary at STR_PTR.

The return address is kept in the LOCALS0 stack slot. The helper computes
two 0/1 values:
  TMP3  "previous character is a word character" (0 when STR_PTR is not
        beyond jit_arguments.begin),
  TMP2  "current character is a word character" (0 at the end of the
        subject),
and returns TMP2 = TMP2 XOR TMP3 with the zero flag set from that result, so
a non-zero TMP2 means the two sides differ.

Word character test: with common->ucp, an underscore or a character whose
type (fetched through the common->getucdtype helper) lies in the ranges
ucp_Ll..ucp_Lu or ucp_Nd..ucp_No; otherwise the ctype_word bit (0x10) of the
common->ctypes table, with characters above 255 treated as non-word.

With common->invalid_utf two extra exits exist: when the previous character
is invalid, TMP2 is set to -1 if the current character is invalid as well
(otherwise the normal test of the current character is performed); when only
the current character is invalid, TMP2 is set to TMP3. These exits do not
execute the final XOR.

In partial matching modes the helper also steps back one character and calls
check_start_used_ptr(). */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The shifts by 4 below rely on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Preserve the character (TMP1) and STR_PTR around the step back. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Now the current character; TMP2 stays 0 at the end of the subject. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Result: TMP2 = current XOR previous, zero flag set accordingly. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* Previous character was invalid. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Current character was invalid. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6704
/* Tries to test the character in TMP1 against a 256-bit class bitmap using
a few range comparisons instead of a table lookup.

Arguments:
  common      compiler state
  bits        the 32-byte class bitmap (bit i of the map describes value i)
  nclass      the membership value assumed for values >= 256
  invert      when TRUE, the sense of the emitted test is reversed
  backtracks  list receiving the jumps taken when the test fails

Returns TRUE when code was emitted, FALSE when the bitmap needs more than
four range boundaries (or more than MAX_CLASS_RANGE_SIZE) and nothing was
emitted.

Method: the bitmap is scanned for the positions where the bit value changes;
each such position is stored in ranges[]. A final boundary at 256 is added
when the last run differs from nclass. The resulting 0..4 boundaries are then
turned into compare-and-jump sequences. May destroy TMP1. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;

bit = bits[0] & 0x1;
/* All bits will be zero or one (since bit is zero or one). */
all = -bit;

for (i = 0; i < 256; )
  {
  byte = i >> 3;
  /* A whole byte equal to the current run value can be skipped at once. */
  if ((i & 0x7) == 0 && bits[byte] == all)
    i += 8;
  else
    {
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
    if (cbit != bit)
      {
      if (length >= MAX_CLASS_RANGE_SIZE)
        return FALSE;
      ranges[length] = i;
      length++;
      bit = cbit;
      all = -cbit;
      }
    i++;
    }
  }

/* The last run ends at 256 when values >= 256 have the opposite membership. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  {
  if (length >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  ranges[length] = 256;
  length++;
  }

if (length < 0 || length > 4)
  return FALSE;

/* From here on, bit is the (possibly inverted) membership of value 0. */
bit = bits[0] & 0x1;
if (invert) bit ^= 0x1;

/* No character is accepted. */
if (length == 0 && bit == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(length)
  {
  case 0:
  /* When bit != 0, all characters are accepted. */
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  if (ranges[0] + 1 != ranges[1])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  else
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 3:
  if (bit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  return TRUE;

  case 4:
  /* Two equally long ranges that differ in exactly one bit: fold them into
  one range by setting that bit in TMP1. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
      && is_powerof2(ranges[2] - ranges[0]))
    {
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    return TRUE;
    }

  if (bit != 0)
    {
    /* i tracks how much has already been subtracted from TMP1. */
    i = 0;
    if (ranges[0] + 1 != ranges[1])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      i = ranges[0];
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));

    if (ranges[2] + 1 != ranges[3])
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 != ranges[2])
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6856
/* Tries to test the character in TMP1 against a class bitmap that contains
only a handful of individual characters, using compare + conditional move.

Arguments:
  common      compiler state
  bits        the 32-byte class bitmap
  nclass      when TRUE the bitmap is complemented before it is scanned, and
              the sense of the final jump is reversed
  invert      when TRUE, reverses the sense of the final jump once more
  backtracks  list receiving the jump taken when the test fails

Returns TRUE when code was emitted; FALSE when the CPU has no conditional
move, when more than MAX_CLASS_CHARS_SIZE entries would be needed, or when
no character is selected.

Method: every selected value becomes an entry in char_list. A value c with
bit 0x20 set whose partner c - 0x20 is already listed is merged into that
entry, which is then stored as c | 0x100 (the 0x100 flag marks a merged
pair). The emitted code clears TMP2, sets it to a non-zero value whenever
TMP1 equals a plain entry, then ORs 0x20 into TMP1 and does the same for the
merged entries; the final jump tests TMP2 against zero. May destroy TMP1;
TMP2 is used as the accumulator. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t byte;
sljit_s32 type;
int i, j, k, len, c;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

len = 0;

for (i = 0; i < 32; i++)
  {
  byte = bits[i];

  if (nclass)
    byte = ~byte;

  j = 0;
  while (byte != 0)
    {
    if (byte & 0x1)
      {
      c = i * 8 + j;

      k = len;

      if ((c & 0x20) != 0)
        {
        /* Look for the partner that differs only in bit 0x20. */
        for (k = 0; k < len; k++)
          if (char_list[k] == c - 0x20)
            {
            char_list[k] |= 0x120;
            break;
            }
        }

      /* k == len: no partner was merged, so c needs its own entry. */
      if (k == len)
        {
        if (len >= MAX_CLASS_CHARS_SIZE)
          return FALSE;

        char_list[len++] = (uint16_t) c;
        }
      }

    byte >>= 1;
    j++;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

i = 0;
j = 0;

if (char_list[0] == 0)
  {
  /* Value 0 cannot be signalled by moving TMP1 into TMP2, so the zero flag
  is materialised instead. */
  i++;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* Plain entries; j counts the merged entries handled afterwards. */
while (i < len)
  {
  if ((char_list[i] & 0x100) != 0)
    j++;
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  i++;
  }

if (j != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (i = 0; i < len; i++)
    if ((char_list[i] & 0x100) != 0)
      {
      j--;
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
      CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
      }
  }

if (invert)
  nclass = !nclass;

type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
6957
/* Tries to emit an optimized test of TMP1 against a class bitmap: first by
range comparisons (optimize_class_ranges), then by individual character
comparisons (optimize_class_chars). The arguments are passed through
unchanged. Returns TRUE when one of the two emitted code, FALSE when the
caller has to emit a generic bitmap lookup itself. May destroy TMP1. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return TRUE;
return optimize_class_chars(common, bits, nclass, invert, backtracks);
}
6965
/* Emits the fast-called helper that checks whether TMP1 contains a newline
character.

Characters tested: 0x0a..0x0d and 0x85; additionally 0x2028 and 0x2029 when
code units are 16 or 32 bits wide, or in 8-bit UTF mode.

The return address is kept in RETURN_ADDR. On return TMP2 is non-zero and
the zero flag is clear when the character matched. TMP1 is modified as well
(0x0a is subtracted, and bit 0 may be set). */
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Range 0x0a..0x0d. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting bit 0 maps 0x2028 onto 0x2029, so one compare covers both. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6992
static void check_hspace(compiler_common *common)
{
/* Emits a fast-call helper (fast enter / fast return through RETURN_ADDR)
that checks whether TMP1 contains a horizontal space character: 0x09, 0x20,
0xa0 and, when the wide-character part below is compiled in and enabled,
0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000. The match result
is accumulated in TMP2 and the last OP_FLAGS is issued with SLJIT_SET_Z.
TMP2 is destroyed; in the wide-character part TMP1 is modified as well
(it is rebased by 0x2000). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* The outcome of the 0xa0 comparison is consumed by the next OP_FLAGS
that is emitted (inside or after the conditional part). */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* With 8 bit code units the wide checks are only emitted in UTF mode. */
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Rebase TMP1 by 0x2000: the 0x2000-0x200a range becomes a single
  LESS_EQUAL comparison and the remaining constants are given relative
  to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison into TMP2 and request the zero flag for the
combined result. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7031
static void check_vspace(compiler_common *common)
{
/* Emits a fast-call helper (fast enter / fast return through RETURN_ADDR)
that checks whether TMP1 contains a vertical space character: 0x0a-0x0d,
0x85 and, when the wide-character part below is compiled in and enabled,
0x2028-0x2029. The emitted sequence is the same as in check_anynewline().
The match result is accumulated in TMP2 and the last OP_FLAGS is issued
with SLJIT_SET_Z. TMP2 is destroyed; TMP1 is modified as well (it is
rebased by 0x0a and may be OR-ed with 1). */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase TMP1 so that the 0x0a-0x0d range becomes 0-3 and a single
LESS_EQUAL comparison covers it. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
/* 0x85; the outcome of this comparison is consumed by the next OP_FLAGS
that is emitted (inside or after the conditional part). */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
/* With 8 bit code units the wide checks are only emitted in UTF mode. */
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit maps both 0x2028 - 0x0a (0x201e) and
  0x2029 - 0x0a (0x201f) to 0x201f, so one comparison handles the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
  }
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merge the last comparison into TMP2 and request the zero flag for the
combined result. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7059
static void do_casefulcmp(compiler_common *common)
{
/* Emits a fast-call helper that compares, code unit by code unit and without
any case folding, the sequence addressed by TMP1 with the sequence that
starts at STR_PTR - TMP2. TMP2 is the remaining length in bytes: it is
decremented by IN_UCHARS(1) per iteration and the loop ends either on the
first differing code unit or when TMP2 reaches zero.

The return address is parked in LOCALS0 on entry and reloaded into TMP1
for the fast return. When virtual registers are in use, the two scratch
registers (STR_END and STACK_TOP) are saved in TMP3 / RETURN_ADDR around
the loop and restored afterwards. */
DEFINE_COMPILER;
struct sljit_label *loop_start;
struct sljit_jump *mismatch;
const int left_reg = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
const int right_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
const BOOL must_save_regs = (left_reg == STR_END);
enum { LOAD_PLAIN, LOAD_POST_UPDATE, LOAD_PRE_UPDATE } load_mode;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (must_save_regs)
  {
  OP1(SLJIT_MOV, TMP3, 0, left_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, right_reg, 0);
  }

/* Ask sljit (SLJIT_MEM_SUPP) which updating load form can be used,
preferring post-update over pre-update. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = LOAD_POST_UPDATE;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  load_mode = LOAD_PRE_UPDATE;
else
  load_mode = LOAD_PLAIN;

/* Pre-update loads advance the pointer before reading, so both pointers
start one code unit earlier. */
if (load_mode == LOAD_PRE_UPDATE)
  {
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop_start = LABEL();

switch (load_mode)
  {
  case LOAD_POST_UPDATE:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, right_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case LOAD_PRE_UPDATE:
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, left_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, right_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* No updating load form: read, then advance both pointers explicitly. */
  OP1(MOV_UCHAR, left_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, right_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

mismatch = CMP(SLJIT_NOT_EQUAL, left_reg, 0, right_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop_start);

JUMPHERE(mismatch);
/* Reload the return address saved by the fast enter. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial one code unit bias of the pre-update variant. */
if (load_mode == LOAD_PRE_UPDATE)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (must_save_regs)
  {
  OP1(SLJIT_MOV, left_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, right_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7139
static void do_caselesscmp(compiler_common *common)
{
/* Emits a fast-call helper that compares the sequence addressed by TMP1
with the sequence that starts at STR_PTR - TMP2, one code unit at a time,
after mapping each code unit through the common->lcc table. TMP2 is the
remaining length in bytes: it is decremented by IN_UCHARS(1) per iteration
and the loop ends either on the first differing (mapped) code unit or when
TMP2 reaches zero.

The return address is stored in LOCALS0 and reloaded into TMP1 for the fast
return; char1_reg (STR_END) is preserved in LOCALS1. When virtual registers
are in use, STACK_TOP and STACK_LIMIT serve as scratch registers and are
preserved in TMP3 and RETURN_ADDR. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;
int char1_reg = STR_END;
int char2_reg;
int lcc_table;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;

if (HAS_VIRTUAL_REGISTERS)
  {
  char2_reg = STACK_TOP;
  lcc_table = STACK_LIMIT;
  }
else
  {
  char2_reg = RETURN_ADDR;
  lcc_table = TMP3;
  }

/* Query (SLJIT_MEM_SUPP) which updating load form is available, preferring
post-update over pre-update. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* char1_reg is always STR_END here, so its value is saved unconditionally. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

if (opt_type == 1)
  {
  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else if (opt_type == 2)
  {
  /* Pre-update loads advance the pointer before reading, so both pointers
  start one code unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  label = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  }
else
  {
  /* Plain loads: TMP1 is advanced here, STR_PTR is advanced after the
  table lookups below. */
  label = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Map both code units through the lcc table. With code units wider than
8 bits, values above 255 skip the lookup and are compared unchanged. */
#if PCRE2_CODE_UNIT_WIDTH != 8
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
/* Reload the return address saved by the fast enter. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial one code unit bias of the pre-update variant. */
if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7237
/* Emits the comparison of one pattern character (one code unit, or all code
units of the character when common->utf is set and the first unit has extra
length) against the subject bytes located at STR_PTR - context->length.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has an other case, the differing
              bit is OR-ed into the subject value before it is compared
  cc          points to the first code unit of the pattern character
  context     running state shared between consecutive calls: remaining
              length in bytes, the register that receives the next load
              (-1 before the first load) and, for unaligned reads, the
              code units collected so far
  backtracks  receives the jumps taken on mismatch

Returns:      cc advanced past the code units that were processed */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
    compare_context *context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
PCRE2_SPTR othercasechar = NULL;
#ifdef SUPPORT_UNICODE
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. The upper part of the value is the
  offset of the code unit that carries the differing bit, the low 8 bits
  are the bit mask itself. */
#if PCRE2_CODE_UNIT_WIDTH == 8
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
  othercasechar = cc + (othercasebit >> 9);
  /* Bit 0x100 marks that the mask applies to the upper byte of the unit. */
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  }

/* First call for this context: load the first chunk into TMP1 and make
TMP2 the target of the next load, so loads and compares alternate between
the two registers. */
if (context->sourcereg == -1)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif PCRE2_CODE_UNIT_WIDTH == 32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UNICODE
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)

  /* Unaligned read is supported. Code units are collected in context->c
  (expected value) and context->oc (case mask) and compared in one step
  once a whole chunk has been gathered. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

#if PCRE2_CODE_UNIT_WIDTH == 8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Start loading the next chunk (if any bytes remain) into the idle
    register before the chunk loaded earlier is compared. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if PCRE2_CODE_UNIT_WIDTH == 8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    /* Switch to the register that holds the previously loaded chunk. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The number of collected code units selects the width of the compare. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(PCRE2_UCHAR):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(PCRE2_UCHAR):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#if PCRE2_CODE_UNIT_WIDTH == 8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. One code unit is
  compared per iteration: load the next unit (if any remain) into the idle
  register, then compare the unit loaded earlier. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UNICODE
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
7390
7391 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7392
/* Rebases the emitted character type register. The local variable typeoffset
records the constant that has already been subtracted from typereg; the macro
emits a single add or sub so that the subtracted constant becomes (value),
then records the new offset. Expects typereg and typeoffset (and whatever OP2
needs) to be in scope. The argument is evaluated more than once.

Wrapped in do { } while (0) so the macro expands to exactly one statement;
without it the offset assignment would escape an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the local
variable charoffset. The argument is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7412
7413 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7414
compile_xclass_matchingpath(compiler_common * common,PCRE2_SPTR cc,jump_list ** backtracks)7415 static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7416 {
7417 DEFINE_COMPILER;
7418 jump_list *found = NULL;
7419 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7420 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7421 struct sljit_jump *jump = NULL;
7422 PCRE2_SPTR ccbegin;
7423 int compares, invertcmp, numberofcmps;
7424 #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7425 BOOL utf = common->utf;
7426 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7427
7428 #ifdef SUPPORT_UNICODE
7429 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
7430 BOOL charsaved = FALSE;
7431 int typereg = TMP1;
7432 const sljit_u32 *other_cases;
7433 sljit_uw typeoffset;
7434 #endif /* SUPPORT_UNICODE */
7435
7436 /* Scanning the necessary info. */
7437 cc++;
7438 ccbegin = cc;
7439 compares = 0;
7440
7441 if (cc[-1] & XCL_MAP)
7442 {
7443 min = 0;
7444 cc += 32 / sizeof(PCRE2_UCHAR);
7445 }
7446
7447 while (*cc != XCL_END)
7448 {
7449 compares++;
7450 if (*cc == XCL_SINGLE)
7451 {
7452 cc ++;
7453 GETCHARINCTEST(c, cc);
7454 if (c > max) max = c;
7455 if (c < min) min = c;
7456 #ifdef SUPPORT_UNICODE
7457 needschar = TRUE;
7458 #endif /* SUPPORT_UNICODE */
7459 }
7460 else if (*cc == XCL_RANGE)
7461 {
7462 cc ++;
7463 GETCHARINCTEST(c, cc);
7464 if (c < min) min = c;
7465 GETCHARINCTEST(c, cc);
7466 if (c > max) max = c;
7467 #ifdef SUPPORT_UNICODE
7468 needschar = TRUE;
7469 #endif /* SUPPORT_UNICODE */
7470 }
7471 #ifdef SUPPORT_UNICODE
7472 else
7473 {
7474 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7475 cc++;
7476 if (*cc == PT_CLIST)
7477 {
7478 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7479 while (*other_cases != NOTACHAR)
7480 {
7481 if (*other_cases > max) max = *other_cases;
7482 if (*other_cases < min) min = *other_cases;
7483 other_cases++;
7484 }
7485 }
7486 else
7487 {
7488 max = READ_CHAR_MAX;
7489 min = 0;
7490 }
7491
7492 switch(*cc)
7493 {
7494 case PT_ANY:
7495 /* Any either accepts everything or ignored. */
7496 if (cc[-1] == XCL_PROP)
7497 {
7498 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7499 if (list == backtracks)
7500 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7501 return;
7502 }
7503 break;
7504
7505 case PT_LAMP:
7506 case PT_GC:
7507 case PT_PC:
7508 case PT_ALNUM:
7509 needstype = TRUE;
7510 break;
7511
7512 case PT_SC:
7513 needsscript = TRUE;
7514 break;
7515
7516 case PT_SPACE:
7517 case PT_PXSPACE:
7518 case PT_WORD:
7519 case PT_PXGRAPH:
7520 case PT_PXPRINT:
7521 case PT_PXPUNCT:
7522 needstype = TRUE;
7523 needschar = TRUE;
7524 break;
7525
7526 case PT_CLIST:
7527 case PT_UCNC:
7528 needschar = TRUE;
7529 break;
7530
7531 default:
7532 SLJIT_UNREACHABLE();
7533 break;
7534 }
7535 cc += 2;
7536 }
7537 #endif /* SUPPORT_UNICODE */
7538 }
7539 SLJIT_ASSERT(compares > 0);
7540
7541 /* We are not necessary in utf mode even in 8 bit mode. */
7542 cc = ccbegin;
7543 if ((cc[-1] & XCL_NOT) != 0)
7544 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7545 else
7546 {
7547 #ifdef SUPPORT_UNICODE
7548 read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
7549 #else /* !SUPPORT_UNICODE */
7550 read_char(common, min, max, NULL, 0);
7551 #endif /* SUPPORT_UNICODE */
7552 }
7553
7554 if ((cc[-1] & XCL_HASPROP) == 0)
7555 {
7556 if ((cc[-1] & XCL_MAP) != 0)
7557 {
7558 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7559 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7560 {
7561 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7562 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7563 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7564 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7565 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7566 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7567 }
7568
7569 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7570 JUMPHERE(jump);
7571
7572 cc += 32 / sizeof(PCRE2_UCHAR);
7573 }
7574 else
7575 {
7576 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7577 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7578 }
7579 }
7580 else if ((cc[-1] & XCL_MAP) != 0)
7581 {
7582 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7583 #ifdef SUPPORT_UNICODE
7584 charsaved = TRUE;
7585 #endif /* SUPPORT_UNICODE */
7586 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7587 {
7588 #if PCRE2_CODE_UNIT_WIDTH == 8
7589 jump = NULL;
7590 if (common->utf)
7591 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7592 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7593
7594 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7595 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7596 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7597 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7598 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7599 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7600
7601 #if PCRE2_CODE_UNIT_WIDTH == 8
7602 if (common->utf)
7603 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7604 JUMPHERE(jump);
7605 }
7606
7607 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7608 cc += 32 / sizeof(PCRE2_UCHAR);
7609 }
7610
7611 #ifdef SUPPORT_UNICODE
7612 if (needstype || needsscript)
7613 {
7614 if (needschar && !charsaved)
7615 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7616
7617 #if PCRE2_CODE_UNIT_WIDTH == 32
7618 if (!common->utf)
7619 {
7620 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7621 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7622 JUMPHERE(jump);
7623 }
7624 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7625
7626 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7627 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7628 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7629 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7630 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7631 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7632 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7633 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7634
7635 /* Before anything else, we deal with scripts. */
7636 if (needsscript)
7637 {
7638 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7639 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7640 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7641
7642 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7643
7644 ccbegin = cc;
7645
7646 while (*cc != XCL_END)
7647 {
7648 if (*cc == XCL_SINGLE)
7649 {
7650 cc ++;
7651 GETCHARINCTEST(c, cc);
7652 }
7653 else if (*cc == XCL_RANGE)
7654 {
7655 cc ++;
7656 GETCHARINCTEST(c, cc);
7657 GETCHARINCTEST(c, cc);
7658 }
7659 else
7660 {
7661 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7662 cc++;
7663 if (*cc == PT_SC)
7664 {
7665 compares--;
7666 invertcmp = (compares == 0 && list != backtracks);
7667 if (cc[-1] == XCL_NOTPROP)
7668 invertcmp ^= 0x1;
7669 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7670 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7671 }
7672 cc += 2;
7673 }
7674 }
7675
7676 cc = ccbegin;
7677
7678 if (needstype)
7679 {
7680 /* TMP2 has already been shifted by 2 */
7681 if (!needschar)
7682 {
7683 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7684 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7685
7686 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7687 }
7688 else
7689 {
7690 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7691 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7692
7693 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7694 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7695 typereg = RETURN_ADDR;
7696 }
7697 }
7698 else if (needschar)
7699 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7700 }
7701 else if (needstype)
7702 {
7703 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7704 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7705
7706 if (!needschar)
7707 {
7708 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7709
7710 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7711 }
7712 else
7713 {
7714 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7715
7716 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7717 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7718 typereg = RETURN_ADDR;
7719 }
7720 }
7721 else if (needschar)
7722 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7723 }
7724 #endif /* SUPPORT_UNICODE */
7725
7726 /* Generating code. */
7727 charoffset = 0;
7728 numberofcmps = 0;
7729 #ifdef SUPPORT_UNICODE
7730 typeoffset = 0;
7731 #endif /* SUPPORT_UNICODE */
7732
7733 while (*cc != XCL_END)
7734 {
7735 compares--;
7736 invertcmp = (compares == 0 && list != backtracks);
7737 jump = NULL;
7738
7739 if (*cc == XCL_SINGLE)
7740 {
7741 cc ++;
7742 GETCHARINCTEST(c, cc);
7743
7744 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7745 {
7746 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7747 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7748 numberofcmps++;
7749 }
7750 else if (numberofcmps > 0)
7751 {
7752 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7753 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7754 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7755 numberofcmps = 0;
7756 }
7757 else
7758 {
7759 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7760 numberofcmps = 0;
7761 }
7762 }
7763 else if (*cc == XCL_RANGE)
7764 {
7765 cc ++;
7766 GETCHARINCTEST(c, cc);
7767 SET_CHAR_OFFSET(c);
7768 GETCHARINCTEST(c, cc);
7769
7770 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7771 {
7772 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7773 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7774 numberofcmps++;
7775 }
7776 else if (numberofcmps > 0)
7777 {
7778 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7779 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7780 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7781 numberofcmps = 0;
7782 }
7783 else
7784 {
7785 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7786 numberofcmps = 0;
7787 }
7788 }
7789 #ifdef SUPPORT_UNICODE
7790 else
7791 {
7792 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7793 if (*cc == XCL_NOTPROP)
7794 invertcmp ^= 0x1;
7795 cc++;
7796 switch(*cc)
7797 {
7798 case PT_ANY:
7799 if (!invertcmp)
7800 jump = JUMP(SLJIT_JUMP);
7801 break;
7802
7803 case PT_LAMP:
7804 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
7805 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7806 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
7807 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7808 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
7809 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7810 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7811 break;
7812
7813 case PT_GC:
7814 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
7815 SET_TYPE_OFFSET(c);
7816 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
7817 break;
7818
7819 case PT_PC:
7820 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
7821 break;
7822
7823 case PT_SC:
7824 compares++;
7825 /* Do nothing. */
7826 break;
7827
7828 case PT_SPACE:
7829 case PT_PXSPACE:
7830 SET_CHAR_OFFSET(9);
7831 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
7832 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7833
7834 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
7835 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7836
7837 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
7838 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7839
7840 SET_TYPE_OFFSET(ucp_Zl);
7841 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
7842 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7843 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7844 break;
7845
7846 case PT_WORD:
7847 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
7848 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7849 /* Fall through. */
7850
7851 case PT_ALNUM:
7852 SET_TYPE_OFFSET(ucp_Ll);
7853 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
7854 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7855 SET_TYPE_OFFSET(ucp_Nd);
7856 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
7857 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7858 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7859 break;
7860
7861 case PT_CLIST:
7862 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7863
7864 /* At least three characters are required.
7865 Otherwise this case would be handled by the normal code path. */
7866 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
7867 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
7868
7869 /* Optimizing character pairs, if their difference is power of 2. */
7870 if (is_powerof2(other_cases[1] ^ other_cases[0]))
7871 {
7872 if (charoffset == 0)
7873 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7874 else
7875 {
7876 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7877 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7878 }
7879 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
7880 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7881 other_cases += 2;
7882 }
7883 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
7884 {
7885 if (charoffset == 0)
7886 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
7887 else
7888 {
7889 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7890 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7891 }
7892 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
7893 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7894
7895 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
7896 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7897
7898 other_cases += 3;
7899 }
7900 else
7901 {
7902 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7903 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7904 }
7905
7906 while (*other_cases != NOTACHAR)
7907 {
7908 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7909 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7910 }
7911 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7912 break;
7913
7914 case PT_UCNC:
7915 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
7916 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7917 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
7918 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7919 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
7920 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7921
7922 SET_CHAR_OFFSET(0xa0);
7923 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
7924 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7925 SET_CHAR_OFFSET(0);
7926 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
7927 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
7928 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7929 break;
7930
7931 case PT_PXGRAPH:
7932 /* C and Z groups are the farthest two groups. */
7933 SET_TYPE_OFFSET(ucp_Ll);
7934 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
7935 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
7936
7937 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
7938
7939 /* In case of ucp_Cf, we overwrite the result. */
7940 SET_CHAR_OFFSET(0x2066);
7941 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
7942 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7943
7944 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
7945 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7946
7947 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
7948 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7949
7950 JUMPHERE(jump);
7951 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
7952 break;
7953
7954 case PT_PXPRINT:
7955 /* C and Z groups are the farthest two groups. */
7956 SET_TYPE_OFFSET(ucp_Ll);
7957 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
7958 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
7959
7960 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
7961 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
7962
7963 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
7964
7965 /* In case of ucp_Cf, we overwrite the result. */
7966 SET_CHAR_OFFSET(0x2066);
7967 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
7968 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7969
7970 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
7971 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7972
7973 JUMPHERE(jump);
7974 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
7975 break;
7976
7977 case PT_PXPUNCT:
7978 SET_TYPE_OFFSET(ucp_Sc);
7979 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
7980 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7981
7982 SET_CHAR_OFFSET(0);
7983 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
7984 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
7985
7986 SET_TYPE_OFFSET(ucp_Pc);
7987 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
7988 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7989 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7990 break;
7991
7992 default:
7993 SLJIT_UNREACHABLE();
7994 break;
7995 }
7996 cc += 2;
7997 }
7998 #endif /* SUPPORT_UNICODE */
7999
8000 if (jump != NULL)
8001 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8002 }
8003
8004 if (found != NULL)
8005 set_jumps(found, LABEL());
8006 }
8007
8008 #undef SET_TYPE_OFFSET
8009 #undef SET_CHAR_OFFSET
8010
8011 #endif
8012
/* Generates the matching-path code for one "simple assertion" opcode.

Handled opcodes: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY,
OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE. Any
other value falls out of the switch and reaches SLJIT_UNREACHABLE().

Arguments:
  common      compiler state (newline settings, mode, shared jump lists)
  type        the opcode to generate code for
  cc          pattern pointer; only OP_REVERSE reads from it (the character
              count stored in its LINK_SIZE-wide operand)
  backtracks  list receiving every jump that is taken when the test fails

Returns:      cc + LINK_SIZE for OP_REVERSE, cc unchanged otherwise

Every OP1/OP2/CMP/JUMP call below emits code, so the statements must stay in
this order. Where HAS_VIRTUAL_REGISTERS is set, ARGUMENTS is first copied
into a temporary register and the jit_arguments fields are addressed through
that copy; otherwise they are addressed through ARGUMENTS directly. */

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int char_count;
int args_reg;
struct sljit_jump *fwd[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *loop_top;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* Load jit_arguments.begin (OP_SOD) or jit_arguments.str (OP_SOM) into
  TMP1 and fail unless STR_PTR is equal to it. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  if (type == OP_SOD)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared word boundary routine is entered with a fast call. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* With invalid UTF support the result is tested as a value in TMP2. */
    if (type == OP_NOT_WORD_BOUNDARY)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    else
      add_jump(compiler, backtracks, CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise the result is tested through the zero flag. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (type == OP_NOT_WORD_BOUNDARY)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. When STR_PTR has already reached STR_END
  all newline checks are skipped. */
  fwd[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype != NLTYPE_FIXED)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    fwd[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    /* The code unit is CR: compare STR_PTR + 2 code units with STR_END. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    fwd[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: the code unit after the CR must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    fwd[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The code unit is not CR. */
    JUMPHERE(fwd[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in TMP3 and restored once the checks passed. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(fwd[2]);
    JUMPHERE(fwd[3]);
    }
  else if (common->newline > 255)
    {
    /* Fixed newline made of two code units: (newline >> 8) & 0xff is
    compared with the first one, newline & 0xff with the second one. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      fwd[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(fwd[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Fixed newline made of one code unit, which must be the last one. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  JUMPHERE(fwd[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Fail while STR_PTR is still below STR_END. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fail when PCRE2_NOTEOL is set in jit_arguments.options. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    /* Without endonly the remaining test is the same as OP_EODN. */
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  return cc;

  case OP_DOLLM:
  /* fwd[1] is taken while there is still subject data at STR_PTR; the code
  in between handles STR_PTR having reached STR_END. */
  fwd[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  check_partial(common, FALSE);
  fwd[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(fwd[1]);

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Fixed newline made of two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      fwd[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(fwd[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  JUMPHERE(fwd[0]);
  return cc;

  case OP_CIRC:
  /* Fail when STR_PTR is above jit_arguments.begin or when PCRE2_NOTBOL is
  set in jit_arguments.options. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. It holds jit_arguments.begin until
  then. */
  args_reg = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  fwd[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
  fwd[0] = JUMP(SLJIT_JUMP);

  /* STR_PTR is above jit_arguments.begin: look at what precedes it. */
  JUMPHERE(fwd[1]);

  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype != NLTYPE_FIXED || common->newline <= 255)
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Fixed newline made of two code units: both must lie before STR_PTR
    and not before the address kept in TMP2. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  JUMPHERE(fwd[0]);
  return cc;

  case OP_REVERSE:
  /* A zero count emits no code at all. */
  char_count = GET(cc, 0);
  if (char_count != 0)
    {
    args_reg = ARGUMENTS;
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      args_reg = TMP1;
      }
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* Step back one character at a time, counting down in TMP3. */
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, char_count);
      loop_top = LABEL();
      add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
      move_back(common, backtracks, FALSE);
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
      JUMPTO(SLJIT_NOT_ZERO, loop_top);
      }
    else
#endif
      {
      /* Fixed-width code units: subtract the whole distance at once. */
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(char_count));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
      }
    check_start_used_ptr(common);
    }
  return cc + LINK_SIZE;
  }
SLJIT_UNREACHABLE();
return cc;
}
8287
8288 #ifdef SUPPORT_UNICODE
8289
8290 #if PCRE2_CODE_UNIT_WIDTH != 32
8291
do_extuni_utf(jit_arguments * args,PCRE2_SPTR cc)8292 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8293 {
8294 PCRE2_SPTR start_subject = args->begin;
8295 PCRE2_SPTR end_subject = args->end;
8296 int lgb, rgb, ricount;
8297 PCRE2_SPTR prevcc, endcc, bptr;
8298 BOOL first = TRUE;
8299 uint32_t c;
8300
8301 prevcc = cc;
8302 endcc = NULL;
8303 do
8304 {
8305 GETCHARINC(c, cc);
8306 rgb = UCD_GRAPHBREAK(c);
8307
8308 if (first)
8309 {
8310 lgb = rgb;
8311 endcc = cc;
8312 first = FALSE;
8313 continue;
8314 }
8315
8316 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8317 break;
8318
8319 /* Not breaking between Regional Indicators is allowed only if there
8320 are an even number of preceding RIs. */
8321
8322 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8323 {
8324 ricount = 0;
8325 bptr = prevcc;
8326
8327 /* bptr is pointing to the left-hand character */
8328 while (bptr > start_subject)
8329 {
8330 bptr--;
8331 BACKCHAR(bptr);
8332 GETCHAR(c, bptr);
8333
8334 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8335 break;
8336
8337 ricount++;
8338 }
8339
8340 if ((ricount & 1) != 0) break; /* Grapheme break required */
8341 }
8342
8343 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8344 allows any number of them before a following Extended_Pictographic. */
8345
8346 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8347 lgb != ucp_gbExtended_Pictographic)
8348 lgb = rgb;
8349
8350 prevcc = endcc;
8351 endcc = cc;
8352 }
8353 while (cc < end_subject);
8354
8355 return endcc;
8356 }
8357
8358 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8359
do_extuni_utf_invalid(jit_arguments * args,PCRE2_SPTR cc)8360 static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8361 {
8362 PCRE2_SPTR start_subject = args->begin;
8363 PCRE2_SPTR end_subject = args->end;
8364 int lgb, rgb, ricount;
8365 PCRE2_SPTR prevcc, endcc, bptr;
8366 BOOL first = TRUE;
8367 uint32_t c;
8368
8369 prevcc = cc;
8370 endcc = NULL;
8371 do
8372 {
8373 GETCHARINC_INVALID(c, cc, end_subject, break);
8374 rgb = UCD_GRAPHBREAK(c);
8375
8376 if (first)
8377 {
8378 lgb = rgb;
8379 endcc = cc;
8380 first = FALSE;
8381 continue;
8382 }
8383
8384 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8385 break;
8386
8387 /* Not breaking between Regional Indicators is allowed only if there
8388 are an even number of preceding RIs. */
8389
8390 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8391 {
8392 ricount = 0;
8393 bptr = prevcc;
8394
8395 /* bptr is pointing to the left-hand character */
8396 while (bptr > start_subject)
8397 {
8398 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8399
8400 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8401 break;
8402
8403 ricount++;
8404 }
8405
8406 if ((ricount & 1) != 0)
8407 break; /* Grapheme break required */
8408 }
8409
8410 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8411 allows any number of them before a following Extended_Pictographic. */
8412
8413 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8414 lgb != ucp_gbExtended_Pictographic)
8415 lgb = rgb;
8416
8417 prevcc = endcc;
8418 endcc = cc;
8419 }
8420 while (cc < end_subject);
8421
8422 return endcc;
8423 }
8424
do_extuni_no_utf(jit_arguments * args,PCRE2_SPTR cc)8425 static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8426 {
8427 PCRE2_SPTR start_subject = args->begin;
8428 PCRE2_SPTR end_subject = args->end;
8429 int lgb, rgb, ricount;
8430 PCRE2_SPTR bptr;
8431 uint32_t c;
8432
8433 /* Patch by PH */
8434 /* GETCHARINC(c, cc); */
8435 c = *cc++;
8436
8437 #if PCRE2_CODE_UNIT_WIDTH == 32
8438 if (c >= 0x110000)
8439 return NULL;
8440 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8441 lgb = UCD_GRAPHBREAK(c);
8442
8443 while (cc < end_subject)
8444 {
8445 c = *cc;
8446 #if PCRE2_CODE_UNIT_WIDTH == 32
8447 if (c >= 0x110000)
8448 break;
8449 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8450 rgb = UCD_GRAPHBREAK(c);
8451
8452 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8453 break;
8454
8455 /* Not breaking between Regional Indicators is allowed only if there
8456 are an even number of preceding RIs. */
8457
8458 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8459 {
8460 ricount = 0;
8461 bptr = cc - 1;
8462
8463 /* bptr is pointing to the left-hand character */
8464 while (bptr > start_subject)
8465 {
8466 bptr--;
8467 c = *bptr;
8468 #if PCRE2_CODE_UNIT_WIDTH == 32
8469 if (c >= 0x110000)
8470 break;
8471 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8472
8473 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8474
8475 ricount++;
8476 }
8477
8478 if ((ricount & 1) != 0)
8479 break; /* Grapheme break required */
8480 }
8481
8482 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8483 allows any number of them before a following Extended_Pictographic. */
8484
8485 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8486 lgb != ucp_gbExtended_Pictographic)
8487 lgb = rgb;
8488
8489 cc++;
8490 }
8491
8492 return cc;
8493 }
8494
8495 #endif /* SUPPORT_UNICODE */
8496
compile_char1_matchingpath(compiler_common * common,PCRE2_UCHAR type,PCRE2_SPTR cc,jump_list ** backtracks,BOOL check_str_ptr)8497 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8498 {
8499 DEFINE_COMPILER;
8500 int length;
8501 unsigned int c, oc, bit;
8502 compare_context context;
8503 struct sljit_jump *jump[3];
8504 jump_list *end_list;
8505 #ifdef SUPPORT_UNICODE
8506 PCRE2_UCHAR propdata[5];
8507 #endif /* SUPPORT_UNICODE */
8508
8509 switch(type)
8510 {
8511 case OP_NOT_DIGIT:
8512 case OP_DIGIT:
8513 /* Digits are usually 0-9, so it is worth to optimize them. */
8514 if (check_str_ptr)
8515 detect_partial_match(common, backtracks);
8516 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8517 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8518 read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8519 else
8520 #endif
8521 read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8522 /* Flip the starting bit in the negative case. */
8523 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
8524 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8525 return cc;
8526
8527 case OP_NOT_WHITESPACE:
8528 case OP_WHITESPACE:
8529 if (check_str_ptr)
8530 detect_partial_match(common, backtracks);
8531 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8532 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8533 read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8534 else
8535 #endif
8536 read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8537 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
8538 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8539 return cc;
8540
8541 case OP_NOT_WORDCHAR:
8542 case OP_WORDCHAR:
8543 if (check_str_ptr)
8544 detect_partial_match(common, backtracks);
8545 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8546 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8547 read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8548 else
8549 #endif
8550 read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8551 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
8552 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8553 return cc;
8554
8555 case OP_ANY:
8556 if (check_str_ptr)
8557 detect_partial_match(common, backtracks);
8558 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8559 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8560 {
8561 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8562 end_list = NULL;
8563 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8564 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8565 else
8566 check_str_end(common, &end_list);
8567
8568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8569 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8570 set_jumps(end_list, LABEL());
8571 JUMPHERE(jump[0]);
8572 }
8573 else
8574 check_newlinechar(common, common->nltype, backtracks, TRUE);
8575 return cc;
8576
8577 case OP_ALLANY:
8578 if (check_str_ptr)
8579 detect_partial_match(common, backtracks);
8580 #ifdef SUPPORT_UNICODE
8581 if (common->utf)
8582 {
8583 if (common->invalid_utf)
8584 {
8585 read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8586 return cc;
8587 }
8588
8589 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8590 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8591 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8592 #if PCRE2_CODE_UNIT_WIDTH == 8
8593 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8594 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8595 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8596 #elif PCRE2_CODE_UNIT_WIDTH == 16
8597 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8598 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8599 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
8600 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8601 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8603 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8604 JUMPHERE(jump[0]);
8605 return cc;
8606 #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8607 }
8608 #endif /* SUPPORT_UNICODE */
8609 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8610 return cc;
8611
8612 case OP_ANYBYTE:
8613 if (check_str_ptr)
8614 detect_partial_match(common, backtracks);
8615 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8616 return cc;
8617
8618 #ifdef SUPPORT_UNICODE
8619 case OP_NOTPROP:
8620 case OP_PROP:
8621 propdata[0] = XCL_HASPROP;
8622 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8623 propdata[2] = cc[0];
8624 propdata[3] = cc[1];
8625 propdata[4] = XCL_END;
8626 if (check_str_ptr)
8627 detect_partial_match(common, backtracks);
8628 compile_xclass_matchingpath(common, propdata, backtracks);
8629 return cc + 2;
8630 #endif
8631
8632 case OP_ANYNL:
8633 if (check_str_ptr)
8634 detect_partial_match(common, backtracks);
8635 read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8636 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8637 /* We don't need to handle soft partial matching case. */
8638 end_list = NULL;
8639 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8640 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8641 else
8642 check_str_end(common, &end_list);
8643 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8644 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8646 jump[2] = JUMP(SLJIT_JUMP);
8647 JUMPHERE(jump[0]);
8648 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8649 set_jumps(end_list, LABEL());
8650 JUMPHERE(jump[1]);
8651 JUMPHERE(jump[2]);
8652 return cc;
8653
8654 case OP_NOT_HSPACE:
8655 case OP_HSPACE:
8656 if (check_str_ptr)
8657 detect_partial_match(common, backtracks);
8658
8659 if (type == OP_NOT_HSPACE)
8660 read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8661 else
8662 read_char(common, 0x9, 0x3000, NULL, 0);
8663
8664 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8665 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8666 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8667 return cc;
8668
8669 case OP_NOT_VSPACE:
8670 case OP_VSPACE:
8671 if (check_str_ptr)
8672 detect_partial_match(common, backtracks);
8673
8674 if (type == OP_NOT_VSPACE)
8675 read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8676 else
8677 read_char(common, 0xa, 0x2029, NULL, 0);
8678
8679 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8680 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8681 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8682 return cc;
8683
8684 #ifdef SUPPORT_UNICODE
8685 case OP_EXTUNI:
8686 if (check_str_ptr)
8687 detect_partial_match(common, backtracks);
8688
8689 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8690 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8691
8692 #if PCRE2_CODE_UNIT_WIDTH != 32
8693 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8694 common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8695 if (common->invalid_utf)
8696 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8697 #else
8698 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8699 common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8700 if (!common->utf || common->invalid_utf)
8701 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8702 #endif
8703
8704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8705
8706 if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8707 {
8708 jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8709 /* Since we successfully read a char above, partial matching must occure. */
8710 check_partial(common, TRUE);
8711 JUMPHERE(jump[0]);
8712 }
8713 return cc;
8714 #endif
8715
8716 case OP_CHAR:
8717 case OP_CHARI:
8718 length = 1;
8719 #ifdef SUPPORT_UNICODE
8720 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8721 #endif
8722
8723 if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8724 detect_partial_match(common, backtracks);
8725
8726 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8727 {
8728 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8729 if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8730 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8731
8732 context.length = IN_UCHARS(length);
8733 context.sourcereg = -1;
8734 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8735 context.ucharptr = 0;
8736 #endif
8737 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8738 }
8739
8740 #ifdef SUPPORT_UNICODE
8741 if (common->utf)
8742 {
8743 GETCHAR(c, cc);
8744 }
8745 else
8746 #endif
8747 c = *cc;
8748
8749 SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8750
8751 if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8752 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8753
8754 oc = char_othercase(common, c);
8755 read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8756
8757 SLJIT_ASSERT(!is_powerof2(c ^ oc));
8758
8759 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8760 {
8761 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
8762 CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8763 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8764 }
8765 else
8766 {
8767 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8768 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8769 JUMPHERE(jump[0]);
8770 }
8771 return cc + length;
8772
8773 case OP_NOT:
8774 case OP_NOTI:
8775 if (check_str_ptr)
8776 detect_partial_match(common, backtracks);
8777
8778 length = 1;
8779 #ifdef SUPPORT_UNICODE
8780 if (common->utf)
8781 {
8782 #if PCRE2_CODE_UNIT_WIDTH == 8
8783 c = *cc;
8784 if (c < 128 && !common->invalid_utf)
8785 {
8786 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8787 if (type == OP_NOT || !char_has_othercase(common, cc))
8788 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8789 else
8790 {
8791 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
8792 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
8793 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
8794 }
8795 /* Skip the variable-length character. */
8796 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8797 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8799 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8800 JUMPHERE(jump[0]);
8801 return cc + 1;
8802 }
8803 else
8804 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8805 {
8806 GETCHARLEN(c, cc, length);
8807 }
8808 }
8809 else
8810 #endif /* SUPPORT_UNICODE */
8811 c = *cc;
8812
8813 if (type == OP_NOT || !char_has_othercase(common, cc))
8814 {
8815 read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
8816 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8817 }
8818 else
8819 {
8820 oc = char_othercase(common, c);
8821 read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
8822 bit = c ^ oc;
8823 if (is_powerof2(bit))
8824 {
8825 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
8826 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
8827 }
8828 else
8829 {
8830 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8831 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8832 }
8833 }
8834 return cc + length;
8835
8836 case OP_CLASS:
8837 case OP_NCLASS:
8838 if (check_str_ptr)
8839 detect_partial_match(common, backtracks);
8840
8841 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8842 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
8843 if (type == OP_NCLASS)
8844 read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
8845 else
8846 read_char(common, 0, bit, NULL, 0);
8847 #else
8848 if (type == OP_NCLASS)
8849 read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
8850 else
8851 read_char(common, 0, 255, NULL, 0);
8852 #endif
8853
8854 if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
8855 return cc + 32 / sizeof(PCRE2_UCHAR);
8856
8857 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8858 jump[0] = NULL;
8859 if (common->utf)
8860 {
8861 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
8862 if (type == OP_CLASS)
8863 {
8864 add_jump(compiler, backtracks, jump[0]);
8865 jump[0] = NULL;
8866 }
8867 }
8868 #elif PCRE2_CODE_UNIT_WIDTH != 8
8869 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
8870 if (type == OP_CLASS)
8871 {
8872 add_jump(compiler, backtracks, jump[0]);
8873 jump[0] = NULL;
8874 }
8875 #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
8876
8877 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
8878 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
8879 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
8880 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
8881 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
8882 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8883
8884 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
8885 if (jump[0] != NULL)
8886 JUMPHERE(jump[0]);
8887 #endif
8888 return cc + 32 / sizeof(PCRE2_UCHAR);
8889
8890 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8891 case OP_XCLASS:
8892 if (check_str_ptr)
8893 detect_partial_match(common, backtracks);
8894 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
8895 return cc + GET(cc, 0) - 1;
8896 #endif
8897 }
8898 SLJIT_UNREACHABLE();
8899 return cc;
8900 }
8901
compile_charn_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,jump_list ** backtracks)8902 static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8903 {
8904 /* This function consumes at least one input character. */
8905 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8906 DEFINE_COMPILER;
8907 PCRE2_SPTR ccbegin = cc;
8908 compare_context context;
8909 int size;
8910
8911 context.length = 0;
8912 do
8913 {
8914 if (cc >= ccend)
8915 break;
8916
8917 if (*cc == OP_CHAR)
8918 {
8919 size = 1;
8920 #ifdef SUPPORT_UNICODE
8921 if (common->utf && HAS_EXTRALEN(cc[1]))
8922 size += GET_EXTRALEN(cc[1]);
8923 #endif
8924 }
8925 else if (*cc == OP_CHARI)
8926 {
8927 size = 1;
8928 #ifdef SUPPORT_UNICODE
8929 if (common->utf)
8930 {
8931 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8932 size = 0;
8933 else if (HAS_EXTRALEN(cc[1]))
8934 size += GET_EXTRALEN(cc[1]);
8935 }
8936 else
8937 #endif
8938 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8939 size = 0;
8940 }
8941 else
8942 size = 0;
8943
8944 cc += 1 + size;
8945 context.length += IN_UCHARS(size);
8946 }
8947 while (size > 0 && context.length <= 128);
8948
8949 cc = ccbegin;
8950 if (context.length > 0)
8951 {
8952 /* We have a fixed-length byte sequence. */
8953 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
8954 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8955
8956 context.sourcereg = -1;
8957 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8958 context.ucharptr = 0;
8959 #endif
8960 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
8961 return cc;
8962 }
8963
8964 /* A non-fixed length character will be checked if length == 0. */
8965 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
8966 }
8967
/* Forward declarations. The definitions are not in this part of the file;
compile_matchingpath() is needed by compile_recurse_matchingpath() below. */
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8971
/* Creates a new backtrack record and pushes it onto the parent's list.

The record is obtained with sljit_alloc_memory(), zero filled, linked to the
previous top of parent and given ccstart as its pattern position. The
enclosing function must provide the variables "compiler", "backtrack" and
"parent". If the compiler reports an error after the allocation, the
enclosing function returns "error".

Note that "size" is evaluated more than once, so it must not have side
effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8984
/* Variant of the backtrack push for functions returning void.

A record of "size" bytes is obtained with sljit_alloc_memory(), zero filled,
linked to the previous top of parent and given ccstart as its pattern
position. The enclosing function must provide the variables "compiler",
"backtrack" and "parent"; it returns (without a value) if the compiler reports
an error after the allocation.

Note that "size" is evaluated more than once, so it must not have side
effects. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8997
/* Views the local variable "backtrack" as a pointer to the given, more
specific backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
8999
/* Emits the group selection code for a duplicate-name back reference
(OP_DNREF / OP_DNREFI).

The name table entries belonging to the reference are visited in order. For
each of them the address of its OVECTOR pair is loaded into TMP2, and the
search ends at the first pair whose start value differs from OVECTOR(1). When
all candidates have been tried, TMP2 is left pointing to the last one.

If backtracks is not NULL and common->unset_backref is not set, a jump to
backtracks is added for the case when the last candidate also has a start
value equal to OVECTOR(1).

The OVECTOR offset goes to TMP2. TMP1 is overwritten. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE);
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int offset;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

/* Every start value is compared against the content of OVECTOR(1). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates except the last one: leave as soon as one differs. */
for (remaining--; remaining > 0; remaining--)
  {
  offset = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  entry += common->name_entry_size;
  }

/* The last candidate is selected unconditionally. */
offset = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL)
  {
  if (!common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  }

set_jumps(found, LABEL());
}
9029
/* Emits the code that matches one occurrence of a back reference.

Arguments:
  common       compiler state
  cc           points to the OP_REF, OP_REFI, OP_DNREF or OP_DNREFI opcode
  backtracks   list that receives the jumps taken when the match fails
  withchecks   when TRUE, the checks for an unset group (numbered references
                 only, and only if common->unset_backref is not set) and for
                 a zero length reference are generated
  emptyfail    when TRUE (together with withchecks), a zero length reference
                 is a failure; otherwise it simply matches nothing

For OP_REF / OP_REFI the group number is read from the pattern. For the
duplicate-name opcodes the caller must have loaded the address of the selected
OVECTOR pair into TMP2 (see compile_dnref_search).

In UTF mode OP_REFI is matched character by character using the Unicode
other case and caseless set data. Every other combination adds the length of
the referenced text to STR_PTR and calls the shared caseful or caseless
compare routine. OP_REF and OP_DNREF are case sensitive, OP_REFI and OP_DNREFI
are caseless.

In partial matching modes a reference that extends past the end of the subject
is compared as far as the subject goes and then reported through
check_partial(). */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
jump_list **compare_calls;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

/* TMP1 = start of the referenced text. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): OP_DNREFI in UTF mode does not take this Unicode-aware path
but the generic one below. Extending the condition needs common->iref_ptr to
be reserved for OP_DNREFI as well, which is decided elsewhere - confirm. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  /* TMP2 = end of the referenced text. */
  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their current values are
  saved in the slots at common->iref_ptr and restored on every exit. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  /* Success when the whole referenced text has been consumed. */
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = address of the ucd_records entry: index * 12 + table base. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  /* Compare the subject character plus its other_case delta. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* A zero caseset means there is nothing more to try. Otherwise scan the
  caseless set while its entries are less than the referenced character. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  /* Failure exit. In complete mode running out of subject is a plain
  failure as well. */
  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Partial matching: the subject ended inside the reference. */
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Success exit. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Only the ...I opcodes are caseless. OP_DNREF must use the caseful
  comparison exactly like OP_REF. */
  compare_calls = (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp;

  /* TMP2 = length of the referenced text. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, compare_calls, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Zero length reference (only detected when withchecks is set). */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9194
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

Arguments:
  common   compiler state
  cc       points to the OP_REF, OP_REFI, OP_DNREF or OP_DNREFI opcode
  parent   backtrack record that receives the new ref_iterator_backtrack

Returns the position in the pattern after the repeat opcode and its operands,
or NULL if the backtrack record could not be allocated.

Within this function max == 0 means that there is no upper limit. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_UCHAR type;
int offset = 0;
struct sljit_label *label;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

/* The duplicate-name opcodes carry one more IMM2 operand than OP_REF. cc is
advanced over it so that the offsets used below are valid for both forms. */
if (ref)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The lowest bit of the repeat opcode selects the minimizing variant; this
relies on OP_CRSTAR being even. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;
switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Maximizing repeat: match as many occurrences as possible, saving STR_PTR
on the stack after each optional one. */
if (!minimize)
  {
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* TMP2 receives the address of the selected OVECTOR pair; it is kept
      in POSSESSIVE1 because it has to be reloaded for every iteration. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one occurrence is required, so a start value equal to
      OVECTOR(1) is a failure here. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the iteration counter; it is needed only when a limit
  greater than one has to be checked. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without saving a backtrack position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: save STR_PTR and try one more occurrence. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. Stack layout: STACK(0) is written with STR_PTR after
each occurrence (0 initially), STACK(1) is the iteration counter, and for the
duplicate-name opcodes STACK(2) holds the address of the selected OVECTOR
pair. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* No occurrence is matched on the first pass: skip the code below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The code from this label matches one more occurrence. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep matching until the minimum count is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
9401
compile_recurse_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9402 static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9403 {
9404 DEFINE_COMPILER;
9405 backtrack_common *backtrack;
9406 recurse_entry *entry = common->entries;
9407 recurse_entry *prev = NULL;
9408 sljit_sw start = GET(cc, 1);
9409 PCRE2_SPTR start_cc;
9410 BOOL needs_control_head;
9411
9412 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9413
9414 /* Inlining simple patterns. */
9415 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9416 {
9417 start_cc = common->start + start;
9418 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9419 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9420 return cc + 1 + LINK_SIZE;
9421 }
9422
9423 while (entry != NULL)
9424 {
9425 if (entry->start == start)
9426 break;
9427 prev = entry;
9428 entry = entry->next;
9429 }
9430
9431 if (entry == NULL)
9432 {
9433 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9434 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9435 return NULL;
9436 entry->next = NULL;
9437 entry->entry_label = NULL;
9438 entry->backtrack_label = NULL;
9439 entry->entry_calls = NULL;
9440 entry->backtrack_calls = NULL;
9441 entry->start = start;
9442
9443 if (prev != NULL)
9444 prev->next = entry;
9445 else
9446 common->entries = entry;
9447 }
9448
9449 BACKTRACK_AS(recurse_backtrack)->entry = entry;
9450
9451 if (entry->entry_label == NULL)
9452 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9453 else
9454 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9455 /* Leave if the match is failed. */
9456 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9457 BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9458 return cc + 1 + LINK_SIZE;
9459 }
9460
do_callout(struct jit_arguments * arguments,pcre2_callout_block * callout_block,PCRE2_SPTR * jit_ovector)9461 static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9462 {
9463 PCRE2_SPTR begin;
9464 PCRE2_SIZE *ovector;
9465 sljit_u32 oveccount, capture_top;
9466
9467 if (arguments->callout == NULL)
9468 return 0;
9469
9470 SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9471
9472 begin = arguments->begin;
9473 ovector = (PCRE2_SIZE*)(callout_block + 1);
9474 oveccount = callout_block->capture_top;
9475
9476 SLJIT_ASSERT(oveccount >= 1);
9477
9478 callout_block->version = 2;
9479 callout_block->callout_flags = 0;
9480
9481 /* Offsets in subject. */
9482 callout_block->subject_length = arguments->end - arguments->begin;
9483 callout_block->start_match = jit_ovector[0] - begin;
9484 callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9485 callout_block->subject = begin;
9486
9487 /* Convert and copy the JIT offset vector to the ovector array. */
9488 callout_block->capture_top = 1;
9489 callout_block->offset_vector = ovector;
9490
9491 ovector[0] = PCRE2_UNSET;
9492 ovector[1] = PCRE2_UNSET;
9493 ovector += 2;
9494 jit_ovector += 2;
9495 capture_top = 1;
9496
9497 /* Convert pointers to sizes. */
9498 while (--oveccount != 0)
9499 {
9500 capture_top++;
9501
9502 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9503 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9504
9505 if (ovector[0] != PCRE2_UNSET)
9506 callout_block->capture_top = capture_top;
9507
9508 ovector += 2;
9509 jit_ovector += 2;
9510 }
9511
9512 return (arguments->callout)(callout_block, arguments->callout_data);
9513 }
9514
/* Byte offset of the given field inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9517
compile_callout_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)9518 static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9519 {
9520 DEFINE_COMPILER;
9521 backtrack_common *backtrack;
9522 sljit_s32 mov_opcode;
9523 unsigned int callout_length = (*cc == OP_CALLOUT)
9524 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9525 sljit_sw value1;
9526 sljit_sw value2;
9527 sljit_sw value3;
9528 sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9529
9530 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9531
9532 callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9533
9534 allocate_stack(common, callout_arg_size);
9535
9536 SLJIT_ASSERT(common->capture_last_ptr != 0);
9537 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9538 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9539 value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9540 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9541 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9542 OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9543
9544 /* These pointer sized fields temporarly stores internal variables. */
9545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9546
9547 if (common->mark_ptr != 0)
9548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9549 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9550 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9551 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9552
9553 if (*cc == OP_CALLOUT)
9554 {
9555 value1 = 0;
9556 value2 = 0;
9557 value3 = 0;
9558 }
9559 else
9560 {
9561 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9562 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9563 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9564 }
9565
9566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9567 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9568 OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9570
9571 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9572
9573 /* Needed to save important temporary registers. */
9574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9575 /* SLJIT_R0 = arguments */
9576 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9577 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9578 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9579 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9580 free_stack(common, callout_arg_size);
9581
9582 /* Check return value. */
9583 OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9584 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
9585 if (common->abort_label == NULL)
9586 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
9587 else
9588 JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label);
9589 return cc + callout_length;
9590 }
9591
9592 #undef CALLOUT_ARG_SIZE
9593 #undef CALLOUT_ARG_OFFSET
9594
assert_needs_str_ptr_saving(PCRE2_SPTR cc)9595 static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9596 {
9597 while (TRUE)
9598 {
9599 switch (*cc)
9600 {
9601 case OP_CALLOUT_STR:
9602 cc += GET(cc, 1 + 2*LINK_SIZE);
9603 break;
9604
9605 case OP_NOT_WORD_BOUNDARY:
9606 case OP_WORD_BOUNDARY:
9607 case OP_CIRC:
9608 case OP_CIRCM:
9609 case OP_DOLL:
9610 case OP_DOLLM:
9611 case OP_CALLOUT:
9612 case OP_ALT:
9613 cc += PRIV(OP_lengths)[*cc];
9614 break;
9615
9616 case OP_KET:
9617 return FALSE;
9618
9619 default:
9620 return TRUE;
9621 }
9622 }
9623 }
9624
compile_assert_matchingpath(compiler_common * common,PCRE2_SPTR cc,assert_backtrack * backtrack,BOOL conditional)9625 static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9626 {
9627 DEFINE_COMPILER;
9628 int framesize;
9629 int extrasize;
9630 BOOL local_quit_available = FALSE;
9631 BOOL needs_control_head;
9632 int private_data_ptr;
9633 backtrack_common altbacktrack;
9634 PCRE2_SPTR ccbegin;
9635 PCRE2_UCHAR opcode;
9636 PCRE2_UCHAR bra = OP_BRA;
9637 jump_list *tmp = NULL;
9638 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9639 jump_list **found;
9640 /* Saving previous accept variables. */
9641 BOOL save_local_quit_available = common->local_quit_available;
9642 BOOL save_in_positive_assertion = common->in_positive_assertion;
9643 then_trap_backtrack *save_then_trap = common->then_trap;
9644 struct sljit_label *save_quit_label = common->quit_label;
9645 struct sljit_label *save_accept_label = common->accept_label;
9646 jump_list *save_quit = common->quit;
9647 jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9648 jump_list *save_accept = common->accept;
9649 struct sljit_jump *jump;
9650 struct sljit_jump *brajump = NULL;
9651
9652 /* Assert captures then. */
9653 common->then_trap = NULL;
9654
9655 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9656 {
9657 SLJIT_ASSERT(!conditional);
9658 bra = *cc;
9659 cc++;
9660 }
9661 private_data_ptr = PRIVATE_DATA(cc);
9662 SLJIT_ASSERT(private_data_ptr != 0);
9663 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9664 backtrack->framesize = framesize;
9665 backtrack->private_data_ptr = private_data_ptr;
9666 opcode = *cc;
9667 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9668 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9669 ccbegin = cc;
9670 cc += GET(cc, 1);
9671
9672 if (bra == OP_BRAMINZERO)
9673 {
9674 /* This is a braminzero backtrack path. */
9675 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9676 free_stack(common, 1);
9677 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9678 }
9679
9680 if (framesize < 0)
9681 {
9682 extrasize = 1;
9683 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9684 extrasize = 0;
9685
9686 if (needs_control_head)
9687 extrasize++;
9688
9689 if (framesize == no_frame)
9690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9691
9692 if (extrasize > 0)
9693 allocate_stack(common, extrasize);
9694
9695 if (needs_control_head)
9696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9697
9698 if (extrasize > 0)
9699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9700
9701 if (needs_control_head)
9702 {
9703 SLJIT_ASSERT(extrasize == 2);
9704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9706 }
9707 }
9708 else
9709 {
9710 extrasize = needs_control_head ? 3 : 2;
9711 allocate_stack(common, framesize + extrasize);
9712
9713 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9714 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9715 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9716 if (needs_control_head)
9717 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9719
9720 if (needs_control_head)
9721 {
9722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9725 }
9726 else
9727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9728
9729 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9730 }
9731
9732 memset(&altbacktrack, 0, sizeof(backtrack_common));
9733 if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9734 {
9735 /* Control verbs cannot escape from these asserts. */
9736 local_quit_available = TRUE;
9737 common->local_quit_available = TRUE;
9738 common->quit_label = NULL;
9739 common->quit = NULL;
9740 }
9741
9742 common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9743 common->positive_assertion_quit = NULL;
9744
9745 while (1)
9746 {
9747 common->accept_label = NULL;
9748 common->accept = NULL;
9749 altbacktrack.top = NULL;
9750 altbacktrack.topbacktracks = NULL;
9751
9752 if (*ccbegin == OP_ALT && extrasize > 0)
9753 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9754
9755 altbacktrack.cc = ccbegin;
9756 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9757 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9758 {
9759 if (local_quit_available)
9760 {
9761 common->local_quit_available = save_local_quit_available;
9762 common->quit_label = save_quit_label;
9763 common->quit = save_quit;
9764 }
9765 common->in_positive_assertion = save_in_positive_assertion;
9766 common->then_trap = save_then_trap;
9767 common->accept_label = save_accept_label;
9768 common->positive_assertion_quit = save_positive_assertion_quit;
9769 common->accept = save_accept;
9770 return NULL;
9771 }
9772 common->accept_label = LABEL();
9773 if (common->accept != NULL)
9774 set_jumps(common->accept, common->accept_label);
9775
9776 /* Reset stack. */
9777 if (framesize < 0)
9778 {
9779 if (framesize == no_frame)
9780 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9781 else if (extrasize > 0)
9782 free_stack(common, extrasize);
9783
9784 if (needs_control_head)
9785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9786 }
9787 else
9788 {
9789 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9790 {
9791 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9792 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9793 if (needs_control_head)
9794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9795 }
9796 else
9797 {
9798 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9799 if (needs_control_head)
9800 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9801 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9802 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9803 }
9804 }
9805
9806 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9807 {
9808 /* We know that STR_PTR was stored on the top of the stack. */
9809 if (conditional)
9810 {
9811 if (extrasize > 0)
9812 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9813 }
9814 else if (bra == OP_BRAZERO)
9815 {
9816 if (framesize < 0)
9817 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9818 else
9819 {
9820 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9821 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9822 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9823 }
9824 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9826 }
9827 else if (framesize >= 0)
9828 {
9829 /* For OP_BRA and OP_BRAMINZERO. */
9830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9831 }
9832 }
9833 add_jump(compiler, found, JUMP(SLJIT_JUMP));
9834
9835 compile_backtrackingpath(common, altbacktrack.top);
9836 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9837 {
9838 if (local_quit_available)
9839 {
9840 common->local_quit_available = save_local_quit_available;
9841 common->quit_label = save_quit_label;
9842 common->quit = save_quit;
9843 }
9844 common->in_positive_assertion = save_in_positive_assertion;
9845 common->then_trap = save_then_trap;
9846 common->accept_label = save_accept_label;
9847 common->positive_assertion_quit = save_positive_assertion_quit;
9848 common->accept = save_accept;
9849 return NULL;
9850 }
9851 set_jumps(altbacktrack.topbacktracks, LABEL());
9852
9853 if (*cc != OP_ALT)
9854 break;
9855
9856 ccbegin = cc;
9857 cc += GET(cc, 1);
9858 }
9859
9860 if (local_quit_available)
9861 {
9862 SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9863 /* Makes the check less complicated below. */
9864 common->positive_assertion_quit = common->quit;
9865 }
9866
9867 /* None of them matched. */
9868 if (common->positive_assertion_quit != NULL)
9869 {
9870 jump = JUMP(SLJIT_JUMP);
9871 set_jumps(common->positive_assertion_quit, LABEL());
9872 SLJIT_ASSERT(framesize != no_stack);
9873 if (framesize < 0)
9874 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9875 else
9876 {
9877 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9878 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9879 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9880 }
9881 JUMPHERE(jump);
9882 }
9883
9884 if (needs_control_head)
9885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9886
9887 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9888 {
9889 /* Assert is failed. */
9890 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9891 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9892
9893 if (framesize < 0)
9894 {
9895 /* The topmost item should be 0. */
9896 if (bra == OP_BRAZERO)
9897 {
9898 if (extrasize == 2)
9899 free_stack(common, 1);
9900 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9901 }
9902 else if (extrasize > 0)
9903 free_stack(common, extrasize);
9904 }
9905 else
9906 {
9907 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9908 /* The topmost item should be 0. */
9909 if (bra == OP_BRAZERO)
9910 {
9911 free_stack(common, framesize + extrasize - 1);
9912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9913 }
9914 else
9915 free_stack(common, framesize + extrasize);
9916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9917 }
9918 jump = JUMP(SLJIT_JUMP);
9919 if (bra != OP_BRAZERO)
9920 add_jump(compiler, target, jump);
9921
9922 /* Assert is successful. */
9923 set_jumps(tmp, LABEL());
9924 if (framesize < 0)
9925 {
9926 /* We know that STR_PTR was stored on the top of the stack. */
9927 if (extrasize > 0)
9928 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9929
9930 /* Keep the STR_PTR on the top of the stack. */
9931 if (bra == OP_BRAZERO)
9932 {
9933 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9934 if (extrasize == 2)
9935 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9936 }
9937 else if (bra == OP_BRAMINZERO)
9938 {
9939 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9940 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9941 }
9942 }
9943 else
9944 {
9945 if (bra == OP_BRA)
9946 {
9947 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9948 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9949 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9950 }
9951 else
9952 {
9953 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9954 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9955 if (extrasize == 2)
9956 {
9957 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9958 if (bra == OP_BRAMINZERO)
9959 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9960 }
9961 else
9962 {
9963 SLJIT_ASSERT(extrasize == 3);
9964 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9965 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9966 }
9967 }
9968 }
9969
9970 if (bra == OP_BRAZERO)
9971 {
9972 backtrack->matchingpath = LABEL();
9973 SET_LABEL(jump, backtrack->matchingpath);
9974 }
9975 else if (bra == OP_BRAMINZERO)
9976 {
9977 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9978 JUMPHERE(brajump);
9979 if (framesize >= 0)
9980 {
9981 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9982 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9983 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9984 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9985 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9986 }
9987 set_jumps(backtrack->common.topbacktracks, LABEL());
9988 }
9989 }
9990 else
9991 {
9992 /* AssertNot is successful. */
9993 if (framesize < 0)
9994 {
9995 if (extrasize > 0)
9996 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9997
9998 if (bra != OP_BRA)
9999 {
10000 if (extrasize == 2)
10001 free_stack(common, 1);
10002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10003 }
10004 else if (extrasize > 0)
10005 free_stack(common, extrasize);
10006 }
10007 else
10008 {
10009 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10010 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10011 /* The topmost item should be 0. */
10012 if (bra != OP_BRA)
10013 {
10014 free_stack(common, framesize + extrasize - 1);
10015 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10016 }
10017 else
10018 free_stack(common, framesize + extrasize);
10019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10020 }
10021
10022 if (bra == OP_BRAZERO)
10023 backtrack->matchingpath = LABEL();
10024 else if (bra == OP_BRAMINZERO)
10025 {
10026 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10027 JUMPHERE(brajump);
10028 }
10029
10030 if (bra != OP_BRA)
10031 {
10032 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10033 set_jumps(backtrack->common.topbacktracks, LABEL());
10034 backtrack->common.topbacktracks = NULL;
10035 }
10036 }
10037
10038 if (local_quit_available)
10039 {
10040 common->local_quit_available = save_local_quit_available;
10041 common->quit_label = save_quit_label;
10042 common->quit = save_quit;
10043 }
10044 common->in_positive_assertion = save_in_positive_assertion;
10045 common->then_trap = save_then_trap;
10046 common->accept_label = save_accept_label;
10047 common->positive_assertion_quit = save_positive_assertion_quit;
10048 common->accept = save_accept;
10049 return cc + 1 + LINK_SIZE;
10050 }
10051
match_once_common(compiler_common * common,PCRE2_UCHAR ket,int framesize,int private_data_ptr,BOOL has_alternatives,BOOL needs_control_head)10052 static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10053 {
10054 DEFINE_COMPILER;
10055 int stacksize;
10056
10057 if (framesize < 0)
10058 {
10059 if (framesize == no_frame)
10060 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10061 else
10062 {
10063 stacksize = needs_control_head ? 1 : 0;
10064 if (ket != OP_KET || has_alternatives)
10065 stacksize++;
10066
10067 if (stacksize > 0)
10068 free_stack(common, stacksize);
10069 }
10070
10071 if (needs_control_head)
10072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10073
10074 /* TMP2 which is set here used by OP_KETRMAX below. */
10075 if (ket == OP_KETRMAX)
10076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10077 else if (ket == OP_KETRMIN)
10078 {
10079 /* Move the STR_PTR to the private_data_ptr. */
10080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10081 }
10082 }
10083 else
10084 {
10085 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10086 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10087 if (needs_control_head)
10088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10089
10090 if (ket == OP_KETRMAX)
10091 {
10092 /* TMP2 which is set here used by OP_KETRMAX below. */
10093 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10094 }
10095 }
10096 if (needs_control_head)
10097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10098 }
10099
match_capture_common(compiler_common * common,int stacksize,int offset,int private_data_ptr)10100 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10101 {
10102 DEFINE_COMPILER;
10103
10104 if (common->capture_last_ptr != 0)
10105 {
10106 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10108 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10109 stacksize++;
10110 }
10111 if (common->optimized_cbracket[offset >> 1] == 0)
10112 {
10113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10117 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10120 stacksize += 2;
10121 }
10122 return stacksize;
10123 }
10124
do_script_run(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10125 static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10126 {
10127 if (PRIV(script_run)(ptr, endptr, FALSE))
10128 return endptr;
10129 return NULL;
10130 }
10131
10132 #ifdef SUPPORT_UNICODE
10133
do_script_run_utf(PCRE2_SPTR ptr,PCRE2_SPTR endptr)10134 static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10135 {
10136 if (PRIV(script_run)(ptr, endptr, TRUE))
10137 return endptr;
10138 return NULL;
10139 }
10140
10141 #endif /* SUPPORT_UNICODE */
10142
match_script_run_common(compiler_common * common,int private_data_ptr,backtrack_common * parent)10143 static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10144 {
10145 DEFINE_COMPILER;
10146
10147 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10148
10149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10150 #ifdef SUPPORT_UNICODE
10151 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10152 common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10153 #else
10154 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10155 #endif
10156
10157 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10158 add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10159 }
10160
10161 /*
10162 Handling bracketed expressions is probably the most complex part.
10163
10164 Stack layout naming characters:
10165 S - Push the current STR_PTR
10166 0 - Push a 0 (NULL)
10167 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10168 before the next alternative. Not pushed if there are no alternatives.
10169 M - Any values pushed by the current alternative. Can be empty, or anything.
10170 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10171 L - Push the previous local (pointed by localptr) to the stack
10172 () - optional values stored on the stack
10173 ()* - optional, can be stored multiple times
10174
10175 The following list shows the regular expression templates, their PCRE byte codes
10176 and stack layout supported by pcre-sljit.
10177
10178 (?:) OP_BRA | OP_KET A M
10179 () OP_CBRA | OP_KET C M
10180 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10181 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10182 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10183 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10184 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10185 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10186 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10187 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10188 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10189 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10190 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10191 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10192 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10193 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10194 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10195 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10196 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10197 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10198 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10199 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10200
10201
10202 Stack layout naming characters:
10203 A - Push the alternative index (starting from 0) on the stack.
10204 Not pushed if there are no alternatives.
10205 M - Any values pushed by the current alternative. Can be empty, or anything.
10206
10207 The next list shows the possible content of a bracket:
10208 (|) OP_*BRA | OP_ALT ... M A
10209 (?()|) OP_*COND | OP_ALT M A
10210 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10211 Or nothing, if trace is unnecessary
10212 */
10213
compile_bracket_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)10214 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10215 {
10216 DEFINE_COMPILER;
10217 backtrack_common *backtrack;
10218 PCRE2_UCHAR opcode;
10219 int private_data_ptr = 0;
10220 int offset = 0;
10221 int i, stacksize;
10222 int repeat_ptr = 0, repeat_length = 0;
10223 int repeat_type = 0, repeat_count = 0;
10224 PCRE2_SPTR ccbegin;
10225 PCRE2_SPTR matchingpath;
10226 PCRE2_SPTR slot;
10227 PCRE2_UCHAR bra = OP_BRA;
10228 PCRE2_UCHAR ket;
10229 assert_backtrack *assert;
10230 BOOL has_alternatives;
10231 BOOL needs_control_head = FALSE;
10232 struct sljit_jump *jump;
10233 struct sljit_jump *skip;
10234 struct sljit_label *rmax_label = NULL;
10235 struct sljit_jump *braminzero = NULL;
10236
10237 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10238
10239 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10240 {
10241 bra = *cc;
10242 cc++;
10243 opcode = *cc;
10244 }
10245
10246 opcode = *cc;
10247 ccbegin = cc;
10248 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10249 ket = *matchingpath;
10250 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10251 {
10252 repeat_ptr = PRIVATE_DATA(matchingpath);
10253 repeat_length = PRIVATE_DATA(matchingpath + 1);
10254 repeat_type = PRIVATE_DATA(matchingpath + 2);
10255 repeat_count = PRIVATE_DATA(matchingpath + 3);
10256 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10257 if (repeat_type == OP_UPTO)
10258 ket = OP_KETRMAX;
10259 if (repeat_type == OP_MINUPTO)
10260 ket = OP_KETRMIN;
10261 }
10262
10263 matchingpath = ccbegin + 1 + LINK_SIZE;
10264 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10265 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10266 cc += GET(cc, 1);
10267
10268 has_alternatives = *cc == OP_ALT;
10269 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10270 {
10271 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10272 compile_time_checks_must_be_grouped_together);
10273 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10274 }
10275
10276 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10277 opcode = OP_SCOND;
10278
10279 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10280 {
10281 /* Capturing brackets has a pre-allocated space. */
10282 offset = GET2(ccbegin, 1 + LINK_SIZE);
10283 if (common->optimized_cbracket[offset] == 0)
10284 {
10285 private_data_ptr = OVECTOR_PRIV(offset);
10286 offset <<= 1;
10287 }
10288 else
10289 {
10290 offset <<= 1;
10291 private_data_ptr = OVECTOR(offset);
10292 }
10293 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10294 matchingpath += IMM2_SIZE;
10295 }
10296 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10297 {
10298 /* Other brackets simply allocate the next entry. */
10299 private_data_ptr = PRIVATE_DATA(ccbegin);
10300 SLJIT_ASSERT(private_data_ptr != 0);
10301 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10302 if (opcode == OP_ONCE)
10303 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10304 }
10305
10306 /* Instructions before the first alternative. */
10307 stacksize = 0;
10308 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10309 stacksize++;
10310 if (bra == OP_BRAZERO)
10311 stacksize++;
10312
10313 if (stacksize > 0)
10314 allocate_stack(common, stacksize);
10315
10316 stacksize = 0;
10317 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10318 {
10319 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10320 stacksize++;
10321 }
10322
10323 if (bra == OP_BRAZERO)
10324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10325
10326 if (bra == OP_BRAMINZERO)
10327 {
10328 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10329 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10330 if (ket != OP_KETRMIN)
10331 {
10332 free_stack(common, 1);
10333 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10334 }
10335 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10336 {
10337 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10338 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10339 /* Nothing stored during the first run. */
10340 skip = JUMP(SLJIT_JUMP);
10341 JUMPHERE(jump);
10342 /* Checking zero-length iteration. */
10343 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10344 {
10345 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10346 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10347 }
10348 else
10349 {
10350 /* Except when the whole stack frame must be saved. */
10351 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10352 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10353 }
10354 JUMPHERE(skip);
10355 }
10356 else
10357 {
10358 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10359 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10360 JUMPHERE(jump);
10361 }
10362 }
10363
10364 if (repeat_type != 0)
10365 {
10366 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10367 if (repeat_type == OP_EXACT)
10368 rmax_label = LABEL();
10369 }
10370
10371 if (ket == OP_KETRMIN)
10372 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10373
10374 if (ket == OP_KETRMAX)
10375 {
10376 rmax_label = LABEL();
10377 if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10378 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10379 }
10380
10381 /* Handling capturing brackets and alternatives. */
10382 if (opcode == OP_ONCE)
10383 {
10384 stacksize = 0;
10385 if (needs_control_head)
10386 {
10387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10388 stacksize++;
10389 }
10390
10391 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10392 {
10393 /* Neither capturing brackets nor recursions are found in the block. */
10394 if (ket == OP_KETRMIN)
10395 {
10396 stacksize += 2;
10397 if (!needs_control_head)
10398 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10399 }
10400 else
10401 {
10402 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10404 if (ket == OP_KETRMAX || has_alternatives)
10405 stacksize++;
10406 }
10407
10408 if (stacksize > 0)
10409 allocate_stack(common, stacksize);
10410
10411 stacksize = 0;
10412 if (needs_control_head)
10413 {
10414 stacksize++;
10415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10416 }
10417
10418 if (ket == OP_KETRMIN)
10419 {
10420 if (needs_control_head)
10421 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10423 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10424 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10426 }
10427 else if (ket == OP_KETRMAX || has_alternatives)
10428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10429 }
10430 else
10431 {
10432 if (ket != OP_KET || has_alternatives)
10433 stacksize++;
10434
10435 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10436 allocate_stack(common, stacksize);
10437
10438 if (needs_control_head)
10439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10440
10441 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10442 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10443
10444 stacksize = needs_control_head ? 1 : 0;
10445 if (ket != OP_KET || has_alternatives)
10446 {
10447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10448 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10449 stacksize++;
10450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10451 }
10452 else
10453 {
10454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10456 }
10457 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10458 }
10459 }
10460 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10461 {
10462 /* Saving the previous values. */
10463 if (common->optimized_cbracket[offset >> 1] != 0)
10464 {
10465 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10466 allocate_stack(common, 2);
10467 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10468 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10469 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10472 }
10473 else
10474 {
10475 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10476 allocate_stack(common, 1);
10477 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10479 }
10480 }
10481 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10482 {
10483 /* Saving the previous value. */
10484 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10485 allocate_stack(common, 1);
10486 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10488 }
10489 else if (has_alternatives)
10490 {
10491 /* Pushing the starting string pointer. */
10492 allocate_stack(common, 1);
10493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10494 }
10495
10496 /* Generating code for the first alternative. */
10497 if (opcode == OP_COND || opcode == OP_SCOND)
10498 {
10499 if (*matchingpath == OP_CREF)
10500 {
10501 SLJIT_ASSERT(has_alternatives);
10502 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10503 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10504 matchingpath += 1 + IMM2_SIZE;
10505 }
10506 else if (*matchingpath == OP_DNCREF)
10507 {
10508 SLJIT_ASSERT(has_alternatives);
10509
10510 i = GET2(matchingpath, 1 + IMM2_SIZE);
10511 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10512 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10514 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10515 slot += common->name_entry_size;
10516 i--;
10517 while (i-- > 0)
10518 {
10519 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10520 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10521 slot += common->name_entry_size;
10522 }
10523 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10524 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10525 matchingpath += 1 + 2 * IMM2_SIZE;
10526 }
10527 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10528 {
10529 /* Never has other case. */
10530 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10531 SLJIT_ASSERT(!has_alternatives);
10532
10533 if (*matchingpath == OP_TRUE)
10534 {
10535 stacksize = 1;
10536 matchingpath++;
10537 }
10538 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10539 stacksize = 0;
10540 else if (*matchingpath == OP_RREF)
10541 {
10542 stacksize = GET2(matchingpath, 1);
10543 if (common->currententry == NULL)
10544 stacksize = 0;
10545 else if (stacksize == RREF_ANY)
10546 stacksize = 1;
10547 else if (common->currententry->start == 0)
10548 stacksize = stacksize == 0;
10549 else
10550 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10551
10552 if (stacksize != 0)
10553 matchingpath += 1 + IMM2_SIZE;
10554 }
10555 else
10556 {
10557 if (common->currententry == NULL || common->currententry->start == 0)
10558 stacksize = 0;
10559 else
10560 {
10561 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10562 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10563 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10564 while (stacksize > 0)
10565 {
10566 if ((int)GET2(slot, 0) == i)
10567 break;
10568 slot += common->name_entry_size;
10569 stacksize--;
10570 }
10571 }
10572
10573 if (stacksize != 0)
10574 matchingpath += 1 + 2 * IMM2_SIZE;
10575 }
10576
10577 /* The stacksize == 0 is a common "else" case. */
10578 if (stacksize == 0)
10579 {
10580 if (*cc == OP_ALT)
10581 {
10582 matchingpath = cc + 1 + LINK_SIZE;
10583 cc += GET(cc, 1);
10584 }
10585 else
10586 matchingpath = cc;
10587 }
10588 }
10589 else
10590 {
10591 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10592 /* Similar code as PUSH_BACKTRACK macro. */
10593 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10594 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10595 return NULL;
10596 memset(assert, 0, sizeof(assert_backtrack));
10597 assert->common.cc = matchingpath;
10598 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10599 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10600 }
10601 }
10602
10603 compile_matchingpath(common, matchingpath, cc, backtrack);
10604 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10605 return NULL;
10606
10607 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10608 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10609
10610 if (opcode == OP_ONCE)
10611 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10612
10613 if (opcode == OP_SCRIPT_RUN)
10614 match_script_run_common(common, private_data_ptr, backtrack);
10615
10616 stacksize = 0;
10617 if (repeat_type == OP_MINUPTO)
10618 {
10619 /* We need to preserve the counter. TMP2 will be used below. */
10620 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10621 stacksize++;
10622 }
10623 if (ket != OP_KET || bra != OP_BRA)
10624 stacksize++;
10625 if (offset != 0)
10626 {
10627 if (common->capture_last_ptr != 0)
10628 stacksize++;
10629 if (common->optimized_cbracket[offset >> 1] == 0)
10630 stacksize += 2;
10631 }
10632 if (has_alternatives && opcode != OP_ONCE)
10633 stacksize++;
10634
10635 if (stacksize > 0)
10636 allocate_stack(common, stacksize);
10637
10638 stacksize = 0;
10639 if (repeat_type == OP_MINUPTO)
10640 {
10641 /* TMP2 was set above. */
10642 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10643 stacksize++;
10644 }
10645
10646 if (ket != OP_KET || bra != OP_BRA)
10647 {
10648 if (ket != OP_KET)
10649 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10650 else
10651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10652 stacksize++;
10653 }
10654
10655 if (offset != 0)
10656 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10657
10658 /* Skip and count the other alternatives. */
10659 i = 1;
10660 while (*cc == OP_ALT)
10661 {
10662 cc += GET(cc, 1);
10663 i++;
10664 }
10665
10666 if (has_alternatives)
10667 {
10668 if (opcode != OP_ONCE)
10669 {
10670 if (i <= 3)
10671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10672 else
10673 BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10674 }
10675 if (ket != OP_KETRMAX)
10676 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10677 }
10678
10679 /* Must be after the matchingpath label. */
10680 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10681 {
10682 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10684 }
10685
10686 if (ket == OP_KETRMAX)
10687 {
10688 if (repeat_type != 0)
10689 {
10690 if (has_alternatives)
10691 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10692 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10693 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10694 /* Drop STR_PTR for greedy plus quantifier. */
10695 if (opcode != OP_ONCE)
10696 free_stack(common, 1);
10697 }
10698 else if (opcode < OP_BRA || opcode >= OP_SBRA)
10699 {
10700 if (has_alternatives)
10701 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10702
10703 /* Checking zero-length iteration. */
10704 if (opcode != OP_ONCE)
10705 {
10706 /* This case includes opcodes such as OP_SCRIPT_RUN. */
10707 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10708 /* Drop STR_PTR for greedy plus quantifier. */
10709 if (bra != OP_BRAZERO)
10710 free_stack(common, 1);
10711 }
10712 else
10713 /* TMP2 must contain the starting STR_PTR. */
10714 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10715 }
10716 else
10717 JUMPTO(SLJIT_JUMP, rmax_label);
10718 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10719 }
10720
10721 if (repeat_type == OP_EXACT)
10722 {
10723 count_match(common);
10724 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10725 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10726 }
10727 else if (repeat_type == OP_UPTO)
10728 {
10729 /* We need to preserve the counter. */
10730 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10731 allocate_stack(common, 1);
10732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10733 }
10734
10735 if (bra == OP_BRAZERO)
10736 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10737
10738 if (bra == OP_BRAMINZERO)
10739 {
10740 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10741 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10742 if (braminzero != NULL)
10743 {
10744 JUMPHERE(braminzero);
10745 /* We need to release the end pointer to perform the
10746 backtrack for the zero-length iteration. When
10747 framesize is < 0, OP_ONCE will do the release itself. */
10748 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10749 {
10750 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10751 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10752 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10753 }
10754 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10755 free_stack(common, 1);
10756 }
10757 /* Continue to the normal backtrack. */
10758 }
10759
10760 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10761 count_match(common);
10762
10763 cc += 1 + LINK_SIZE;
10764
10765 if (opcode == OP_ONCE)
10766 {
10767 /* We temporarily encode the needs_control_head in the lowest bit.
10768 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10769 the same value for small signed numbers (including negative numbers). */
10770 BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10771 }
10772 return cc + repeat_length;
10773 }
10774
/* Emits the matching path of a BRAPOS-family group: OP_BRAPOS, OP_SBRAPOS,
OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO, and closed
by OP_KETRPOS.

The generated code saves the state it needs on the machine stack, then
enters a loop. Each alternative of the group is compiled with
compile_matchingpath(); when an alternative matches, the saved position (and
the capture pair for the capturing variants) is updated and control jumps
back to the loop label. The backtracking path of each alternative is
compiled immediately after it with compile_backtrackingpath().

Arguments:
  common   compiler state
  cc       byte code of the group (OP_BRAPOSZERO or one of the BRAPOS opcodes)
  parent   parent backtrack record; not referenced directly in this body
           (presumably consumed by PUSH_BACKTRACK - confirm in the macro)

Returns:   the byte code address following OP_KETRPOS, or NULL when the
           sljit compiler reports an error
*/
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  /* The group may match zero times, so no "nothing matched" flag is needed. */
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset is the index of the capture pair in the ovector. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is built. Stack slots, in order: the saved capture pair (plus
  the saved capture_last value when it is tracked) or the saved STR_PTR,
  then the control head when needed, then the "nothing matched yet" flag
  when the group is not allowed to match zero times. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag (initially 1) goes into the last slot; the control head, held
  in TMP2, goes into the slot before it. After this, stack is the slot index
  of the control head. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is built. Stack slots, in order: the "nothing matched yet" flag
  (only when !zero), the control head (when needed), the saved STR_PTR (only
  for non-capturing groups), the previous private data value, and then the
  frame written by init_frame(). */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step stack back over the slots stored after the control head position. */
  stack -= 1 + (offset == 0);
  }

/* For capturing groups the start of the current iteration is kept in the
private ovector slot. */
if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  /* Each alternative gets its own, fresh list of backtrack records. */
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the new position and iterate again. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 receives the previous position for the empty match test below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration matched: clear the flag in the last slot. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      /* TMP1 receives the previous position for the empty match test below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration matched: clear the flag. framesize >= 0 holds
    in this branch, so the flag is always the first slot. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  /* The alternative failed: emit its backtracking path, then reload the
  position stored by the last successful iteration. */
  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

/* When at least one iteration is required, a flag that is still non-zero
means that no iteration matched, so the whole group fails. */
backtrack->topbacktracks = NULL;
if (!zero)
  {
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11057
get_iterator_parameters(compiler_common * common,PCRE2_SPTR cc,PCRE2_UCHAR * opcode,PCRE2_UCHAR * type,sljit_u32 * max,sljit_u32 * exact,PCRE2_SPTR * end)11058 static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11059 {
11060 int class_len;
11061
11062 *opcode = *cc;
11063 *exact = 0;
11064
11065 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11066 {
11067 cc++;
11068 *type = OP_CHAR;
11069 }
11070 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11071 {
11072 cc++;
11073 *type = OP_CHARI;
11074 *opcode -= OP_STARI - OP_STAR;
11075 }
11076 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11077 {
11078 cc++;
11079 *type = OP_NOT;
11080 *opcode -= OP_NOTSTAR - OP_STAR;
11081 }
11082 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11083 {
11084 cc++;
11085 *type = OP_NOTI;
11086 *opcode -= OP_NOTSTARI - OP_STAR;
11087 }
11088 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11089 {
11090 cc++;
11091 *opcode -= OP_TYPESTAR - OP_STAR;
11092 *type = OP_END;
11093 }
11094 else
11095 {
11096 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11097 *type = *opcode;
11098 cc++;
11099 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11100 *opcode = cc[class_len - 1];
11101
11102 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11103 {
11104 *opcode -= OP_CRSTAR - OP_STAR;
11105 *end = cc + class_len;
11106
11107 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11108 {
11109 *exact = 1;
11110 *opcode -= OP_PLUS - OP_STAR;
11111 }
11112 }
11113 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11114 {
11115 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11116 *end = cc + class_len;
11117
11118 if (*opcode == OP_POSPLUS)
11119 {
11120 *exact = 1;
11121 *opcode = OP_POSSTAR;
11122 }
11123 }
11124 else
11125 {
11126 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11127 *max = GET2(cc, (class_len + IMM2_SIZE));
11128 *exact = GET2(cc, class_len);
11129
11130 if (*max == 0)
11131 {
11132 if (*opcode == OP_CRPOSRANGE)
11133 *opcode = OP_POSSTAR;
11134 else
11135 *opcode -= OP_CRRANGE - OP_STAR;
11136 }
11137 else
11138 {
11139 *max -= *exact;
11140 if (*max == 0)
11141 *opcode = OP_EXACT;
11142 else if (*max == 1)
11143 {
11144 if (*opcode == OP_CRPOSRANGE)
11145 *opcode = OP_POSQUERY;
11146 else
11147 *opcode -= OP_CRRANGE - OP_QUERY;
11148 }
11149 else
11150 {
11151 if (*opcode == OP_CRPOSRANGE)
11152 *opcode = OP_POSUPTO;
11153 else
11154 *opcode -= OP_CRRANGE - OP_UPTO;
11155 }
11156 }
11157 *end = cc + class_len + 2 * IMM2_SIZE;
11158 }
11159 return cc;
11160 }
11161
11162 switch(*opcode)
11163 {
11164 case OP_EXACT:
11165 *exact = GET2(cc, 0);
11166 cc += IMM2_SIZE;
11167 break;
11168
11169 case OP_PLUS:
11170 case OP_MINPLUS:
11171 *exact = 1;
11172 *opcode -= OP_PLUS - OP_STAR;
11173 break;
11174
11175 case OP_POSPLUS:
11176 *exact = 1;
11177 *opcode = OP_POSSTAR;
11178 break;
11179
11180 case OP_UPTO:
11181 case OP_MINUPTO:
11182 case OP_POSUPTO:
11183 *max = GET2(cc, 0);
11184 cc += IMM2_SIZE;
11185 break;
11186 }
11187
11188 if (*type == OP_END)
11189 {
11190 *type = *cc;
11191 *end = next_opcode(common, cc);
11192 cc++;
11193 return cc;
11194 }
11195
11196 *end = cc + 1;
11197 #ifdef SUPPORT_UNICODE
11198 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11199 #endif
11200 return cc;
11201 }
11202
compile_iterator_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11203 static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11204 {
11205 DEFINE_COMPILER;
11206 backtrack_common *backtrack;
11207 PCRE2_UCHAR opcode;
11208 PCRE2_UCHAR type;
11209 sljit_u32 max = 0, exact;
11210 sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11211 sljit_s32 early_fail_type;
11212 BOOL charpos_enabled;
11213 PCRE2_UCHAR charpos_char;
11214 unsigned int charpos_othercasebit;
11215 PCRE2_SPTR end;
11216 jump_list *no_match = NULL;
11217 jump_list *no_char1_match = NULL;
11218 struct sljit_jump *jump = NULL;
11219 struct sljit_label *label;
11220 int private_data_ptr = PRIVATE_DATA(cc);
11221 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11222 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11223 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11224 int tmp_base, tmp_offset;
11225 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11226 BOOL use_tmp;
11227 #endif
11228
11229 PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11230
11231 early_fail_type = (early_fail_ptr & 0x7);
11232 early_fail_ptr >>= 3;
11233
11234 /* During recursion, these optimizations are disabled. */
11235 if (common->early_fail_start_ptr == 0)
11236 {
11237 early_fail_ptr = 0;
11238 early_fail_type = type_skip;
11239 }
11240
11241 SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11242 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11243
11244 if (early_fail_type == type_fail)
11245 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11246
11247 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11248
11249 if (type != OP_EXTUNI)
11250 {
11251 tmp_base = TMP3;
11252 tmp_offset = 0;
11253 }
11254 else
11255 {
11256 tmp_base = SLJIT_MEM1(SLJIT_SP);
11257 tmp_offset = POSSESSIVE0;
11258 }
11259
11260 /* Handle fixed part first. */
11261 if (exact > 1)
11262 {
11263 SLJIT_ASSERT(early_fail_ptr == 0);
11264
11265 if (common->mode == PCRE2_JIT_COMPLETE
11266 #ifdef SUPPORT_UNICODE
11267 && !common->utf
11268 #endif
11269 && type != OP_ANYNL && type != OP_EXTUNI)
11270 {
11271 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11272 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11273 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11274 label = LABEL();
11275 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11276 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11277 JUMPTO(SLJIT_NOT_ZERO, label);
11278 }
11279 else
11280 {
11281 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11282 label = LABEL();
11283 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11284 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11285 JUMPTO(SLJIT_NOT_ZERO, label);
11286 }
11287 }
11288 else if (exact == 1)
11289 {
11290 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11291
11292 if (early_fail_type == type_fail_range)
11293 {
11294 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11295 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11296 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11297 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11298 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11299
11300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11301 }
11302 }
11303
11304 switch(opcode)
11305 {
11306 case OP_STAR:
11307 case OP_UPTO:
11308 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11309
11310 if (type == OP_ANYNL || type == OP_EXTUNI)
11311 {
11312 SLJIT_ASSERT(private_data_ptr == 0);
11313 SLJIT_ASSERT(early_fail_ptr == 0);
11314
11315 allocate_stack(common, 2);
11316 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11318
11319 if (opcode == OP_UPTO)
11320 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11321
11322 label = LABEL();
11323 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11324 if (opcode == OP_UPTO)
11325 {
11326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11327 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11328 jump = JUMP(SLJIT_ZERO);
11329 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11330 }
11331
11332 /* We cannot use TMP3 because of allocate_stack. */
11333 allocate_stack(common, 1);
11334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11335 JUMPTO(SLJIT_JUMP, label);
11336 if (jump != NULL)
11337 JUMPHERE(jump);
11338 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11339 break;
11340 }
11341 #ifdef SUPPORT_UNICODE
11342 else if (type == OP_ALLANY && !common->invalid_utf)
11343 #else
11344 else if (type == OP_ALLANY)
11345 #endif
11346 {
11347 if (opcode == OP_STAR)
11348 {
11349 if (private_data_ptr == 0)
11350 allocate_stack(common, 2);
11351
11352 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11353 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11354
11355 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11356 process_partial_match(common);
11357
11358 if (early_fail_ptr != 0)
11359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11360 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11361 break;
11362 }
11363 #ifdef SUPPORT_UNICODE
11364 else if (!common->utf)
11365 #else
11366 else
11367 #endif
11368 {
11369 if (private_data_ptr == 0)
11370 allocate_stack(common, 2);
11371
11372 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11373 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11374
11375 if (common->mode == PCRE2_JIT_COMPLETE)
11376 {
11377 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11378 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11379 }
11380 else
11381 {
11382 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11383 process_partial_match(common);
11384 JUMPHERE(jump);
11385 }
11386
11387 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11388
11389 if (early_fail_ptr != 0)
11390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11391 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11392 break;
11393 }
11394 }
11395
11396 charpos_enabled = FALSE;
11397 charpos_char = 0;
11398 charpos_othercasebit = 0;
11399
11400 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11401 {
11402 #ifdef SUPPORT_UNICODE
11403 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11404 #else
11405 charpos_enabled = TRUE;
11406 #endif
11407 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11408 {
11409 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11410 if (charpos_othercasebit == 0)
11411 charpos_enabled = FALSE;
11412 }
11413
11414 if (charpos_enabled)
11415 {
11416 charpos_char = end[1];
11417 /* Consume the OP_CHAR opcode. */
11418 end += 2;
11419 #if PCRE2_CODE_UNIT_WIDTH == 8
11420 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11421 #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11422 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11423 if ((charpos_othercasebit & 0x100) != 0)
11424 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11425 #endif
11426 if (charpos_othercasebit != 0)
11427 charpos_char |= charpos_othercasebit;
11428
11429 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11430 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11431 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11432 }
11433 }
11434
11435 if (charpos_enabled)
11436 {
11437 if (opcode == OP_UPTO)
11438 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11439
11440 /* Search the first instance of charpos_char. */
11441 jump = JUMP(SLJIT_JUMP);
11442 label = LABEL();
11443 if (opcode == OP_UPTO)
11444 {
11445 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11446 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11447 }
11448 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11449 if (early_fail_ptr != 0)
11450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11451 JUMPHERE(jump);
11452
11453 detect_partial_match(common, &backtrack->topbacktracks);
11454 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11455 if (charpos_othercasebit != 0)
11456 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11457 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11458
11459 if (private_data_ptr == 0)
11460 allocate_stack(common, 2);
11461 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11462 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11463
11464 if (opcode == OP_UPTO)
11465 {
11466 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11467 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11468 }
11469
11470 /* Search the last instance of charpos_char. */
11471 label = LABEL();
11472 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11473 if (early_fail_ptr != 0)
11474 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11475 detect_partial_match(common, &no_match);
11476 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11477 if (charpos_othercasebit != 0)
11478 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11479
11480 if (opcode == OP_STAR)
11481 {
11482 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11483 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11484 JUMPTO(SLJIT_JUMP, label);
11485 }
11486 else
11487 {
11488 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11489 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11490 JUMPHERE(jump);
11491 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11492 JUMPTO(SLJIT_NOT_ZERO, label);
11493 }
11494
11495 set_jumps(no_match, LABEL());
11496 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11497 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11498 }
11499 else
11500 {
11501 if (private_data_ptr == 0)
11502 allocate_stack(common, 2);
11503
11504 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11505 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11506 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11507 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11508
11509 if (common->utf)
11510 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11511 #endif
11512 if (opcode == OP_UPTO)
11513 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11514
11515 detect_partial_match(common, &no_match);
11516 label = LABEL();
11517 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11518 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11519 if (common->utf)
11520 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11521 #endif
11522
11523 if (opcode == OP_UPTO)
11524 {
11525 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11526 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11527 }
11528
11529 detect_partial_match_to(common, label);
11530 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11531
11532 set_jumps(no_char1_match, LABEL());
11533 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11534 if (common->utf)
11535 {
11536 set_jumps(no_match, LABEL());
11537 if (use_tmp)
11538 {
11539 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11540 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11541 }
11542 else
11543 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11544 }
11545 else
11546 #endif
11547 {
11548 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11549 set_jumps(no_match, LABEL());
11550 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11551 }
11552
11553 if (early_fail_ptr != 0)
11554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11555 }
11556
11557 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11558 break;
11559
11560 case OP_MINSTAR:
11561 if (private_data_ptr == 0)
11562 allocate_stack(common, 1);
11563 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11564 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11565 if (early_fail_ptr != 0)
11566 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11567 break;
11568
11569 case OP_MINUPTO:
11570 SLJIT_ASSERT(early_fail_ptr == 0);
11571 if (private_data_ptr == 0)
11572 allocate_stack(common, 2);
11573 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11574 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11575 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11576 break;
11577
11578 case OP_QUERY:
11579 case OP_MINQUERY:
11580 SLJIT_ASSERT(early_fail_ptr == 0);
11581 if (private_data_ptr == 0)
11582 allocate_stack(common, 1);
11583 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11584 if (opcode == OP_QUERY)
11585 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11586 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11587 break;
11588
11589 case OP_EXACT:
11590 break;
11591
11592 case OP_POSSTAR:
11593 #if defined SUPPORT_UNICODE
11594 if (type == OP_ALLANY && !common->invalid_utf)
11595 #else
11596 if (type == OP_ALLANY)
11597 #endif
11598 {
11599 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11600 process_partial_match(common);
11601 if (early_fail_ptr != 0)
11602 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11603 break;
11604 }
11605
11606 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11607 if (common->utf)
11608 {
11609 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11610 detect_partial_match(common, &no_match);
11611 label = LABEL();
11612 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11613 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11614 detect_partial_match_to(common, label);
11615
11616 set_jumps(no_match, LABEL());
11617 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11618 if (early_fail_ptr != 0)
11619 {
11620 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11622 else
11623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11624 }
11625 break;
11626 }
11627 #endif
11628
11629 detect_partial_match(common, &no_match);
11630 label = LABEL();
11631 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11632 detect_partial_match_to(common, label);
11633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11634
11635 set_jumps(no_char1_match, LABEL());
11636 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11637 set_jumps(no_match, LABEL());
11638 if (early_fail_ptr != 0)
11639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11640 break;
11641
11642 case OP_POSUPTO:
11643 SLJIT_ASSERT(early_fail_ptr == 0);
11644 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11645 if (common->utf)
11646 {
11647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11648 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11649
11650 detect_partial_match(common, &no_match);
11651 label = LABEL();
11652 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11653 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11654 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11655 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11656 detect_partial_match_to(common, label);
11657
11658 set_jumps(no_match, LABEL());
11659 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11660 break;
11661 }
11662 #endif
11663
11664 if (type == OP_ALLANY)
11665 {
11666 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11667
11668 if (common->mode == PCRE2_JIT_COMPLETE)
11669 {
11670 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11671 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11672 }
11673 else
11674 {
11675 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11676 process_partial_match(common);
11677 JUMPHERE(jump);
11678 }
11679 break;
11680 }
11681
11682 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11683
11684 detect_partial_match(common, &no_match);
11685 label = LABEL();
11686 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11687 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11688 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11689 detect_partial_match_to(common, label);
11690 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11691
11692 set_jumps(no_char1_match, LABEL());
11693 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11694 set_jumps(no_match, LABEL());
11695 break;
11696
11697 case OP_POSQUERY:
11698 SLJIT_ASSERT(early_fail_ptr == 0);
11699 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11700 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11701 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11702 set_jumps(no_match, LABEL());
11703 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11704 break;
11705
11706 default:
11707 SLJIT_UNREACHABLE();
11708 break;
11709 }
11710
11711 count_match(common);
11712 return end;
11713 }
11714
compile_fail_accept_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11715 static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11716 {
11717 DEFINE_COMPILER;
11718 backtrack_common *backtrack;
11719
11720 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11721
11722 if (*cc == OP_FAIL)
11723 {
11724 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11725 return cc + 1;
11726 }
11727
11728 if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11729 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11730
11731 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11732 {
11733 /* No need to check notempty conditions. */
11734 if (common->accept_label == NULL)
11735 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11736 else
11737 JUMPTO(SLJIT_JUMP, common->accept_label);
11738 return cc + 1;
11739 }
11740
11741 if (common->accept_label == NULL)
11742 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11743 else
11744 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11745
11746 if (HAS_VIRTUAL_REGISTERS)
11747 {
11748 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11749 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11750 }
11751 else
11752 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11753
11754 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11755 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11756 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11757 if (common->accept_label == NULL)
11758 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11759 else
11760 JUMPTO(SLJIT_ZERO, common->accept_label);
11761
11762 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11763 if (common->accept_label == NULL)
11764 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11765 else
11766 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11767 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11768 return cc + 1;
11769 }
11770
compile_close_matchingpath(compiler_common * common,PCRE2_SPTR cc)11771 static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11772 {
11773 DEFINE_COMPILER;
11774 int offset = GET2(cc, 1);
11775 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11776
11777 /* Data will be discarded anyway... */
11778 if (common->currententry != NULL)
11779 return cc + 1 + IMM2_SIZE;
11780
11781 if (!optimized_cbracket)
11782 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11783 offset <<= 1;
11784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11785 if (!optimized_cbracket)
11786 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11787 return cc + 1 + IMM2_SIZE;
11788 }
11789
compile_control_verb_matchingpath(compiler_common * common,PCRE2_SPTR cc,backtrack_common * parent)11790 static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11791 {
11792 DEFINE_COMPILER;
11793 backtrack_common *backtrack;
11794 PCRE2_UCHAR opcode = *cc;
11795 PCRE2_SPTR ccend = cc + 1;
11796
11797 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11798 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11799 ccend += 2 + cc[1];
11800
11801 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11802
11803 if (opcode == OP_SKIP)
11804 {
11805 allocate_stack(common, 1);
11806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11807 return ccend;
11808 }
11809
11810 if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11811 {
11812 if (HAS_VIRTUAL_REGISTERS)
11813 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11814 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11815 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11816 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11817 }
11818
11819 return ccend;
11820 }
11821
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
records created in this file point their common.cc field at this buffer
instead of at a position inside the compiled pattern. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11823
compile_then_trap_matchingpath(compiler_common * common,PCRE2_SPTR cc,PCRE2_SPTR ccend,backtrack_common * parent)11824 static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11825 {
11826 DEFINE_COMPILER;
11827 backtrack_common *backtrack;
11828 BOOL needs_control_head;
11829 int size;
11830
11831 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11832 common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11833 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11834 BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11835 BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11836
11837 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11838 size = 3 + (size < 0 ? 0 : size);
11839
11840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11841 allocate_stack(common, size);
11842 if (size > 3)
11843 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11844 else
11845 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11849
11850 size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11851 if (size >= 0)
11852 init_frame(common, cc, ccend, size - 1, 0);
11853 }
11854
/* Generates the matching path for the opcode sequence [cc, ccend).

Each opcode is dispatched to its dedicated generator, which returns the
address of the next opcode to process. Generators that only need a list for
their failure jumps are given the nextbacktracks list of the most recently
pushed backtrack record (parent->top), or parent->topbacktracks when no
record has been pushed yet.

When common->has_then is set and a non-zero then-offset is recorded for cc,
the sequence is wrapped in a then-trap: one record is pushed before the
first opcode (the tail item) and another one after the last opcode (the head
item), and common->then_trap is restored afterwards.

The function returns early, before reaching ccend, when a generator returns
NULL or when an unexpected opcode is met.

Arguments:
  common      compiler state
  cc          first opcode of the sequence
  ccend       end of the sequence; must point to OP_END or to an opcode in
              the OP_ALT..OP_KETRPOS range
  parent      backtrack record that collects the records pushed here
*/
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  /* Saved so that the enclosing trap can be restored after the sequence. */
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Zero-width assertions on the current position. */
    case OP_SOD:
    case OP_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    break;

    /* Single item matchers without a repeat. */
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT:
    case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* The previous OVECTOR(0) value is kept in a stack slot and OVECTOR(0)
    is replaced by the current position. */
    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    /* Literal characters: the multi-character generator is only used in
    PCRE2_JIT_COMPLETE mode; the other modes go one character at a time. */
    case OP_CHAR:
    case OP_CHARI:
    if (common->mode == PCRE2_JIT_COMPLETE)
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

    /* Repeated single items. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    /* Character classes: the opcode that follows the class data decides
    whether the class is repeated (OP_CRSTAR..OP_CRPOSRANGE) or matched
    once. */
    case OP_CLASS:
    case OP_NCLASS:
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same decision as above; GET(cc, 1) is used as the offset of the
    opcode that follows the extended class. */
    case OP_XCLASS:
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
      cc = compile_iterator_matchingpath(common, cc, parent);
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
    break;
#endif

    /* Back references, with or without a repeat. */
    case OP_REF:
    case OP_REFI:
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    /* Back references with two immediates; the unrepeated form runs
    compile_dnref_search() before the reference itself is matched. */
    case OP_DNREF:
    case OP_DNREFI:
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
    else
      {
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    /* The whole bracket is skipped on the matching path. STR_PTR is saved
    in one stack slot, or in the second of two slots (the first is zeroed)
    when the bracket ends with OP_KETRMIN. */
    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    /* OP_BRAZERO prefixes either a bracket or an assertion; opcodes up to
    OP_ASSERTBACK_NOT are handled as assertions. */
    case OP_BRAZERO:
    if (cc[1] > OP_ASSERTBACK_NOT)
      cc = compile_bracket_matchingpath(common, cc, parent);
    else
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    /* The previous mark is saved on the stack and the address of the new
    mark name (cc + 2) is stored in the local mark slot and in the
    arguments. When has_skip_arg is set, five slots are used instead of one
    and a type_mark entry (name address and STR_PTR) is linked into the
    control head chain. */
    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    case OP_THEN:
    case OP_THEN_ARG:
    case OP_COMMIT:
    case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    /* No code is generated for the bracket that follows. */
    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }
  /* A NULL result stops the generation (presumably the generator hit a
  compiler error). */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}
12170
/* The backtrack-record helper macros used by the matching path generators
above are removed here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Calls compile_backtrackingpath() for (current) and leaves the enclosing
function with a bare "return;" if the sljit compiler reports an error
afterwards. The expansion refers to the names "common" and "compiler", so
both must be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the variable named "current" as a pointer to the given backtrack
record type. */
#define CURRENT_AS(type) ((type *)current)
12185
/* Generates the backtracking path of a repeated single item.

The iterator state is kept in two word-sized locations (offset0 and
offset1). When PRIVATE_DATA(cc) is zero they are the top two stack slots,
which this path releases once the iterator is exhausted; otherwise they are
SLJIT_SP relative slots starting at private_data_ptr and nothing is freed.

Whenever another attempt is possible the generated code jumps back to the
matchingpath label stored in the backtrack record. All jumps collected in
current->topbacktracks are bound to the end of the generated code.

Arguments:
  common      compiler state
  current     the char_iterator_backtrack record of the iterator
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);

/* Fills in opcode, type, max, exact and end; the returned cc is what the
single character matcher is called with below. */
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* One saved STR_PTR is popped per backtrack; a zero value means there
    is nothing left to retry. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
    }
  else
    {
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
      {
      /* offset0 holds the current position and offset1 the lower limit
      (kept in TMP2). Starting one code unit before offset0, the loop
      resumes matching as soon as the code unit before STR_PTR equals
      charpos.chr (after OR-ing in othercasebit when it is set), otherwise
      it moves back again while STR_PTR is above the limit. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
      label = LABEL();
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      move_back(common, NULL, TRUE);
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
      }
    else
      {
      /* Give back one item and resume matching, unless the position in
      offset0 has already reached the limit in offset1. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
      move_back(common, NULL, TRUE);
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
      }
    /* Exhausted: release the two state slots if they live on the stack. */
    JUMPHERE(jump);
    if (private_data_ptr == 0)
      free_stack(common, 2);
    }
  break;

  case OP_MINSTAR:
  /* Match one more item from the saved position; on success the new
  position is saved and matching resumes, on failure the state is
  released. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* Same as OP_MINSTAR, but offset1 holds a counter that is decremented
  first; when it reaches zero no further item is tried. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

  set_jumps(jumplist, LABEL());
  if (private_data_ptr == 0)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* The saved position is reloaded and the slot is cleared. A non-zero
  position resumes matching from there; a zero one means the iterator is
  exhausted. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  jump = JUMP(SLJIT_JUMP);
  /* Target of the jumps collected in u.backtracks: restore the saved
  position, clear the slot and resume matching. */
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* The saved position is reloaded and the slot is cleared. A zero
  position means the optional item has been tried already; otherwise it is
  matched now and matching resumes. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  set_jumps(jumplist, LABEL());
  JUMPHERE(jump);
  if (private_data_ptr == 0)
    free_stack(common, 1);
  break;

  /* These repeats emit no code on the backtracking path. */
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

set_jumps(current->topbacktracks, LABEL());
}
12313
compile_ref_iterator_backtrackingpath(compiler_common * common,struct backtrack_common * current)12314 static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12315 {
12316 DEFINE_COMPILER;
12317 PCRE2_SPTR cc = current->cc;
12318 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12319 PCRE2_UCHAR type;
12320
12321 type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12322
12323 if ((type & 0x1) == 0)
12324 {
12325 /* Maximize case. */
12326 set_jumps(current->topbacktracks, LABEL());
12327 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12328 free_stack(common, 1);
12329 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12330 return;
12331 }
12332
12333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12334 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12335 set_jumps(current->topbacktracks, LABEL());
12336 free_stack(common, ref ? 2 : 3);
12337 }
12338
compile_recurse_backtrackingpath(compiler_common * common,struct backtrack_common * current)12339 static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12340 {
12341 DEFINE_COMPILER;
12342 recurse_entry *entry;
12343
12344 if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12345 {
12346 entry = CURRENT_AS(recurse_backtrack)->entry;
12347 if (entry->backtrack_label == NULL)
12348 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12349 else
12350 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12351 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12352 }
12353 else
12354 compile_backtrackingpath(common, current->top);
12355
12356 set_jumps(current->topbacktracks, LABEL());
12357 }
12358
/* Generates the backtracking path of an assertion, optionally prefixed by
OP_BRAZERO.

With OP_BRAZERO the value in the top stack slot is loaded into STR_PTR
first; the code emitted below tests that value against zero to decide
whether the matching path can be resumed. When the assertion has a frame
(framesize >= 0), a positive assertion also reverts that frame and restores
the previous value of its private data slot.

Arguments:
  common      compiler state
  current     the assert_backtrack record of the assertion
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
assert_backtrack *record = CURRENT_AS(assert_backtrack);
BOOL is_brazero = FALSE;
struct sljit_jump *brajump = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
if (*cc == OP_BRAZERO)
  {
  is_brazero = TRUE;
  cc++;

  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (record->framesize < 0)
  {
  /* No frame to revert. */
  set_jumps(current->topbacktracks, LABEL());

  if (is_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (is_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, record->matchingpath);
    free_stack(common, 1);
    return;
    }

  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Revert the frame of the positive assertion. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (record->framesize - 1) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), record->private_data_ptr, TMP1, 0);
  }

set_jumps(current->topbacktracks, LABEL());

if (is_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, record->matchingpath);
  JUMPHERE(brajump);
  }
}
12427
/* Generates the backtracking path of a bracket group. The dispatcher in
compile_backtrackingpath calls this for OP_BRA, OP_CBRA, OP_SBRA, OP_SCBRA,
OP_COND, OP_SCOND, OP_ONCE, OP_ASSERT_NA, OP_ASSERTBACK_NA and OP_SCRIPT_RUN,
and for OP_BRAZERO when the following opcode is not an assertion; the
OP_BRAMINZERO handler also calls it directly.

Arguments:
  common     shared state of the JIT compiler
  current    backtrack record of the group (accessed as bracket_backtrack)

The matching paths of the second and later alternatives are generated from
here as well. If the sljit compiler reports an error after one of them, the
function returns immediately without emitting the remaining code. */
compile_bracket_backtrackingpath(compiler_common * common,struct backtrack_common * current)12428 static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12429 {
12430 DEFINE_COMPILER;
12431 int opcode, stacksize, alt_count, alt_max;
12432 int offset = 0;
12433 int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12434 int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12435 PCRE2_SPTR cc = current->cc;
12436 PCRE2_SPTR ccbegin;
12437 PCRE2_SPTR ccprev;
12438 PCRE2_UCHAR bra = OP_BRA;
12439 PCRE2_UCHAR ket;
12440 assert_backtrack *assert;
12441 BOOL has_alternatives;
12442 BOOL needs_control_head = FALSE;
12443 struct sljit_jump *brazero = NULL;
12444 struct sljit_jump *next_alt = NULL;
12445 struct sljit_jump *once = NULL;
12446 struct sljit_jump *cond = NULL;
12447 struct sljit_label *rmin_label = NULL;
12448 struct sljit_label *exact_label = NULL;
12449 struct sljit_put_label *put_label = NULL;
12450
/* Step over an optional OP_BRAZERO / OP_BRAMINZERO prefix and remember it
in bra (which otherwise stays OP_BRA). */
12451 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12452 {
12453 bra = *cc;
12454 cc++;
12455 }
12456
/* Read the closing ket opcode. A plain OP_KET whose private data is non-zero
describes a counted repeat (slot, type and count); OP_UPTO and OP_MINUPTO are
then handled through the OP_KETRMAX and OP_KETRMIN code paths. */
12457 opcode = *cc;
12458 ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12459 ket = *ccbegin;
12460 if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12461 {
12462 repeat_ptr = PRIVATE_DATA(ccbegin);
12463 repeat_type = PRIVATE_DATA(ccbegin + 2);
12464 repeat_count = PRIVATE_DATA(ccbegin + 3);
12465 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12466 if (repeat_type == OP_UPTO)
12467 ket = OP_KETRMAX;
12468 if (repeat_type == OP_MINUPTO)
12469 ket = OP_KETRMIN;
12470 }
/* From here ccbegin is the opening opcode of the group and cc is the
position reached by following the link stored after it. */
12471 ccbegin = cc;
12472 cc += GET(cc, 1);
12473 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is decided differently: it is set
when the condition is an assertion or when a condfailed list was recorded. */
12474 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12475 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
/* Capturing groups: offset is twice the group number read from the code. */
12476 if (opcode == OP_CBRA || opcode == OP_SCBRA)
12477 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* An OP_COND closed by a repeating ket is treated as OP_SCOND below. */
12478 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12479 opcode = OP_SCOND;
12480
12481 alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12482
12483 /* Decoding the needs_control_head in framesize. */
/* Bit 0 of u.framesize holds the flag; the stored field itself is shifted
right by one here, so the backtrack record is modified in place. */
12484 if (opcode == OP_ONCE)
12485 {
12486 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12487 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12488 }
12489
/* Counted repeat routed through KETRMAX/KETRMIN: pop the top stack word into
TMP1 and store it to the repeat_ptr slot (plus one for OP_UPTO). */
12490 if (ket != OP_KET && repeat_type != 0)
12491 {
12492 /* TMP1 is used in OP_KETRMIN below. */
12493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12494 free_stack(common, 1);
12495 if (repeat_type == OP_UPTO)
12496 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12497 else
12498 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12499 }
12500
/* Entry handling, selected by the ket type and the zero-repeat prefix. The
jumps and labels created here (brazero, rmin_label, exact_label) are resolved
or targeted at the end of the function. */
12501 if (ket == OP_KETRMAX)
12502 {
12503 if (bra == OP_BRAZERO)
12504 {
12505 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12506 free_stack(common, 1);
12507 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12508 }
12509 }
12510 else if (ket == OP_KETRMIN)
12511 {
12512 if (bra != OP_BRAMINZERO)
12513 {
12514 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12515 if (repeat_type != 0)
12516 {
12517 /* TMP1 was set a few lines above. */
12518 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12519 /* Drop STR_PTR for non-greedy plus quantifier. */
12520 if (opcode != OP_ONCE)
12521 free_stack(common, 1);
12522 }
12523 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12524 {
12525 /* Checking zero-length iteration. */
12526 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12527 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12528 else
12529 {
12530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12531 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12532 }
12533 /* Drop STR_PTR for non-greedy plus quantifier. */
12534 if (opcode != OP_ONCE)
12535 free_stack(common, 1);
12536 }
12537 else
12538 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12539 }
12540 rmin_label = LABEL();
12541 if (repeat_type != 0)
12542 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12543 }
12544 else if (bra == OP_BRAZERO)
12545 {
12546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12547 free_stack(common, 1);
12548 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12549 }
12550 else if (repeat_type == OP_EXACT)
12551 {
12552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12553 exact_label = LABEL();
12554 }
12555
/* Capturing group: pop stack words back into the OVECTOR pair, and into
capture_last_ptr when that slot is in use. Nothing is popped here when
capture_last_ptr is unused and optimized_cbracket is set for this group. */
12556 if (offset != 0)
12557 {
12558 if (common->capture_last_ptr != 0)
12559 {
12560 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12561 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12562 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12564 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12565 free_stack(common, 3);
12566 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12567 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12568 }
12569 else if (common->optimized_cbracket[offset >> 1] == 0)
12570 {
12571 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12572 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12573 free_stack(common, 2);
12574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12575 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12576 }
12577 }
12578
/* Alternative selection. OP_ONCE: when a frame was stored (framesize >= 0),
STACK_TOP is reloaded from the private data slot and the revertframes routine
is called; the unconditional jump kept in 'once' is bound near the end of the
function. Other groups with alternatives pop a selector word into TMP1. */
12579 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12580 {
12581 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12582 {
12583 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12584 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12585 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12586 }
12587 once = JUMP(SLJIT_JUMP);
12588 }
12589 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12590 {
12591 if (has_alternatives)
12592 {
12593 /* Always exactly one alternative. */
12594 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12595 free_stack(common, 1);
12596
12597 alt_max = 2;
12598 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12599 }
12600 }
12601 else if (has_alternatives)
12602 {
12603 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12604 free_stack(common, 1);
12605
/* With more than three alternatives TMP1 is the target of an indirect jump,
and the put_label stored in u.matching_put_label is bound to the code that
follows. Otherwise TMP1 is compared against small constants. */
12606 if (alt_max > 3)
12607 {
12608 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12609
12610 SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12611 sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12612 sljit_emit_op0(compiler, SLJIT_ENDBR);
12613 }
12614 else
12615 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12616 }
12617
/* Backtracking code for the items recorded under current->top, followed by
the landing point of the pending topbacktracks jumps. */
12618 COMPILE_BACKTRACKINGPATH(current->top);
12619 if (current->topbacktracks)
12620 set_jumps(current->topbacktracks, LABEL());
12621
/* Conditional groups: undo an assertion condition if necessary, then bind
the condfailed jumps. The jump kept in 'cond' skips over the code generated
for the other branch and is bound after the alternatives loop. */
12622 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12623 {
12624 /* Conditional block always has at most one alternative. */
12625 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12626 {
12627 SLJIT_ASSERT(has_alternatives);
12628 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12629 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12630 {
12631 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12632 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12633 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12634 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12636 }
12637 cond = JUMP(SLJIT_JUMP);
12638 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12639 }
12640 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12641 {
12642 SLJIT_ASSERT(has_alternatives);
12643 cond = JUMP(SLJIT_JUMP);
12644 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12645 }
12646 else
12647 SLJIT_ASSERT(!has_alternatives);
12648 }
12649
/* Remaining alternatives. For each one the matching path is generated here,
followed by code that rebuilds the stack record and jumps to
alternative_matchingpath; after that comes the entry point used when that
alternative itself is backtracked into, and its backtracking code. */
12650 if (has_alternatives)
12651 {
12652 alt_count = 1;
12653 do
12654 {
12655 current->top = NULL;
12656 current->topbacktracks = NULL;
12657 current->nextbacktracks = NULL;
12658 /* Conditional blocks always have an additional alternative, even if it is empty. */
12659 if (*cc == OP_ALT)
12660 {
12661 ccprev = cc + 1 + LINK_SIZE;
12662 cc += GET(cc, 1);
/* Reload STR_PTR before trying the alternative (not done for conditional
groups); where it is read from depends on the group type. */
12663 if (opcode != OP_COND && opcode != OP_SCOND)
12664 {
12665 if (opcode != OP_ONCE)
12666 {
12667 if (private_data_ptr != 0)
12668 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12669 else
12670 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12671 }
12672 else
12673 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12674 }
12675 compile_matchingpath(common, ccprev, cc, current);
/* Give up immediately if the sljit compiler is in an error state. */
12676 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12677 return;
12678
12679 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12680 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12681
12682 if (opcode == OP_SCRIPT_RUN)
12683 match_script_run_common(common, private_data_ptr, current);
12684 }
12685
12686 /* Instructions after the current alternative is successfully matched. */
12687 /* There is a similar code in compile_bracket_matchingpath. */
12688 if (opcode == OP_ONCE)
12689 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12690
/* First pass: count the stack words that are about to be pushed. */
12691 stacksize = 0;
12692 if (repeat_type == OP_MINUPTO)
12693 {
12694 /* We need to preserve the counter. TMP2 will be used below. */
12695 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12696 stacksize++;
12697 }
12698 if (ket != OP_KET || bra != OP_BRA)
12699 stacksize++;
12700 if (offset != 0)
12701 {
12702 if (common->capture_last_ptr != 0)
12703 stacksize++;
12704 if (common->optimized_cbracket[offset >> 1] == 0)
12705 stacksize += 2;
12706 }
12707 if (opcode != OP_ONCE)
12708 stacksize++;
12709
12710 if (stacksize > 0)
12711 allocate_stack(common, stacksize);
12712
/* Second pass: fill the allocated words; stacksize now acts as the index of
the next word to write. */
12713 stacksize = 0;
12714 if (repeat_type == OP_MINUPTO)
12715 {
12716 /* TMP2 was set above. */
12717 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12718 stacksize++;
12719 }
12720
12721 if (ket != OP_KET || bra != OP_BRA)
12722 {
12723 if (ket != OP_KET)
12724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12725 else
12726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12727 stacksize++;
12728 }
12729
12730 if (offset != 0)
12731 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12732
/* Record which alternative is active: a small index when alt_max <= 3,
otherwise a code address supplied later through put_label. */
12733 if (opcode != OP_ONCE)
12734 {
12735 if (alt_max <= 3)
12736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12737 else
12738 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12739 }
12740
12741 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12742 {
12743 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12744 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12745 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12746 }
12747
12748 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12749
/* Entry point for backtracking into this alternative: either the pending
next_alt comparison is bound here (and a new one is created against index 1
when a third alternative follows), or the put_label is bound to this spot. */
12750 if (opcode != OP_ONCE)
12751 {
12752 if (alt_max <= 3)
12753 {
12754 JUMPHERE(next_alt);
12755 alt_count++;
12756 if (alt_count < alt_max)
12757 {
12758 SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12759 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12760 }
12761 }
12762 else
12763 {
12764 sljit_set_put_label(put_label, LABEL());
12765 sljit_emit_op0(compiler, SLJIT_ENDBR);
12766 }
12767 }
12768
12769 COMPILE_BACKTRACKINGPATH(current->top);
12770 if (current->topbacktracks)
12771 set_jumps(current->topbacktracks, LABEL());
12772 SLJIT_ASSERT(!current->nextbacktracks);
12773 }
12774 while (*cc == OP_ALT);
12775
/* Conditional group with a negative assertion condition that stored a frame:
revert the frame, then bind the 'cond' jump created earlier. */
12776 if (cond != NULL)
12777 {
12778 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12779 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12780 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12781 {
12782 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12783 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12785 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12786 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12787 }
12788 JUMPHERE(cond);
12789 }
12790
12791 /* Free the STR_PTR. */
12792 if (private_data_ptr == 0)
12793 free_stack(common, 1);
12794 }
12795
/* Final restore, selected by group type. Capturing groups pop either both
OVECTOR words (optimized_cbracket set) or a single word that goes back to the
private data slot. */
12796 if (offset != 0)
12797 {
12798 /* Using both tmp register is better for instruction scheduling. */
12799 if (common->optimized_cbracket[offset >> 1] != 0)
12800 {
12801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12802 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12803 free_stack(common, 2);
12804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12806 }
12807 else
12808 {
12809 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12810 free_stack(common, 1);
12811 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12812 }
12813 }
/* These group types pop one word straight into the private data slot. */
12814 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12815 {
12816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12817 free_stack(common, 1);
12818 }
/* OP_ONCE: compute how many words to release, bind the 'once' jump created
earlier, then restore the private data slot. */
12819 else if (opcode == OP_ONCE)
12820 {
12821 cc = ccbegin + GET(ccbegin, 1);
12822 stacksize = needs_control_head ? 1 : 0;
12823
12824 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12825 {
12826 /* Reset head and drop saved frame. */
12827 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12828 }
12829 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12830 {
12831 /* The STR_PTR must be released. */
12832 stacksize++;
12833 }
12834
12835 if (stacksize > 0)
12836 free_stack(common, stacksize);
12837
12838 JUMPHERE(once);
12839 /* Restore previous private_data_ptr */
12840 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12841 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12842 else if (ket == OP_KETRMIN)
12843 {
12844 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12845 /* See the comment below. */
12846 free_stack(common, 2);
12847 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12848 }
12849 }
12850
/* Tail: iteration control. OP_EXACT increments the counter at repeat_ptr and
loops back to exact_label while it is <= repeat_count. KETRMAX and KETRMIN
load a stack word and branch to recursive_matchingpath or rmin_label when it
is non-zero. The brazero jump, if any, is bound here. */
12851 if (repeat_type == OP_EXACT)
12852 {
12853 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12854 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12855 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12856 }
12857 else if (ket == OP_KETRMAX)
12858 {
12859 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12860 if (bra != OP_BRAZERO)
12861 free_stack(common, 1);
12862
12863 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12864 if (bra == OP_BRAZERO)
12865 {
12866 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12867 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12868 JUMPHERE(brazero);
12869 free_stack(common, 1);
12870 }
12871 }
12872 else if (ket == OP_KETRMIN)
12873 {
12874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12875
12876 /* OP_ONCE removes everything in case of a backtrack, so we don't
12877 need to explicitly release the STR_PTR. The extra release would
12878 affect badly the free_stack(2) above. */
12879 if (opcode != OP_ONCE)
12880 free_stack(common, 1);
12881 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12882 if (opcode == OP_ONCE)
12883 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12884 else if (bra == OP_BRAMINZERO)
12885 free_stack(common, 1);
12886 }
12887 else if (bra == OP_BRAZERO)
12888 {
12889 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12890 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12891 JUMPHERE(brazero);
12892 }
12893 }
12894
compile_bracketpos_backtrackingpath(compiler_common * common,struct backtrack_common * current)12895 static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12896 {
12897 DEFINE_COMPILER;
12898 int offset;
12899 struct sljit_jump *jump;
12900
12901 if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12902 {
12903 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12904 {
12905 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12907 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12909 if (common->capture_last_ptr != 0)
12910 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12912 if (common->capture_last_ptr != 0)
12913 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12914 }
12915 set_jumps(current->topbacktracks, LABEL());
12916 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12917 return;
12918 }
12919
12920 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12921 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12922 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12923
12924 if (current->topbacktracks)
12925 {
12926 jump = JUMP(SLJIT_JUMP);
12927 set_jumps(current->topbacktracks, LABEL());
12928 /* Drop the stack frame. */
12929 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12930 JUMPHERE(jump);
12931 }
12932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12933 }
12934
compile_braminzero_backtrackingpath(compiler_common * common,struct backtrack_common * current)12935 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12936 {
12937 assert_backtrack backtrack;
12938
12939 current->top = NULL;
12940 current->topbacktracks = NULL;
12941 current->nextbacktracks = NULL;
12942 if (current->cc[1] > OP_ASSERTBACK_NOT)
12943 {
12944 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12945 compile_bracket_matchingpath(common, current->cc, current);
12946 compile_bracket_backtrackingpath(common, current->top);
12947 }
12948 else
12949 {
12950 memset(&backtrack, 0, sizeof(backtrack));
12951 backtrack.common.cc = current->cc;
12952 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12953 /* Manual call of compile_assert_matchingpath. */
12954 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12955 }
12956 SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12957 }
12958
compile_control_verb_backtrackingpath(compiler_common * common,struct backtrack_common * current)12959 static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960 {
12961 DEFINE_COMPILER;
12962 PCRE2_UCHAR opcode = *current->cc;
12963 struct sljit_label *loop;
12964 struct sljit_jump *jump;
12965
12966 if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12967 {
12968 if (common->then_trap != NULL)
12969 {
12970 SLJIT_ASSERT(common->control_head_ptr != 0);
12971
12972 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12973 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12974 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12975 jump = JUMP(SLJIT_JUMP);
12976
12977 loop = LABEL();
12978 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12979 JUMPHERE(jump);
12980 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12981 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12982 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12983 return;
12984 }
12985 else if (!common->local_quit_available && common->in_positive_assertion)
12986 {
12987 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12988 return;
12989 }
12990 }
12991
12992 if (common->local_quit_available)
12993 {
12994 /* Abort match with a fail. */
12995 if (common->quit_label == NULL)
12996 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12997 else
12998 JUMPTO(SLJIT_JUMP, common->quit_label);
12999 return;
13000 }
13001
13002 if (opcode == OP_SKIP_ARG)
13003 {
13004 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13006 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13007 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13008
13009 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13010 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13011 return;
13012 }
13013
13014 if (opcode == OP_SKIP)
13015 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13016 else
13017 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13018 add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13019 }
13020
compile_then_trap_backtrackingpath(compiler_common * common,struct backtrack_common * current)13021 static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13022 {
13023 DEFINE_COMPILER;
13024 struct sljit_jump *jump;
13025 int size;
13026
13027 if (CURRENT_AS(then_trap_backtrack)->then_trap)
13028 {
13029 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13030 return;
13031 }
13032
13033 size = CURRENT_AS(then_trap_backtrack)->framesize;
13034 size = 3 + (size < 0 ? 0 : size);
13035
13036 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13037 free_stack(common, size);
13038 jump = JUMP(SLJIT_JUMP);
13039
13040 set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13041 /* STACK_TOP is set by THEN. */
13042 if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13043 {
13044 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13045 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13046 }
13047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13048 free_stack(common, 3);
13049
13050 JUMPHERE(jump);
13051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13052 }
13053
/* Generates the backtracking paths for a chain of backtrack records. The
chain is followed through the prev links, starting at 'current', and each
record is dispatched on the opcode found at its cc pointer.

Arguments:
  common     shared state of the JIT compiler
  current    first backtrack record to process (may be NULL)

The then_trap value of common is saved on entry and put back on exit. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current; current = current->prev)
  {
  /* Pending jumps of this record land right before its code. */
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop one word and store it into the first OVECTOR slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix selects the assertion or bracket handler. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Restore the mark slot; with skip-arg support the record is five words
    long and the control head slot is restored from it as well. */
    if (common->has_skip_arg)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    case OP_COMMIT_ARG:
    /* Without a local quit the return register is set to PCRE2_ERROR_NOMATCH
    before jumping to the quit point. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      {
      JUMPTO(SLJIT_JUMP, common->quit_label);
      }
    else
      {
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
      }
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* Only the pending jumps need a landing point. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13244
compile_recurse(compiler_common * common)13245 static SLJIT_INLINE void compile_recurse(compiler_common *common)
13246 {
13247 DEFINE_COMPILER;
13248 PCRE2_SPTR cc = common->start + common->currententry->start;
13249 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13250 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13251 BOOL needs_control_head;
13252 BOOL has_quit;
13253 BOOL has_accept;
13254 int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13255 int alt_count, alt_max, local_size;
13256 backtrack_common altbacktrack;
13257 jump_list *match = NULL;
13258 struct sljit_jump *next_alt = NULL;
13259 struct sljit_jump *accept_exit = NULL;
13260 struct sljit_label *quit;
13261 struct sljit_put_label *put_label = NULL;
13262
13263 /* Recurse captures then. */
13264 common->then_trap = NULL;
13265
13266 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13267
13268 alt_max = no_alternatives(cc);
13269 alt_count = 0;
13270
13271 /* Matching path. */
13272 SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13273 common->currententry->entry_label = LABEL();
13274 set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13275
13276 sljit_emit_fast_enter(compiler, TMP2, 0);
13277 count_match(common);
13278
13279 local_size = (alt_max > 1) ? 2 : 1;
13280
13281 /* (Reversed) stack layout:
13282 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13283
13284 allocate_stack(common, private_data_size + local_size);
13285 /* Save return address. */
13286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13287
13288 copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13289
13290 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
13291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13292
13293 if (needs_control_head)
13294 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13295
13296 if (alt_max > 1)
13297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13298
13299 memset(&altbacktrack, 0, sizeof(backtrack_common));
13300 common->quit_label = NULL;
13301 common->accept_label = NULL;
13302 common->quit = NULL;
13303 common->accept = NULL;
13304 altbacktrack.cc = ccbegin;
13305 cc += GET(cc, 1);
13306 while (1)
13307 {
13308 altbacktrack.top = NULL;
13309 altbacktrack.topbacktracks = NULL;
13310
13311 if (altbacktrack.cc != ccbegin)
13312 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13313
13314 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13315 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13316 return;
13317
13318 allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13319 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13320
13321 if (alt_max > 1 || has_accept)
13322 {
13323 if (alt_max > 3)
13324 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13325 else
13326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13327 }
13328
13329 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13330
13331 if (alt_count == 0)
13332 {
13333 /* Backtracking path entry. */
13334 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13335 common->currententry->backtrack_label = LABEL();
13336 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13337
13338 sljit_emit_fast_enter(compiler, TMP1, 0);
13339
13340 if (has_accept)
13341 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13342
13343 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13344 /* Save return address. */
13345 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13346
13347 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13348
13349 if (alt_max > 1)
13350 {
13351 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13352 free_stack(common, 2);
13353
13354 if (alt_max > 3)
13355 {
13356 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13357 sljit_set_put_label(put_label, LABEL());
13358 sljit_emit_op0(compiler, SLJIT_ENDBR);
13359 }
13360 else
13361 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13362 }
13363 else
13364 free_stack(common, has_accept ? 2 : 1);
13365 }
13366 else if (alt_max > 3)
13367 {
13368 sljit_set_put_label(put_label, LABEL());
13369 sljit_emit_op0(compiler, SLJIT_ENDBR);
13370 }
13371 else
13372 {
13373 JUMPHERE(next_alt);
13374 if (alt_count + 1 < alt_max)
13375 {
13376 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13377 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13378 }
13379 }
13380
13381 alt_count++;
13382
13383 compile_backtrackingpath(common, altbacktrack.top);
13384 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13385 return;
13386 set_jumps(altbacktrack.topbacktracks, LABEL());
13387
13388 if (*cc != OP_ALT)
13389 break;
13390
13391 altbacktrack.cc = cc + 1 + LINK_SIZE;
13392 cc += GET(cc, 1);
13393 }
13394
13395 /* No alternative is matched. */
13396
13397 quit = LABEL();
13398
13399 copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13400
13401 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13402 free_stack(common, private_data_size + local_size);
13403 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13404 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13405
13406 if (common->quit != NULL)
13407 {
13408 SLJIT_ASSERT(has_quit);
13409
13410 set_jumps(common->quit, LABEL());
13411 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13412 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13413 JUMPTO(SLJIT_JUMP, quit);
13414 }
13415
13416 if (has_accept)
13417 {
13418 JUMPHERE(accept_exit);
13419 free_stack(common, 2);
13420
13421 /* Save return address. */
13422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13423
13424 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13425
13426 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13427 free_stack(common, private_data_size + local_size);
13428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13429 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13430 }
13431
13432 if (common->accept != NULL)
13433 {
13434 SLJIT_ASSERT(has_accept);
13435
13436 set_jumps(common->accept, LABEL());
13437
13438 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13439 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13440
13441 allocate_stack(common, 2);
13442 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13443 }
13444
13445 set_jumps(match, LABEL());
13446
13447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13448
13449 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13450
13451 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13452 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13453 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13454 }
13455
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* JIT compile option bits that configure the generated code instead of
selecting a matching mode. jit_compile() removes these bits from its mode
argument before the mode is compared with the PCRE2_JIT_xxx mode values. */

#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
13461
/* Translates the byte code of a compiled pattern into machine code for one
matching mode and stores the result in the executable_functions descriptor
attached to the pattern (re->executable_jit), creating the descriptor on
first use.

Arguments:
  code        the compiled pattern
  mode        PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT or
              PCRE2_JIT_PARTIAL_HARD, optionally combined with the bits of
              PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS

Returns:      0 on success
              PCRE2_ERROR_INTERNAL if the newline convention is unknown
              PCRE2_ERROR_NOMEMORY if an allocation fails, the sljit compiler
                reports an error, check_opcode_types() rejects the pattern,
                or the private data does not fit into SLJIT_MAX_LOCAL_SIZE
*/

static int jit_compile(pcre2_code *code, sljit_u32 mode)
{
pcre2_real_code *re = (pcre2_real_code *)code;
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const sljit_u8 *tables = re->tables;
void *allocator_data = &re->memctl;
int private_data_size;
PCRE2_SPTR ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *empty_match = NULL;
struct sljit_jump *end_anchor_failed = NULL;
jump_list *reqcu_not_found = NULL;

SLJIT_ASSERT(tables);

#if HAS_VIRTUAL_REGISTERS == 1
SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
#elif HAS_VIRTUAL_REGISTERS == 0
SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
#else
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
#endif

memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
common->re = re;
/* The name table follows the pcre2_real_code header; the byte code follows
the name table. */
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;

#ifdef SUPPORT_UNICODE
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
#endif /* SUPPORT_UNICODE */
/* From here on mode holds only the matching mode. */
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;

common->start = rootbacktrack.cc;
common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode;
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
common->nltype = NLTYPE_FIXED;
switch(re->newline_convention)
  {
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
  default: return PCRE2_ERROR_INTERNAL;
  }
common->nlmax = READ_CHAR_MAX;
common->nlmin = 0;
if (re->bsr_convention == PCRE2_BSR_UNICODE)
  common->bsr_nltype = NLTYPE_ANY;
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
  common->bsr_nltype = NLTYPE_ANYCRLF;
else
  {
  /* No explicit convention: use the build-time default. */
#ifdef BSR_ANYCRLF
  common->bsr_nltype = NLTYPE_ANYCRLF;
#else
  common->bsr_nltype = NLTYPE_ANY;
#endif
  }
common->bsr_nlmax = READ_CHAR_MAX;
common->bsr_nlmin = 0;
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
common->ctypes = (sljit_sw)(tables + ctypes_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
  {
  /* In UTF mode the newline character ranges can be narrowed. */
  if (common->nltype == NLTYPE_ANY)
    common->nlmax = 0x2029;
  else if (common->nltype == NLTYPE_ANYCRLF)
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  else
    {
    /* We only care about the first newline character. */
    common->nlmax = common->newline & 0xff;
    }

  if (common->nltype == NLTYPE_FIXED)
    common->nlmin = common->newline & 0xff;
  else
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;

  if (common->bsr_nltype == NLTYPE_ANY)
    common->bsr_nlmax = 0x2029;
  else
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  }
else
  common->invalid_utf = FALSE;
#endif /* SUPPORT_UNICODE */
ccend = bracketend(common->start);

/* Calculate the local space size on the stack. */
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
if (!common->optimized_cbracket)
  return PCRE2_ERROR_NOMEMORY;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
#else
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
#endif

SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
common->capture_last_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
#endif
if (!check_opcode_types(common, common->start, ccend))
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Checking flags and updating ovector_start. Each optional local variable
takes one sljit word in front of the ovector. */
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  common->req_char_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (mode != PCRE2_JIT_COMPLETE)
  {
  common->start_used_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
    {
    common->hit_start = common->ovector_start;
    common->ovector_start += sizeof(sljit_sw);
    }
  }
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
  {
  common->match_end_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
common->control_head_ptr = 1;
#endif
/* A non-zero value here only signals that a control head is needed; it is
replaced by the real offset. */
if (common->control_head_ptr != 0)
  {
  common->control_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }
if (common->has_set_som)
  {
  /* Saving the real start pointer is necessary. */
  common->start_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  common->ovector_start += sizeof(sljit_sw);

if (common->start_ptr == 0)
  common->start_ptr = OVECTOR(0);

/* Capturing brackets cannot be optimized if callouts are allowed. */
if (common->capture_last_ptr != 0)
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);

/* One sljit_s32 per code unit of byte code, plus one byte per code unit for
the then offsets when the pattern contains (*THEN). */
total_length = ccend - common->start;
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));

private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
  detect_early_fail(common, common->start, &private_data_size, 0, 0);

SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);

if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  {
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if (common->has_then)
  {
  /* The then offsets live in the tail of the private_data_ptrs block. */
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  memset(common->then_offsets, 0, total_length);
  set_then_offsets(common, common->start, NULL);
  }

compiler = sljit_create_compiler(allocator_data, NULL);
if (!compiler)
  {
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }
common->compiler = compiler;

/* Main entry of the generated matching function. */
sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if (common->req_char_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);

OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
/* The stored limit is limit_match + 1. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);

if (common->early_fail_start_ptr < common->early_fail_end_ptr)
  reset_early_fail(common);

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  mainloop_label = mainloop_entry(common);
  continue_match_label = LABEL();
  /* Forward search if possible. The first applicable method wins. */
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
    {
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
      ;
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
      fast_forward_first_char(common);
    else if ((re->flags & PCRE2_STARTLINE) != 0)
      fast_forward_newline(common);
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
      fast_forward_start_bits(common);
    }
  }
else
  continue_match_label = LABEL();

if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
  {
  /* Fail at once when fewer than minlength code units remain. The return
  value is preloaded because the jump goes straight to the abort label. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  }
if (common->req_char_ptr != 0)
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
if (common->fast_forward_bc_ptr != NULL)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);

if (common->start_ptr != OVECTOR(0))
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);

/* Copy the beginning of the string. */
if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(jump);
  }
else if (mode == PCRE2_JIT_PARTIAL_HARD)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);

if (common->might_be_empty)
  {
  /* An empty match needs the option checks emitted near the end of this
  function. */
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  empty_match_found_label = LABEL();
  }

common->accept_label = LABEL();
if (common->accept != NULL)
  set_jumps(common->accept, common->accept_label);

/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
common->quit_label = common->abort_label = LABEL();
if (common->quit != NULL)
  set_jumps(common->quit, common->quit_label);
if (common->abort != NULL)
  set_jumps(common->abort, common->abort_label);
if (minlength_check_failed != NULL)
  SET_LABEL(minlength_check_failed, common->abort_label);

sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);

if (common->failed_match != NULL)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
  set_jumps(common->failed_match, LABEL());
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  JUMPTO(SLJIT_JUMP, common->abort_label);
  }

/* A failed end anchor check continues with the code emitted below. */
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
  JUMPHERE(end_anchor_failed);

if (mode != PCRE2_JIT_COMPLETE)
  {
  common->partialmatchlabel = LABEL();
  set_jumps(common->partialmatch, common->partialmatchlabel);
  return_with_partial_match(common, common->quit_label);
  }

if (common->might_be_empty)
  empty_match_backtrack_label = LABEL();
compile_backtrackingpath(common, rootbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  {
  sljit_free_compiler(compiler);
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

SLJIT_ASSERT(rootbacktrack.prev == NULL);
reset_match_label = LABEL();

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Update hit_start only in the first time. */
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  JUMPHERE(jump);
  }

/* Check we have remaining characters. */
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Reload the start position of the attempt that has just failed. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);

if ((re->overall_options & PCRE2_ANCHORED) == 0)
  {
  if (common->ff_newline_shortcut != NULL)
    {
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
      {
      if (common->match_end_ptr != 0)
        {
        /* STR_END is replaced by the match end for the shortcut and
        restored from TMP3 when the jump is not taken. */
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
        }
      else
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
      }
    }
  else
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
  }

/* No more remaining characters. */
if (reqcu_not_found != NULL)
  set_jumps(reqcu_not_found, LABEL());

if (mode == PCRE2_JIT_PARTIAL_SOFT)
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
JUMPTO(SLJIT_JUMP, common->quit_label);

flush_stubs(common);

if (common->might_be_empty)
  {
  /* Empty match: backtrack when PCRE2_NOTEMPTY is set, or when
  PCRE2_NOTEMPTY_ATSTART is set and STR_PTR equals the str field of the
  arguments; accept it otherwise. */
  JUMPHERE(empty_match);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  }

/* Compile the recursion entries. compile_recurse() resets the quit label,
so the label of the main function is saved and restored around the loop. */
common->fast_forward_bc_ptr = NULL;
common->early_fail_start_ptr = 0;
common->early_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
quit_label = common->quit_label;
while (common->currententry != NULL)
  {
  /* Might add new entries. */
  compile_recurse(common);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    sljit_free_compiler(compiler);
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  flush_stubs(common);
  common->currententry = common->currententry->next;
  }
common->local_quit_available = FALSE;
common->quit_label = quit_label;

/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* STR_PTR shares SLJIT_R1 with the second call argument, so it is saved in
LOCALS1 across the call to sljit_stack_resize(). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));

/* A zero return value means that the resize failed. */
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

/* Allocation failed. */
JUMPHERE(jump);
/* We break the return address cache here, but this is a really rare case. */
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Call limit reached. */
set_jumps(common->calllimit, LABEL());
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
JUMPTO(SLJIT_JUMP, common->quit_label);

/* Shared helper routines: each one is emitted only when some generated code
refers to it. */
if (common->revertframes != NULL)
  {
  set_jumps(common->revertframes, LABEL());
  do_revertframes(common);
  }
if (common->wordboundary != NULL)
  {
  set_jumps(common->wordboundary, LABEL());
  check_wordboundary(common);
  }
if (common->anynewline != NULL)
  {
  set_jumps(common->anynewline, LABEL());
  check_anynewline(common);
  }
if (common->hspace != NULL)
  {
  set_jumps(common->hspace, LABEL());
  check_hspace(common);
  }
if (common->vspace != NULL)
  {
  set_jumps(common->vspace, LABEL());
  check_vspace(common);
  }
if (common->casefulcmp != NULL)
  {
  set_jumps(common->casefulcmp, LABEL());
  do_casefulcmp(common);
  }
if (common->caselesscmp != NULL)
  {
  set_jumps(common->caselesscmp, LABEL());
  do_caselesscmp(common);
  }
if (common->reset_match != NULL)
  {
  set_jumps(common->reset_match, LABEL());
  do_reset_match(common, (re->top_bracket + 1) * 2);
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  JUMPTO(SLJIT_JUMP, reset_match_label);
  }
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utfreadchar != NULL)
  {
  set_jumps(common->utfreadchar, LABEL());
  do_utfreadchar(common);
  }
if (common->utfreadtype8 != NULL)
  {
  set_jumps(common->utfreadtype8, LABEL());
  do_utfreadtype8(common);
  }
if (common->utfpeakcharback != NULL)
  {
  set_jumps(common->utfpeakcharback, LABEL());
  do_utfpeakcharback(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
if (common->utfreadchar_invalid != NULL)
  {
  set_jumps(common->utfreadchar_invalid, LABEL());
  do_utfreadchar_invalid(common);
  }
if (common->utfreadnewline_invalid != NULL)
  {
  set_jumps(common->utfreadnewline_invalid, LABEL());
  do_utfreadnewline_invalid(common);
  }
if (common->utfmoveback_invalid != NULL)
  {
  set_jumps(common->utfmoveback_invalid, LABEL());
  do_utfmoveback_invalid(common);
  }
if (common->utfpeakcharback_invalid != NULL)
  {
  set_jumps(common->utfpeakcharback_invalid, LABEL());
  do_utfpeakcharback_invalid(common);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
if (common->getucd != NULL)
  {
  set_jumps(common->getucd, LABEL());
  do_getucd(common);
  }
if (common->getucdtype != NULL)
  {
  set_jumps(common->getucdtype, LABEL());
  do_getucdtype(common);
  }
#endif /* SUPPORT_UNICODE */

/* The compile-time tables are not needed for code generation. */
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);

executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
sljit_free_compiler(compiler);

if (executable_func == NULL)
  {
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
  return PCRE2_ERROR_NOMEMORY;
  }

/* Reuse the function descriptor if possible. */
if (re->executable_jit != NULL)
  functions = (executable_functions *)re->executable_jit;
else
  {
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
  if (functions == NULL)
    {
    /* This case is highly unlikely since we just recently
    freed a lot of memory. Not impossible though. */
    sljit_free_code(executable_func, NULL);
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
    return PCRE2_ERROR_NOMEMORY;
    }
  memset(functions, 0, sizeof(executable_functions));
  functions->top_bracket = re->top_bracket + 1;
  functions->limit_match = re->limit_match;
  re->executable_jit = functions;
  }

/* Turn mode into an index. */
if (mode == PCRE2_JIT_COMPLETE)
  mode = 0;
else
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;

SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
functions->executable_funcs[mode] = executable_func;
functions->read_only_data_heads[mode] = common->read_only_data_head;
functions->executable_sizes[mode] = executable_size;
return 0;
}
14108
14109 #endif
14110
14111 /*************************************************
14112 * JIT compile a Regular Expression *
14113 *************************************************/
14114
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14117
14118 Arguments:
14119 code a compiled pattern
14120 options JIT option bits
14121
14122 Returns: 0: success or (*NOJIT) was used
14123 <0: an error code
14124 */
14125
14126 #define PUBLIC_JIT_COMPILE_OPTIONS \
14127 (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14128
14129 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code * code,uint32_t options)14130 pcre2_jit_compile(pcre2_code *code, uint32_t options)
14131 {
14132 pcre2_real_code *re = (pcre2_real_code *)code;
14133
14134 if (code == NULL)
14135 return PCRE2_ERROR_NULL;
14136
14137 if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14138 return PCRE2_ERROR_JIT_BADOPTION;
14139
14140 /* Support for invalid UTF was first introduced in JIT, with the option
14141 PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14142 compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14143 preferred feature, with the earlier option deprecated. However, for backward
14144 compatibility, if the earlier option is set, it forces the new option so that
14145 if JIT matching falls back to the interpreter, there is still support for
14146 invalid UTF. However, if this function has already been successfully called
14147 without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14148 non-invalid-supporting JIT code was compiled), give an error.
14149
14150 If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14151 actions are needed:
14152
14153 1. Remove the definition from pcre2.h.in and from the list in
14154 PUBLIC_JIT_COMPILE_OPTIONS above.
14155
14156 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14157
14158 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14159
14160 4. Delete the following short block of code. The setting of "re" and
14161 "functions" can be moved into the JIT-only block below, but if that is
14162 done, (void)re and (void)functions will be needed in the non-JIT case, to
14163 avoid compiler warnings.
14164 */
14165
14166 #ifdef SUPPORT_JIT
14167 executable_functions *functions = (executable_functions *)re->executable_jit;
14168 static int executable_allocator_is_working = 0;
14169 #endif
14170
14171 if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14172 {
14173 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14174 {
14175 #ifdef SUPPORT_JIT
14176 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14177 #endif
14178 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14179 }
14180 }
14181
14182 /* The above tests are run with and without JIT support. This means that
14183 PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14184 interpreter support) even in the absence of JIT. But now, if there is no JIT
14185 support, give an error return. */
14186
14187 #ifndef SUPPORT_JIT
14188 return PCRE2_ERROR_JIT_BADOPTION;
14189 #else /* SUPPORT_JIT */
14190
14191 /* There is JIT support. Do the necessary. */
14192
14193 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14194
14195 if (executable_allocator_is_working == 0)
14196 {
14197 /* Checks whether the executable allocator is working. This check
14198 might run multiple times in multi-threaded environments, but the
14199 result should not be affected by it. */
14200 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14201
14202 executable_allocator_is_working = -1;
14203
14204 if (ptr != NULL)
14205 {
14206 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14207 executable_allocator_is_working = 1;
14208 }
14209 }
14210
14211 if (executable_allocator_is_working < 0)
14212 return PCRE2_ERROR_NOMEMORY;
14213
14214 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14215 options |= PCRE2_JIT_INVALID_UTF;
14216
14217 if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14218 || functions->executable_funcs[0] == NULL)) {
14219 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14220 int result = jit_compile(code, options & ~excluded_options);
14221 if (result != 0)
14222 return result;
14223 }
14224
14225 if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14226 || functions->executable_funcs[1] == NULL)) {
14227 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14228 int result = jit_compile(code, options & ~excluded_options);
14229 if (result != 0)
14230 return result;
14231 }
14232
14233 if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14234 || functions->executable_funcs[2] == NULL)) {
14235 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14236 int result = jit_compile(code, options & ~excluded_options);
14237 if (result != 0)
14238 return result;
14239 }
14240
14241 return 0;
14242
14243 #endif /* SUPPORT_JIT */
14244 }
14245
14246 /* JIT compiler uses an all-in-one approach. This improves security,
14247 since the code generator functions are not exported. */
14248
14249 #define INCLUDED_FROM_PCRE2_JIT_COMPILE
14250
14251 #include "pcre2_jit_match.c"
14252 #include "pcre2_jit_misc.c"
14253
14254 /* End of pcre2_jit_compile.c */
14255